comparison rg_rnaStarSolo.xml @ 7:e403d27e8f24 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit a8e319862d723654c372a6d71e5de76e052586a9"
author iuc
date Wed, 05 Aug 2020 09:41:22 -0400
parents c23da6257d6a
children 00fbfac99d39
comparison
equal deleted inserted replaced
6:178bdbdb6d24 7:e403d27e8f24
1 <tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@@WRAPPER@" profile="17.01"> 1 <tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@" profile="17.01">
2 <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description> 2 <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <token name="@WRAPPER@">1</token>
6 </macros> 5 </macros>
7 <expand macro="requirements"/> 6 <expand macro="requirements"/>
8 <expand macro="stdio" > 7 <expand macro="stdio" >
9 <regex match="Segmentation fault" source="both" level="fatal" /> 8 <regex match="Segmentation fault" source="both" level="fatal" />
10 </expand> 9 </expand>
12 <command><![CDATA[ 11 <command><![CDATA[
13 @TEMPINDEX@ 12 @TEMPINDEX@
14 STAR 13 STAR
15 @REFGENOMEHANDLING@ 14 @REFGENOMEHANDLING@
16 15
16 --readFilesIn
17 ## Check that the input pairs are of the same type 17 ## Check that the input pairs are of the same type
18 ## otherwise STARsolo will run for a long time and then error out. 18 ## otherwise STARsolo will run for a long time and then error out.
19 ## We consume either repeats of two inputs R1 + R2 19 ## We consume either repeats of two inputs R1 + R2
20 ## or a collection of paired reads. 20 ## or a collection of paired reads.
21 21
22 #try 22 #if str($input_types.use) == "repeat":
23 #set $last = None 23 #set $reads1 = []
24 #for $x in $input_types.input_repeats: 24 #set $reads2 = []
25 #if str($input_types.use) == "repeat": 25 #for $r1, $r2 in zip($input_types.input1, $input_types.input2):
26 #set $r1 = $x.input1
27 #set $r2 = $x.input2
28 #elif str($input_types.use) == "list_paired":
29 #set $r1 = $x.forward
30 #set $r2 = $x.reverse
31 #else
32 Wrong Type
33 #stop
34 #end if
35
36 #assert $r1.datatype == $r2.datatype 26 #assert $r1.datatype == $r2.datatype
37 27 #silent $reads1.append(str($r1))
38 ## Test that all pairs are of the same type 28 #silent $reads2.append(str($r2))
39 #if $last:
40 #assert $last.datatype == $r1.datatype
41 #end if
42 #set $last = $r1
43 #end for 29 #end for
44 #except AssertionError 30 #set $reads1 = ','.join($reads1)
45 Input types are not the same! 31 #set $reads2 = ','.join($reads2)
46 #stop 32 #elif str($input_types.use) == "list_paired":
47 #end try 33 #set $r1 = $input_types.input_collection.forward
34 #set $r2 = $input_types.input_collection.reverse
35 #set $reads1 = $r1
36 #set $reads2 = $r2
37 #end if
48 38
49 ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1] 39 ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]
50 ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs 40 ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs
51 #if str($input_types.use) == "repeat":
52 #set $reads2 = ','.join([ '%s' % $x.input2 for $i,$x in enumerate($input_types.input_repeats)])
53 #set $reads1 = ','.join([ '%s' % $x.input1 for $i,$x in enumerate($input_types.input_repeats)])
54 #else if str($input_types.use) == "list_paired"
55 #set $reads2 = ','.join([ '%s' % $x.reverse for $i,$x in enumerate($input_types.input_repeats)])
56 #set $reads1 = ','.join([ '%s' % $x.forward for $i,$x in enumerate($input_types.input_repeats)])
57 #end if
58
59 --readFilesIn
60 $reads2 $reads1 41 $reads2 $reads1
61 42
62 #if $last.is_of_type('fastq.gz', 'fastqsanger.gz'): 43 #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'):
63 @FASTQ_GZ_OPTION@ 44 @FASTQ_GZ_OPTION@
64 #end if 45 #end if
65 46
66 ## Droplet is the only mode available for now 47 ## Droplet is the only mode available for now
67 --soloType Droplet 48 --soloType Droplet
68 49
69 ## 1 - check length of barcode, 0 - do not check 50 ## 1 - check length of barcode, 0 - do not check
70 ## Good for checking custom chemistries 51 ## Good for checking custom chemistries
71 --soloCBwhitelist '$soloCBwhitelist' 52 --soloCBwhitelist '$soloCBwhitelist'
72 --soloBarcodeReadLength '$solo.soloBarcodeReadLength' 53 --soloBarcodeReadLength $solo.soloBarcodeReadLength
73 54
74 #if str($solo.params.chemistry) == "CR2": 55 #if str($solo.params.chemistry) == "CR2":
75 --soloCBstart 1 56 --soloCBstart 1
76 --soloCBlen 16 57 --soloCBlen 16
77 --soloUMIstart 17 58 --soloUMIstart 17
80 --soloCBstart 1 61 --soloCBstart 1
81 --soloCBlen 16 62 --soloCBlen 16
82 --soloUMIstart 17 63 --soloUMIstart 17
83 --soloUMIlen 12 64 --soloUMIlen 12
84 #else if str($solo.params.chemistry) == "custom": 65 #else if str($solo.params.chemistry) == "custom":
85 --soloCBstart '$solo.params.soloCBstart' 66 --soloCBstart $solo.params.soloCBstart
86 --soloCBlen '$solo.params.soloCBlen' 67 --soloCBlen $solo.params.soloCBlen
87 --soloUMIstart '$solo.params.soloUMIstart' 68 --soloUMIstart $solo.params.soloUMIstart
88 --soloUMIlen '$solo.params.soloUMIlen' 69 --soloUMIlen $solo.params.soloUMIlen
89 #end if 70 #end if
90 71
91 --soloStrand '$solo.soloStrand' 72 --soloStrand $solo.soloStrand
92 --soloFeatures '$solo.soloFeatures' 73 --soloFeatures $solo.soloFeatures
93 --soloUMIdedup '$solo.soloUMIdedup' 74 --soloUMIdedup $solo.soloUMIdedup
75 --quantMode TranscriptomeSAM
76 --outSAMtype BAM Unsorted
77
78 ## BAM sorting (logic copied from samtools_sort wrapper)
79 ## choosing BAM SortedByCoord appeared once to give fewer reads
80 ## than BAM Unsorted followed by a samtools sort
81 ## so we write an unsorted BAM and sort it with samtools afterwards.
82
83 &&
84 ##compute the number of ADDITIONAL threads to be used by samtools (-@)
85 addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) &&
86 ##compute the amount of memory available to samtools sort (-m)
87 ##use only 75% of available: https://github.com/samtools/samtools/issues/831
88 addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
89 ((addmemory=addmemory*75/100)) &&
90 samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
91
94 ]]></command> 92 ]]></command>
95 <inputs> 93 <inputs>
96 <conditional name="input_types" > 94 <conditional name="input_types" >
97 <param name="use" type="select" label="Input Type" > 95 <param name="use" type="select" label="Input Type" >
98 <option value="repeat" >Single files</option> 96 <option value="repeat" >Separate barcode and cDNA reads</option>
99 <option value="list_paired" >List of Pairs</option> 97 <option value="list_paired" >Paired collection of barcode and cDNA reads</option>
100 </param> 98 </param>
101 <when value="repeat"> 99 <when value="repeat">
102 <repeat name="input_repeats" title="Input Pairs" min="1" > 100 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true"
103 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, Barcode reads"/> 101 label="RNA-Seq FASTQ/FASTA file, Barcode reads" />
104 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> 102 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true"
105 </repeat> 103 label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
106 </when> 104 </when>
107 <when value="list_paired"> 105 <when value="list_paired">
108 <param name="input_repeats" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> 106 <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" />
109 </when> 107 </when>
110 </conditional> 108 </conditional>
111 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" /> 109 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" />
112 <expand macro="refgenomehandling" /> 110
111 <!-- Genome source. -->
112 <conditional name="refGenomeSource">
113 <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
114 <option value="indexed" selected="true">Use a built-in index</option>
115 <option value="history">Use reference genome from history and create temporary index</option>
116 </param>
117 <when value="indexed">
118 <conditional name="GTFconditional">
119 <param name="GTFselect" type="select"
120 label="Reference genome with or without an annotation"
121 help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions, and provide your own splice junction annotations.">
122 <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option>
123 <option value="with-gtf">use genome reference with builtin gene-model</option>
124 </param>
125 <when value="with-gtf">
126 <expand macro="index_selection" with_gene_model="1" />
127 </when>
128 <when value="without-gtf">
129 <expand macro="index_selection" with_gene_model="0" />
130 <expand macro="@SJDBOPTIONS@" optional="false" />
131 </when>
132 </conditional>
133 </when>
134 <when value="history">
135 <expand macro="ref_selection" />
136 <expand macro="@SJDBOPTIONS@" optional="false"/>
137 </when>
138 </conditional>
139
113 <section name="solo" title="Advanced Settings" expanded="true"> 140 <section name="solo" title="Advanced Settings" expanded="true">
114 <conditional name="params"> 141 <conditional name="params">
115 <param name="chemistry" type="select" label="Configure Chemistry Options"> 142 <param name="chemistry" type="select" label="Configure Chemistry Options">
116 <option value="CR2" selected="true">Cell Ranger v2</option> 143 <option value="CR2" selected="true">Cell Ranger v2</option>
117 <option value="CR3">Cell Ranger v3</option> 144 <option value="CR3">Cell Ranger v3</option>
146 </inputs> 173 </inputs>
147 <outputs> 174 <outputs>
148 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out"> 175 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
149 <expand macro="dbKeyActions" /> 176 <expand macro="dbKeyActions" />
150 </data> 177 </data>
151 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" from_work_dir="Solo.out/genes.tsv" /> 178 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes"
152 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" from_work_dir="Solo.out/barcodes.tsv" /> 179 from_work_dir="Solo.out/Gene/filtered/features.tsv" />
153 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" from_work_dir="Solo.out/matrix.mtx" > 180 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes"
181 from_work_dir="Solo.out/Gene/filtered/barcodes.tsv" />
182 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts"
183 from_work_dir="Solo.out/Gene/filtered/matrix.mtx" >
154 <filter>solo['soloFeatures'] == "Gene" </filter> 184 <filter>solo['soloFeatures'] == "Gene" </filter>
155 </data> 185 <expand macro="dbKeyActions" />
156 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" from_work_dir="Solo.out/matrixSJ.mtx" > 186 </data>
187 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts"
188 from_work_dir="Solo.out/Gene/filtered/matrixSJ.mtx" >
157 <filter>solo['soloFeatures'] == "SJ" </filter> 189 <filter>solo['soloFeatures'] == "SJ" </filter>
158 </data> 190 <expand macro="dbKeyActions" />
159 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts" from_work_dir="Solo.out/matrixGeneFull.mtx" > 191 </data>
192 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts"
193 from_work_dir="Solo.out/Gene/filtered/matrixGeneFull.mtx" >
160 <filter>solo['soloFeatures'] == "GeneFull" </filter> 194 <filter>solo['soloFeatures'] == "GeneFull" </filter>
161 </data> 195 <expand macro="dbKeyActions" />
162 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries" from_work_dir="Solo.out/Gene.stats" /> 196 </data>
197 <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" >
198 <expand macro="dbKeyActions" />
199 </data>
200 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries"
201 from_work_dir="Solo.out/Gene/Features.stats" />
163 </outputs> 202 </outputs>
203 <!-- Generating test data that is big enough for STARsolo to detect and small enough
204 for Galaxy to test requires careful modification of input FASTA and GTF data,
205 where the length of FASTA cannot exceed the largest position in the GTF file,
206 regardless of the FASTA starting sequence position.
207
208 A full writeup of how to subset single cell data for use in STARsolo is given
209 here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d
210 -->
164 <tests> 211 <tests>
165 <test expect_num_outputs="5"> 212 <test expect_num_outputs="6">
166 <conditional name="input_types"> 213 <conditional name="input_types">
167 <param name="use" value="repeat" /> 214 <param name="use" value="repeat" />
168 <repeat name="input_repeats" > 215 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
169 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> 216 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
170 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> 217 </conditional>
171 </repeat> 218 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
172 </conditional>
173 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
174 <conditional name="refGenomeSource"> 219 <conditional name="refGenomeSource">
175 <param name="geneSource" value="history" /> 220 <param name="geneSource" value="history" />
176 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> 221 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
177 <param name="genomeSAindexNbases" value="4" /> 222 <param name="genomeSAindexNbases" value="4" />
178 <conditional name="GTFconditional"> 223 <param name="sjdbOverhang" value="100" />
179 <param name="GTFselect" value="with-gtf" /> 224 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
180 <param name="sjdbOverhang" value="75"/>
181 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
182 </conditional>
183 </conditional> 225 </conditional>
184 <section name="solo" > 226 <section name="solo" >
185 <conditional name="params"> 227 <conditional name="params">
186 <param name="chemistry" value="CR2" /> 228 <param name="chemistry" value="CR3" />
187 </conditional> 229 </conditional>
188 <param name="soloStrand" value="Forward" /> 230 <param name="soloStrand" value="Forward" />
189 <param name="soloFeatures" value="Gene" /> 231 <param name="soloFeatures" value="Gene" />
190 <param name="soloUMIdedup" value="1MM_All" /> 232 <param name="soloUMIdedup" value="1MM_All" />
191 </section> 233 </section>
234 <output name="output_barcodes" >
235 <assert_contents>
236 <!-- first and last line -->
237 <has_line line="ACACCGGTCTAACGGT" />
238 <has_line line="TTCTCAATCCACGTTC" />
239 </assert_contents>
240 </output>
192 <output name="output_genes"> 241 <output name="output_genes">
193 <assert_contents> 242 <assert_contents>
194 <has_line_matching expression="ENSG00000209480\sSNORD83B" /> 243 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
244 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
195 </assert_contents> 245 </assert_contents>
196 </output> 246 </output>
197 <output name="output_matrix" > 247 <output name="output_matrix" >
198 <assert_contents> 248 <assert_contents>
199 <has_line_matching expression="1\s137281\s0" /> 249 <has_line_matching expression="14\s+7\s+7" />
250 <has_line_matching expression="4\s+7\s+1" />
200 </assert_contents> 251 </assert_contents>
201 </output> 252 </output>
202 <output name="output_stats" > 253 <output name="output_stats" >
203 <assert_contents> 254 <assert_contents>
204 <has_line_matching expression="\s+nNoFeature\s+3253" /> 255 <has_line_matching expression="\s+nUnmapped\s+5823" />
205 <has_line_matching expression="\s+nUMIs\s+0" /> 256 <has_line_matching expression="\s+nUMIs\s+8" />
206 </assert_contents> 257 </assert_contents>
207 </output> 258 </output>
259 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
208 </test> 260 </test>
209 <test expect_num_outputs="5"> 261 <test expect_num_outputs="6"><!-- same as above, but using custom -->
210 <conditional name="input_types"> 262 <conditional name="input_types">
211 <param name="use" value="repeat" /> 263 <param name="use" value="repeat" />
212 <repeat name="input_repeats" > 264 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
213 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> 265 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
214 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> 266 </conditional>
215 </repeat> 267 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
216 </conditional>
217 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
218 <conditional name="refGenomeSource"> 268 <conditional name="refGenomeSource">
219 <param name="geneSource" value="history" /> 269 <param name="geneSource" value="history" />
220 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> 270 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
221 <param name="genomeSAindexNbases" value="4" /> 271 <param name="genomeSAindexNbases" value="4" />
222 <conditional name="GTFconditional"> 272 <param name="sjdbOverhang" value="100" />
223 <param name="GTFselect" value="with-gtf" /> 273 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
224 <param name="sjdbOverhang" value="75" />
225 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
226 </conditional>
227 </conditional> 274 </conditional>
228 <section name="solo" > 275 <section name="solo" >
229 <conditional name="params"> 276 <conditional name="params">
230 <param name="chemistry" value="custom" /> 277 <param name="chemistry" value="custom" />
231 <param name="soloCBstart" value="1" /> 278 <param name="soloCBstart" value="1" />
232 <param name="soloCBlen" value="16" /> 279 <param name="soloCBlen" value="16" />
233 <param name="soloUMIstart" value="17" /> 280 <param name="soloUMIstart" value="17" />
234 <param name="soloUMIlen" value="10" /> 281 <param name="soloUMIlen" value="12" />
235 </conditional> 282 </conditional>
236 <param name="soloStrand" value="Forward" /> 283 <param name="soloStrand" value="Forward" />
237 <param name="soloFeatures" value="GeneFull" /> 284 <param name="soloFeatures" value="Gene" />
238 <param name="soloUMIdedup" value="1MM_Directional" /> 285 <param name="soloUMIdedup" value="1MM_All" />
239 </section> 286 </section>
240 <output name="output_barcodes" > 287 <output name="output_barcodes" >
241 <assert_contents> 288 <assert_contents>
242 <has_line line="TTTGTCATCTTAGAGC" /> 289 <has_line line="ACACCGGTCTAACGGT" />
243 <has_line line="TTTGTCATCTTTCCTC" /> 290 <has_line line="TTCTCAATCCACGTTC" />
244 </assert_contents> 291 </assert_contents>
245 </output> 292 </output>
293 <output name="output_genes">
294 <assert_contents>
295 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
296 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
297 </assert_contents>
298 </output>
299 <output name="output_matrix" >
300 <assert_contents>
301 <has_line_matching expression="14\s+7\s+7" />
302 <has_line_matching expression="4\s+7\s+1" />
303 </assert_contents>
304 </output>
305 <output name="output_stats" >
306 <assert_contents>
307 <has_line_matching expression="\s+nUnmapped\s+5823" />
308 <has_line_matching expression="\s+nUMIs\s+8" />
309 </assert_contents>
310 </output>
311 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
246 </test> 312 </test>
247 <test expect_num_outputs="5"> 313 <test expect_num_outputs="6"><!-- Multiple repeats test -->
248 <!-- Multiple repeats test -->
249 <conditional name="input_types"> 314 <conditional name="input_types">
250 <param name="use" value="repeat" /> 315 <param name="use" value="repeat" />
251 <repeat name="input_repeats" > 316 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
252 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> 317 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
253 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> 318 </conditional>
254 </repeat> 319 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
255 <repeat name="input_repeats" >
256 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
257 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
258 </repeat>
259 <repeat name="input_repeats" >
260 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
261 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
262 </repeat>
263 </conditional>
264 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
265 <conditional name="refGenomeSource"> 320 <conditional name="refGenomeSource">
266 <param name="geneSource" value="history" /> 321 <param name="geneSource" value="history" />
267 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> 322 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
268 <param name="genomeSAindexNbases" value="4" /> 323 <param name="genomeSAindexNbases" value="4" />
269 <conditional name="GTFconditional"> 324 <param name="sjdbOverhang" value="100" />
270 <param name="GTFselect" value="with-gtf" /> 325 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
271 <param name="sjdbOverhang" value="75" />
272 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
273 </conditional>
274 </conditional> 326 </conditional>
275 <section name="solo" > 327 <section name="solo" >
276 <conditional name="params"> 328 <conditional name="params">
277 <param name="chemistry" value="custom" /> 329 <param name="chemistry" value="CR3" />
278 <param name="soloCBstart" value="1" />
279 <param name="soloCBlen" value="16" />
280 <param name="soloUMIstart" value="17" />
281 <param name="soloUMIlen" value="10" />
282 </conditional> 330 </conditional>
283 <param name="soloStrand" value="Forward" /> 331 <param name="soloStrand" value="Forward" />
284 <param name="soloFeatures" value="GeneFull" /> 332 <param name="soloFeatures" value="Gene" />
285 <param name="soloUMIdedup" value="1MM_Directional" /> 333 <param name="soloUMIdedup" value="1MM_All" />
286 </section> 334 </section>
287 <output name="output_barcodes" > 335 <output name="output_barcodes" >
288 <assert_contents> 336 <assert_contents>
289 <has_line line="TTTGTCATCTTAGAGC" /> 337 <has_line line="ACACCGGTCTAACGGT" />
290 <has_line line="TTTGTCATCTTTCCTC" /> 338 <has_line line="TTCTCAATCCACGTTC" />
291 </assert_contents> 339 </assert_contents>
292 </output> 340 </output>
341 <!-- The BAM output is huge, so it is not compared in this test -->
293 </test> 342 </test>
294 <test expect_num_outputs="5"> 343 <test expect_num_outputs="6">
295 <!-- Same as the test before but with a collection of pairs --> 344 <!-- Test with paired collection -->
296 <conditional name="input_types"> 345 <conditional name="input_types">
297 <param name="use" value="list_paired" /> 346 <param name="use" value="list_paired" />
298 <param name="input_repeats" > 347 <param name="input_collection" >
299 <collection type="list:paired"> 348 <collection type="paired">
300 <element name="Pair1"> 349 <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
301 <collection type="paired"> 350 <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
302 <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
303 <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
304 </collection>
305 </element>
306 <element name="Pair2">
307 <collection type="paired">
308 <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
309 <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
310 </collection>
311 </element>
312 <!-- Planemo does not support more than 2 elements in a list of pairs -->
313 <!-- <element name="Pair3"> -->
314 <!-- <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> -->
315 <!-- <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> -->
316 <!-- </element> -->
317 </collection> 351 </collection>
318 </param> 352 </param>
319 </conditional> 353 </conditional>
320 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" /> 354 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
321 <conditional name="refGenomeSource"> 355 <conditional name="refGenomeSource">
322 <param name="geneSource" value="history" /> 356 <param name="geneSource" value="history" />
323 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> 357 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
324 <param name="genomeSAindexNbases" value="4" /> 358 <param name="genomeSAindexNbases" value="4" />
325 <conditional name="GTFconditional"> 359 <param name="sjdbOverhang" value="100" />
326 <param name="GTFselect" value="with-gtf" /> 360 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
327 <param name="sjdbOverhang" value="75" />
328 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
329 </conditional>
330 </conditional> 361 </conditional>
331 <section name="solo" > 362 <section name="solo" >
332 <conditional name="params"> 363 <conditional name="params">
333 <param name="chemistry" value="custom" /> 364 <param name="chemistry" value="CR3" />
334 <param name="soloCBstart" value="1" />
335 <param name="soloCBlen" value="16" />
336 <param name="soloUMIstart" value="17" />
337 <param name="soloUMIlen" value="10" />
338 </conditional> 365 </conditional>
339 <param name="soloStrand" value="Forward" /> 366 <param name="soloStrand" value="Forward" />
340 <param name="soloFeatures" value="GeneFull" /> 367 <param name="soloFeatures" value="Gene" />
341 <param name="soloUMIdedup" value="1MM_Directional" /> 368 <param name="soloUMIdedup" value="1MM_All" />
342 </section> 369 </section>
343 <output name="output_barcodes" > 370 <output name="output_barcodes" >
344 <assert_contents> 371 <assert_contents>
345 <has_line line="TTTGTCATCTTAGAGC" /> 372 <has_line line="ACACCGGTCTAACGGT" />
346 <has_line line="TTTGTCATCTTTCCTC" /> 373 <has_line line="TTCTCAATCCACGTTC" />
347 </assert_contents> 374 </assert_contents>
348 </output> 375 </output>
376 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
349 </test> 377 </test>
350 </tests> 378 </tests>
351 <help><![CDATA[ 379 <help><![CDATA[
352 **What it does** 380 **What it does**
353 381
354 **STARSolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR code. STARsolo inputs the raw FASTQ reads files, and performs the following operations: 382 **STARSolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR_ code. STARsolo takes raw FASTQ reads files as input, and performs the following operations:
355 383
356 * Error correction and demultiplexing of cell barcodes using user-input whitelist 384 * Error correction and demultiplexing of cell barcodes using user-input whitelist
357 * Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm 385 * Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm
358 * Error correction and collapsing (deduplication) of Unique Molecular Identifiers (UMIs) 386 * Error correction and collapsing (deduplication) of Unique Molecular Identifiers (UMIs)
359 * Quantification of per-cell gene expression by counting the number of reads per gene 387 * Quantification of per-cell gene expression by counting the number of reads per gene
360 388
361 STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time STARsolo is 10 times faster than CellRanger. 389 STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time STARsolo is 10 times faster than CellRanger.
362 390
391 .. _STAR: https://github.com/alexdobin/STAR
363 ]]></help> 392 ]]></help>
364 <expand macro="citations"/> 393 <expand macro="citations"/>
365 </tool> 394 </tool>