Mercurial > repos > iuc > rna_starsolo
comparison rg_rnaStarSolo.xml @ 7:e403d27e8f24 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit a8e319862d723654c372a6d71e5de76e052586a9"
author | iuc |
---|---|
date | Wed, 05 Aug 2020 09:41:22 -0400 |
parents | c23da6257d6a |
children | 00fbfac99d39 |
comparison
equal
deleted
inserted
replaced
6:178bdbdb6d24 | 7:e403d27e8f24 |
---|---|
1 <tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@@WRAPPER@" profile="17.01"> | 1 <tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@" profile="17.01"> |
2 <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description> | 2 <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 <token name="@WRAPPER@">1</token> | |
6 </macros> | 5 </macros> |
7 <expand macro="requirements"/> | 6 <expand macro="requirements"/> |
8 <expand macro="stdio" > | 7 <expand macro="stdio" > |
9 <regex match="Segmentation fault" source="both" level="fatal" /> | 8 <regex match="Segmentation fault" source="both" level="fatal" /> |
10 </expand> | 9 </expand> |
12 <command><![CDATA[ | 11 <command><![CDATA[ |
13 @TEMPINDEX@ | 12 @TEMPINDEX@ |
14 STAR | 13 STAR |
15 @REFGENOMEHANDLING@ | 14 @REFGENOMEHANDLING@ |
16 | 15 |
16 --readFilesIn | |
17 ## Check that the input pairs are of the same type | 17 ## Check that the input pairs are of the same type |
18 ## otherwise STARsolo will run for a long time and then error out. | 18 ## otherwise STARsolo will run for a long time and then error out. |
19 ## We consume either repeats of two inputs R1 + R2 | 19 ## We consume either repeats of two inputs R1 + R2 |
20 ## or a collection of paired reads. | 20 ## or a collection of paired reads. |
21 | 21 |
22 #try | 22 #if str($input_types.use) == "repeat": |
23 #set $last = None | 23 #set $reads1 = [] |
24 #for $x in $input_types.input_repeats: | 24 #set $reads2 = [] |
25 #if str($input_types.use) == "repeat": | 25 #for $r1, $r2 in zip($input_types.input1, $input_types.input2): |
26 #set $r1 = $x.input1 | |
27 #set $r2 = $x.input2 | |
28 #elif str($input_types.use) == "list_paired": | |
29 #set $r1 = $x.forward | |
30 #set $r2 = $x.reverse | |
31 #else | |
32 Wrong Type | |
33 #stop | |
34 #end if | |
35 | |
36 #assert $r1.datatype == $r2.datatype | 26 #assert $r1.datatype == $r2.datatype |
37 | 27 #silent $reads1.append(str($r1)) |
38 ## Test that all pairs are of the same type | 28 #silent $reads2.append(str($r2)) |
39 #if $last: | |
40 #assert $last.datatype == $r1.datatype | |
41 #end if | |
42 #set $last = $r1 | |
43 #end for | 29 #end for |
44 #except AssertionError | 30 #set $reads1 = ','.join($reads1) |
45 Input types are not the same! | 31 #set $reads2 = ','.join($reads2) |
46 #stop | 32 #elif str($input_types.use) == "list_paired": |
47 #end try | 33 #set $r1 = $input_types.input_collection.forward |
34 #set $r2 = $input_types.input_collection.reverse | |
35 #set $reads1 = $r1 | |
36 #set $reads2 = $r2 | |
37 #end if | |
48 | 38 |
49 ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1] | 39 ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1] |
50 ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs | 40 ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs |
51 #if str($input_types.use) == "repeat": | |
52 #set $reads2 = ','.join([ '%s' % $x.input2 for $i,$x in enumerate($input_types.input_repeats)]) | |
53 #set $reads1 = ','.join([ '%s' % $x.input1 for $i,$x in enumerate($input_types.input_repeats)]) | |
54 #else if str($input_types.use) == "list_paired" | |
55 #set $reads2 = ','.join([ '%s' % $x.reverse for $i,$x in enumerate($input_types.input_repeats)]) | |
56 #set $reads1 = ','.join([ '%s' % $x.forward for $i,$x in enumerate($input_types.input_repeats)]) | |
57 #end if | |
58 | |
59 --readFilesIn | |
60 $reads2 $reads1 | 41 $reads2 $reads1 |
61 | 42 |
62 #if $last.is_of_type('fastq.gz', 'fastqsanger.gz'): | 43 #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'): |
63 @FASTQ_GZ_OPTION@ | 44 @FASTQ_GZ_OPTION@ |
64 #end if | 45 #end if |
65 | 46 |
66 ## Droplet is the only mode available for now | 47 ## Droplet is the only mode available for now |
67 --soloType Droplet | 48 --soloType Droplet |
68 | 49 |
69 ## 1 - check length of barcode, 0 - do not check | 50 ## 1 - check length of barcode, 0 - do not check |
70 ## Good for checking custom chemistries | 51 ## Good for checking custom chemistries |
71 --soloCBwhitelist '$soloCBwhitelist' | 52 --soloCBwhitelist '$soloCBwhitelist' |
72 --soloBarcodeReadLength '$solo.soloBarcodeReadLength' | 53 --soloBarcodeReadLength $solo.soloBarcodeReadLength |
73 | 54 |
74 #if str($solo.params.chemistry) == "CR2": | 55 #if str($solo.params.chemistry) == "CR2": |
75 --soloCBstart 1 | 56 --soloCBstart 1 |
76 --soloCBlen 16 | 57 --soloCBlen 16 |
77 --soloUMIstart 17 | 58 --soloUMIstart 17 |
80 --soloCBstart 1 | 61 --soloCBstart 1 |
81 --soloCBlen 16 | 62 --soloCBlen 16 |
82 --soloUMIstart 17 | 63 --soloUMIstart 17 |
83 --soloUMIlen 12 | 64 --soloUMIlen 12 |
84 #else if str($solo.params.chemistry) == "custom": | 65 #else if str($solo.params.chemistry) == "custom": |
85 --soloCBstart '$solo.params.soloCBstart' | 66 --soloCBstart $solo.params.soloCBstart |
86 --soloCBlen '$solo.params.soloCBlen' | 67 --soloCBlen $solo.params.soloCBlen |
87 --soloUMIstart '$solo.params.soloUMIstart' | 68 --soloUMIstart $solo.params.soloUMIstart |
88 --soloUMIlen '$solo.params.soloUMIlen' | 69 --soloUMIlen $solo.params.soloUMIlen |
89 #end if | 70 #end if |
90 | 71 |
91 --soloStrand '$solo.soloStrand' | 72 --soloStrand $solo.soloStrand |
92 --soloFeatures '$solo.soloFeatures' | 73 --soloFeatures $solo.soloFeatures |
93 --soloUMIdedup '$solo.soloUMIdedup' | 74 --soloUMIdedup $solo.soloUMIdedup |
75 --quantMode TranscriptomeSAM | |
76 --outSAMtype BAM Unsorted | |
77 | |
78 ## BAM sorting (logic copied from samtools_sort wrapper) | |
79 ## Choosing BAM SortedByCoord was once observed to give fewer reads | |
80 ## than BAM Unsorted followed by a samtools sort, | |
81 ## so STAR writes an unsorted BAM and samtools sort is run afterwards. | |
82 | |
83 && | |
84 ##compute the number of ADDITIONAL threads to be used by samtools (-@) | |
85 addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) && | |
86 ##compute the amount of memory available to samtools sort (-m) | |
87 ##use only 75% of available: https://github.com/samtools/samtools/issues/831 | |
88 addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && | |
89 ((addmemory=addmemory*75/100)) && | |
90 samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam | |
91 | |
94 ]]></command> | 92 ]]></command> |
95 <inputs> | 93 <inputs> |
96 <conditional name="input_types" > | 94 <conditional name="input_types" > |
97 <param name="use" type="select" label="Input Type" > | 95 <param name="use" type="select" label="Input Type" > |
98 <option value="repeat" >Single files</option> | 96 <option value="repeat" >Separate barcode and cDNA reads</option> |
99 <option value="list_paired" >List of Pairs</option> | 97 <option value="list_paired" >Paired collection of barcode and cDNA reads</option> |
100 </param> | 98 </param> |
101 <when value="repeat"> | 99 <when value="repeat"> |
102 <repeat name="input_repeats" title="Input Pairs" min="1" > | 100 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" |
103 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, Barcode reads"/> | 101 label="RNA-Seq FASTQ/FASTA file, Barcode reads" /> |
104 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> | 102 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" |
105 </repeat> | 103 label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> |
106 </when> | 104 </when> |
107 <when value="list_paired"> | 105 <when value="list_paired"> |
108 <param name="input_repeats" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> | 106 <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> |
109 </when> | 107 </when> |
110 </conditional> | 108 </conditional> |
111 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" /> | 109 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" /> |
112 <expand macro="refgenomehandling" /> | 110 |
111 <!-- Genome source. --> | |
112 <conditional name="refGenomeSource"> | |
113 <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options"> | |
114 <option value="indexed" selected="true">Use a built-in index</option> | |
115 <option value="history">Use reference genome from history and create temporary index</option> | |
116 </param> | |
117 <when value="indexed"> | |
118 <conditional name="GTFconditional"> | |
119 <param name="GTFselect" type="select" | |
120 label="Reference genome with or without an annotation" | |
121 help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions, and provide your own splice junction annotations."> | |
122 <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option> | |
123 <option value="with-gtf">use genome reference with builtin gene-model</option> | |
124 </param> | |
125 <when value="with-gtf"> | |
126 <expand macro="index_selection" with_gene_model="1" /> | |
127 </when> | |
128 <when value="without-gtf"> | |
129 <expand macro="index_selection" with_gene_model="0" /> | |
130 <expand macro="@SJDBOPTIONS@" optional="false" /> | |
131 </when> | |
132 </conditional> | |
133 </when> | |
134 <when value="history"> | |
135 <expand macro="ref_selection" /> | |
136 <expand macro="@SJDBOPTIONS@" optional="false"/> | |
137 </when> | |
138 </conditional> | |
139 | |
113 <section name="solo" title="Advanced Settings" expanded="true"> | 140 <section name="solo" title="Advanced Settings" expanded="true"> |
114 <conditional name="params"> | 141 <conditional name="params"> |
115 <param name="chemistry" type="select" label="Configure Chemistry Options"> | 142 <param name="chemistry" type="select" label="Configure Chemistry Options"> |
116 <option value="CR2" selected="true">Cell Ranger v2</option> | 143 <option value="CR2" selected="true">Cell Ranger v2</option> |
117 <option value="CR3">Cell Ranger v3</option> | 144 <option value="CR3">Cell Ranger v3</option> |
146 </inputs> | 173 </inputs> |
147 <outputs> | 174 <outputs> |
148 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out"> | 175 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out"> |
149 <expand macro="dbKeyActions" /> | 176 <expand macro="dbKeyActions" /> |
150 </data> | 177 </data> |
151 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" from_work_dir="Solo.out/genes.tsv" /> | 178 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" |
152 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" from_work_dir="Solo.out/barcodes.tsv" /> | 179 from_work_dir="Solo.out/Gene/filtered/features.tsv" /> |
153 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" from_work_dir="Solo.out/matrix.mtx" > | 180 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" |
181 from_work_dir="Solo.out/Gene/filtered/barcodes.tsv" /> | |
182 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" | |
183 from_work_dir="Solo.out/Gene/filtered/matrix.mtx" > | |
154 <filter>solo['soloFeatures'] == "Gene" </filter> | 184 <filter>solo['soloFeatures'] == "Gene" </filter> |
155 </data> | 185 <expand macro="dbKeyActions" /> |
156 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" from_work_dir="Solo.out/matrixSJ.mtx" > | 186 </data> |
187 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" | |
188 from_work_dir="Solo.out/Gene/filtered/matrixSJ.mtx" > | |
157 <filter>solo['soloFeatures'] == "SJ" </filter> | 189 <filter>solo['soloFeatures'] == "SJ" </filter> |
158 </data> | 190 <expand macro="dbKeyActions" /> |
159 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts" from_work_dir="Solo.out/matrixGeneFull.mtx" > | 191 </data> |
192 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts" | |
193 from_work_dir="Solo.out/Gene/filtered/matrixGeneFull.mtx" > | |
160 <filter>solo['soloFeatures'] == "GeneFull" </filter> | 194 <filter>solo['soloFeatures'] == "GeneFull" </filter> |
161 </data> | 195 <expand macro="dbKeyActions" /> |
162 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries" from_work_dir="Solo.out/Gene.stats" /> | 196 </data> |
197 <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" > | |
198 <expand macro="dbKeyActions" /> | |
199 </data> | |
200 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries" | |
201 from_work_dir="Solo.out/Gene/Features.stats" /> | |
163 </outputs> | 202 </outputs> |
203 <!-- Generating test data that is big enough for STARsolo to detect and small enough | |
204 for Galaxy to test requires careful modification of input FASTA and GTF data, | |
205 where the length of FASTA cannot exceed the largest position in the GTF file, | |
206 regardless of the FASTA starting sequence position. | |
207 | |
208 A full writeup of how to subset single cell data for use in STARsolo is given | |
209 here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d | |
210 --> | |
164 <tests> | 211 <tests> |
165 <test expect_num_outputs="5"> | 212 <test expect_num_outputs="6"> |
166 <conditional name="input_types"> | 213 <conditional name="input_types"> |
167 <param name="use" value="repeat" /> | 214 <param name="use" value="repeat" /> |
168 <repeat name="input_repeats" > | 215 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> |
169 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | 216 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> |
170 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | 217 </conditional> |
171 </repeat> | 218 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> |
172 </conditional> | |
173 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" /> | |
174 <conditional name="refGenomeSource"> | 219 <conditional name="refGenomeSource"> |
175 <param name="geneSource" value="history" /> | 220 <param name="geneSource" value="history" /> |
176 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> | 221 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
177 <param name="genomeSAindexNbases" value="4" /> | 222 <param name="genomeSAindexNbases" value="4" /> |
178 <conditional name="GTFconditional"> | 223 <param name="sjdbOverhang" value="100" /> |
179 <param name="GTFselect" value="with-gtf" /> | 224 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
180 <param name="sjdbOverhang" value="75"/> | |
181 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/> | |
182 </conditional> | |
183 </conditional> | 225 </conditional> |
184 <section name="solo" > | 226 <section name="solo" > |
185 <conditional name="params"> | 227 <conditional name="params"> |
186 <param name="chemistry" value="CR2" /> | 228 <param name="chemistry" value="CR3" /> |
187 </conditional> | 229 </conditional> |
188 <param name="soloStrand" value="Forward" /> | 230 <param name="soloStrand" value="Forward" /> |
189 <param name="soloFeatures" value="Gene" /> | 231 <param name="soloFeatures" value="Gene" /> |
190 <param name="soloUMIdedup" value="1MM_All" /> | 232 <param name="soloUMIdedup" value="1MM_All" /> |
191 </section> | 233 </section> |
234 <output name="output_barcodes" > | |
235 <assert_contents> | |
236 <!-- first and last line --> | |
237 <has_line line="ACACCGGTCTAACGGT" /> | |
238 <has_line line="TTCTCAATCCACGTTC" /> | |
239 </assert_contents> | |
240 </output> | |
192 <output name="output_genes"> | 241 <output name="output_genes"> |
193 <assert_contents> | 242 <assert_contents> |
194 <has_line_matching expression="ENSG00000209480\sSNORD83B" /> | 243 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> |
244 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> | |
195 </assert_contents> | 245 </assert_contents> |
196 </output> | 246 </output> |
197 <output name="output_matrix" > | 247 <output name="output_matrix" > |
198 <assert_contents> | 248 <assert_contents> |
199 <has_line_matching expression="1\s137281\s0" /> | 249 <has_line_matching expression="14\s+7\s+7" /> |
250 <has_line_matching expression="4\s+7\s+1" /> | |
200 </assert_contents> | 251 </assert_contents> |
201 </output> | 252 </output> |
202 <output name="output_stats" > | 253 <output name="output_stats" > |
203 <assert_contents> | 254 <assert_contents> |
204 <has_line_matching expression="\s+nNoFeature\s+3253" /> | 255 <has_line_matching expression="\s+nUnmapped\s+5823" /> |
205 <has_line_matching expression="\s+nUMIs\s+0" /> | 256 <has_line_matching expression="\s+nUMIs\s+8" /> |
206 </assert_contents> | 257 </assert_contents> |
207 </output> | 258 </output> |
259 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> | |
208 </test> | 260 </test> |
209 <test expect_num_outputs="5"> | 261 <test expect_num_outputs="6"><!-- same as above, but using custom --> |
210 <conditional name="input_types"> | 262 <conditional name="input_types"> |
211 <param name="use" value="repeat" /> | 263 <param name="use" value="repeat" /> |
212 <repeat name="input_repeats" > | 264 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> |
213 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | 265 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> |
214 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | 266 </conditional> |
215 </repeat> | 267 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> |
216 </conditional> | |
217 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" /> | |
218 <conditional name="refGenomeSource"> | 268 <conditional name="refGenomeSource"> |
219 <param name="geneSource" value="history" /> | 269 <param name="geneSource" value="history" /> |
220 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> | 270 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
221 <param name="genomeSAindexNbases" value="4" /> | 271 <param name="genomeSAindexNbases" value="4" /> |
222 <conditional name="GTFconditional"> | 272 <param name="sjdbOverhang" value="100" /> |
223 <param name="GTFselect" value="with-gtf" /> | 273 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
224 <param name="sjdbOverhang" value="75" /> | |
225 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/> | |
226 </conditional> | |
227 </conditional> | 274 </conditional> |
228 <section name="solo" > | 275 <section name="solo" > |
229 <conditional name="params"> | 276 <conditional name="params"> |
230 <param name="chemistry" value="custom" /> | 277 <param name="chemistry" value="custom" /> |
231 <param name="soloCBstart" value="1" /> | 278 <param name="soloCBstart" value="1" /> |
232 <param name="soloCBlen" value="16" /> | 279 <param name="soloCBlen" value="16" /> |
233 <param name="soloUMIstart" value="17" /> | 280 <param name="soloUMIstart" value="17" /> |
234 <param name="soloUMIlen" value="10" /> | 281 <param name="soloUMIlen" value="12" /> |
235 </conditional> | 282 </conditional> |
236 <param name="soloStrand" value="Forward" /> | 283 <param name="soloStrand" value="Forward" /> |
237 <param name="soloFeatures" value="GeneFull" /> | 284 <param name="soloFeatures" value="Gene" /> |
238 <param name="soloUMIdedup" value="1MM_Directional" /> | 285 <param name="soloUMIdedup" value="1MM_All" /> |
239 </section> | 286 </section> |
240 <output name="output_barcodes" > | 287 <output name="output_barcodes" > |
241 <assert_contents> | 288 <assert_contents> |
242 <has_line line="TTTGTCATCTTAGAGC" /> | 289 <has_line line="ACACCGGTCTAACGGT" /> |
243 <has_line line="TTTGTCATCTTTCCTC" /> | 290 <has_line line="TTCTCAATCCACGTTC" /> |
244 </assert_contents> | 291 </assert_contents> |
245 </output> | 292 </output> |
293 <output name="output_genes"> | |
294 <assert_contents> | |
295 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> | |
296 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> | |
297 </assert_contents> | |
298 </output> | |
299 <output name="output_matrix" > | |
300 <assert_contents> | |
301 <has_line_matching expression="14\s+7\s+7" /> | |
302 <has_line_matching expression="4\s+7\s+1" /> | |
303 </assert_contents> | |
304 </output> | |
305 <output name="output_stats" > | |
306 <assert_contents> | |
307 <has_line_matching expression="\s+nUnmapped\s+5823" /> | |
308 <has_line_matching expression="\s+nUMIs\s+8" /> | |
309 </assert_contents> | |
310 </output> | |
311 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> | |
246 </test> | 312 </test> |
247 <test expect_num_outputs="5"> | 313 <test expect_num_outputs="6"><!-- Multiple repeats test --> |
248 <!-- Multiple repeats test --> | |
249 <conditional name="input_types"> | 314 <conditional name="input_types"> |
250 <param name="use" value="repeat" /> | 315 <param name="use" value="repeat" /> |
251 <repeat name="input_repeats" > | 316 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> |
252 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | 317 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> |
253 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | 318 </conditional> |
254 </repeat> | 319 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> |
255 <repeat name="input_repeats" > | |
256 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
257 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
258 </repeat> | |
259 <repeat name="input_repeats" > | |
260 <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
261 <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
262 </repeat> | |
263 </conditional> | |
264 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" /> | |
265 <conditional name="refGenomeSource"> | 320 <conditional name="refGenomeSource"> |
266 <param name="geneSource" value="history" /> | 321 <param name="geneSource" value="history" /> |
267 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> | 322 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
268 <param name="genomeSAindexNbases" value="4" /> | 323 <param name="genomeSAindexNbases" value="4" /> |
269 <conditional name="GTFconditional"> | 324 <param name="sjdbOverhang" value="100" /> |
270 <param name="GTFselect" value="with-gtf" /> | 325 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
271 <param name="sjdbOverhang" value="75" /> | |
272 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/> | |
273 </conditional> | |
274 </conditional> | 326 </conditional> |
275 <section name="solo" > | 327 <section name="solo" > |
276 <conditional name="params"> | 328 <conditional name="params"> |
277 <param name="chemistry" value="custom" /> | 329 <param name="chemistry" value="CR3" /> |
278 <param name="soloCBstart" value="1" /> | |
279 <param name="soloCBlen" value="16" /> | |
280 <param name="soloUMIstart" value="17" /> | |
281 <param name="soloUMIlen" value="10" /> | |
282 </conditional> | 330 </conditional> |
283 <param name="soloStrand" value="Forward" /> | 331 <param name="soloStrand" value="Forward" /> |
284 <param name="soloFeatures" value="GeneFull" /> | 332 <param name="soloFeatures" value="Gene" /> |
285 <param name="soloUMIdedup" value="1MM_Directional" /> | 333 <param name="soloUMIdedup" value="1MM_All" /> |
286 </section> | 334 </section> |
287 <output name="output_barcodes" > | 335 <output name="output_barcodes" > |
288 <assert_contents> | 336 <assert_contents> |
289 <has_line line="TTTGTCATCTTAGAGC" /> | 337 <has_line line="ACACCGGTCTAACGGT" /> |
290 <has_line line="TTTGTCATCTTTCCTC" /> | 338 <has_line line="TTCTCAATCCACGTTC" /> |
291 </assert_contents> | 339 </assert_contents> |
292 </output> | 340 </output> |
341 <!-- BAM output is huge, we don't need to test here --> | |
293 </test> | 342 </test> |
294 <test expect_num_outputs="5"> | 343 <test expect_num_outputs="6"> |
295 <!-- Same as the test before but with a collection of pairs --> | 344 <!-- Test with paired collection --> |
296 <conditional name="input_types"> | 345 <conditional name="input_types"> |
297 <param name="use" value="list_paired" /> | 346 <param name="use" value="list_paired" /> |
298 <param name="input_repeats" > | 347 <param name="input_collection" > |
299 <collection type="list:paired"> | 348 <collection type="paired"> |
300 <element name="Pair1"> | 349 <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> |
301 <collection type="paired"> | 350 <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> |
302 <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
303 <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
304 </collection> | |
305 </element> | |
306 <element name="Pair2"> | |
307 <collection type="paired"> | |
308 <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
309 <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> | |
310 </collection> | |
311 </element> | |
312 <!-- Planemo does not support more than 2 elements in a list of pairs --> | |
313 <!-- <element name="Pair3"> --> | |
314 <!-- <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> --> | |
315 <!-- <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> --> | |
316 <!-- </element> --> | |
317 </collection> | 351 </collection> |
318 </param> | 352 </param> |
319 </conditional> | 353 </conditional> |
320 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" /> | 354 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> |
321 <conditional name="refGenomeSource"> | 355 <conditional name="refGenomeSource"> |
322 <param name="geneSource" value="history" /> | 356 <param name="geneSource" value="history" /> |
323 <param name="genomeFastaFiles" value="SNORD83B.22.fa" /> | 357 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
324 <param name="genomeSAindexNbases" value="4" /> | 358 <param name="genomeSAindexNbases" value="4" /> |
325 <conditional name="GTFconditional"> | 359 <param name="sjdbOverhang" value="100" /> |
326 <param name="GTFselect" value="with-gtf" /> | 360 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
327 <param name="sjdbOverhang" value="75" /> | |
328 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/> | |
329 </conditional> | |
330 </conditional> | 361 </conditional> |
331 <section name="solo" > | 362 <section name="solo" > |
332 <conditional name="params"> | 363 <conditional name="params"> |
333 <param name="chemistry" value="custom" /> | 364 <param name="chemistry" value="CR3" /> |
334 <param name="soloCBstart" value="1" /> | |
335 <param name="soloCBlen" value="16" /> | |
336 <param name="soloUMIstart" value="17" /> | |
337 <param name="soloUMIlen" value="10" /> | |
338 </conditional> | 365 </conditional> |
339 <param name="soloStrand" value="Forward" /> | 366 <param name="soloStrand" value="Forward" /> |
340 <param name="soloFeatures" value="GeneFull" /> | 367 <param name="soloFeatures" value="Gene" /> |
341 <param name="soloUMIdedup" value="1MM_Directional" /> | 368 <param name="soloUMIdedup" value="1MM_All" /> |
342 </section> | 369 </section> |
343 <output name="output_barcodes" > | 370 <output name="output_barcodes" > |
344 <assert_contents> | 371 <assert_contents> |
345 <has_line line="TTTGTCATCTTAGAGC" /> | 372 <has_line line="ACACCGGTCTAACGGT" /> |
346 <has_line line="TTTGTCATCTTTCCTC" /> | 373 <has_line line="TTCTCAATCCACGTTC" /> |
347 </assert_contents> | 374 </assert_contents> |
348 </output> | 375 </output> |
376 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> | |
349 </test> | 377 </test> |
350 </tests> | 378 </tests> |
351 <help><![CDATA[ | 379 <help><![CDATA[ |
352 **What it does** | 380 **What it does** |
353 | 381 |
354 **STARSolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR code. STARsolo inputs the raw FASTQ reads files, and performs the following operations: | 382 **STARSolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR_ code. STARsolo takes raw FASTQ reads files as input, and performs the following operations: |
355 | 383 |
356 * Error correction and demultiplexing of cell barcodes using user-input whitelist | 384 * Error correction and demultiplexing of cell barcodes using user-input whitelist |
357 * Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm | 385 * Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm |
358 * Error correction and collapsing (deduplication) of Unique Molecular Identifiers (UMIs) | 386 * Error correction and collapsing (deduplication) of Unique Molecular Identifiers (UMIs) |
359 * Quantification of per-cell gene expression by counting the number of reads per gene | 387 * Quantification of per-cell gene expression by counting the number of reads per gene |
360 | 388 |
361 STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time STARsolo is 10 times faster than CellRanger. | 389 STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time STARsolo is 10 times faster than CellRanger. |
362 | 390 |
391 .. _STAR: https://github.com/alexdobin/STAR | |
363 ]]></help> | 392 ]]></help> |
364 <expand macro="citations"/> | 393 <expand macro="citations"/> |
365 </tool> | 394 </tool> |