comparison rg_rnaStarSolo.xml @ 9:ec9cbd6b9a49 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 00c545ddbf0f008903f4b4c11d476e6089c3f531"
author iuc
date Fri, 15 Jan 2021 17:39:11 +0000
parents 00fbfac99d39
children a6fba3d92531
comparison
equal deleted inserted replaced
8:00fbfac99d39 9:ec9cbd6b9a49
15 <command><![CDATA[ 15 <command><![CDATA[
16 @TEMPINDEX@ 16 @TEMPINDEX@
17 STAR 17 STAR
18 @REFGENOMEHANDLING@ 18 @REFGENOMEHANDLING@
19 19
20 --readFilesIn 20 ## Supports Drop-seq, 10X Chromium, inDrop and Smart-Seq
21 ## Check that the input pairs are of the same type 21 --soloType $sc.solo_type
22 ## otherwise STARsolo will run for a long time and then error out. 22
23 ## We consume either repeats of two inputs R1 + R2 23 #if str($sc.solo_type) == "CB_UMI_Simple":
24 ## or a collection of paired reads. 24 @READSHANDLING@
25 25 --soloCBwhitelist '$sc.soloCBwhitelist'
26 #if str($input_types.use) == "repeat":
27 #set $reads1 = []
28 #set $reads2 = []
29 #for $r1, $r2 in zip($input_types.input1, $input_types.input2):
30 #assert $r1.datatype == $r2.datatype
31 #silent $reads1.append(str($r1))
32 #silent $reads2.append(str($r2))
33 #end for
34 #set $reads1 = ','.join($reads1)
35 #set $reads2 = ','.join($reads2)
36 #elif str($input_types.use) == "list_paired":
37 #set $r1 = $input_types.input_collection.forward
38 #set $r2 = $input_types.input_collection.reverse
39 #set $reads1 = $r1
40 #set $reads2 = $r2
41 #end if
42
43 ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]
44 ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs
45 $reads2 $reads1
46
47 #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'):
48 @FASTQ_GZ_OPTION@
49 #end if
50
51 ## Droplet is the only mode available for now
52 --soloType Droplet
53
54 ## 1 - check length of barcode, 0 - do not check 26 ## 1 - check length of barcode, 0 - do not check
55 ## Good for checking custom chemistries 27 ## Good for checking custom chemistries
56 --soloCBwhitelist '$soloCBwhitelist' 28 --soloBarcodeReadLength $sc.soloBarcodeReadLength
57 --soloBarcodeReadLength $solo.soloBarcodeReadLength 29 #if str($sc.params.chemistry) == "CR2":
58
59 #if str($solo.params.chemistry) == "CR2":
60 --soloCBstart 1 30 --soloCBstart 1
61 --soloCBlen 16 31 --soloCBlen 16
62 --soloUMIstart 17 32 --soloUMIstart 17
63 --soloUMIlen 10 33 --soloUMIlen 10
64 #else if str($solo.params.chemistry) == "CR3": 34 #else if str($sc.params.chemistry) == "CR3":
65 --soloCBstart 1 35 --soloCBstart 1
66 --soloCBlen 16 36 --soloCBlen 16
67 --soloUMIstart 17 37 --soloUMIstart 17
68 --soloUMIlen 12 38 --soloUMIlen 12
69 #else if str($solo.params.chemistry) == "custom": 39 #else if str($sc.params.chemistry) == "custom":
70 --soloCBstart $solo.params.soloCBstart 40 --soloCBstart $sc.params.soloCBstart
71 --soloCBlen $solo.params.soloCBlen 41 --soloCBlen $sc.params.soloCBlen
72 --soloUMIstart $solo.params.soloUMIstart 42 --soloUMIstart $sc.params.soloUMIstart
73 --soloUMIlen $solo.params.soloUMIlen 43 --soloUMIlen $sc.params.soloUMIlen
74 #end if 44 #end if
75 45
46 #elif str($sc.solo_type) == "CB_UMI_Complex":
47 @READSHANDLING@
48 ## inDrop supports multiple cell barcodes of varying length
49 #set $cb_whitelist = []
50 #set $cb_pos = []
51 #for $cb in $sc.cb_whitelists:
52 #silent $cb_whitelist.append(str($cb.whitelist_file))
53 #silent $cb_pos.append('_'.join([str($cb.cb_start_anchor), str($cb.cb_start_anchor_pos),str($cb.cb_end_anchor), str($cb.cb_end_anchor_pos)]))
54 #end for
55 #set $cb_whitelist = ' '.join($cb_whitelist)
56 --soloCBwhitelist $cb_whitelist
57 #set $cb_pos = ' '.join($cb_pos)
58 --soloCBposition $cb_pos
59 #set $umi_pos = '_'.join([str($sc.umi_start_anchor), str($sc.umi_start_anchor_pos), str($sc.umi_end_anchor), str($sc.umi_end_anchor_pos)])
60 --soloUMIposition $umi_pos
61 --soloAdapterSequence $sc.soloAdapterSequence
62 --soloAdapterMismatchesNmax $sc.soloAdapterMismatchesNmax
63
64 #elif str($sc.solo_type) == "SmartSeq":
65 ## Create a manifest file with fastq files and their corresponding cell-ids
66 ## For Smart-Seq [R1] is followed by [R2]
67 --readFilesManifest '$manifest_file'
68 #set $read_files_command = ""
69 #if str($sc.input_types_smart_seq.use) == "list_single_end":
70 #if $sc.input_types_smart_seq.single_end_collection[0].is_of_type('fastq.gz', 'fastqsanger.gz'):
71 @FASTQ_GZ_OPTION@
72 #end if
73 #elif str($sc.input_types_smart_seq.use) == "list_paired_end":
74 #if $sc.input_types_smart_seq.paired_end_collection[0].forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
75 @FASTQ_GZ_OPTION@
76 #end if
77 #end if
78 --soloCBwhitelist None
79 #end if
80
81 --soloUMIfiltering $solo.soloUMIfiltering
76 --soloStrand $solo.soloStrand 82 --soloStrand $solo.soloStrand
77 --soloFeatures $solo.soloFeatures 83 --soloFeatures $solo.soloFeatures
78 --soloUMIdedup $solo.soloUMIdedup 84 --soloUMIdedup $sc.soloUMIdedup
79 --quantMode TranscriptomeSAM 85 --quantMode TranscriptomeSAM
80 --outSAMtype BAM Unsorted 86 --outSAMtype BAM Unsorted
87
88 #if str($solo.filter.filter_type) == "cellranger2":
89 --soloCellFilter CellRanger2.2 $solo.filter.n_expected $solo.filter.max_perc $solo.filter.max_min_ratio
90 #else if str($solo.filter.filter_type) == "topcells":
91 --soloCellFilter TopCells $solo.filter.n_cells
92 #else if str($solo.filter.filter_type) == "no_filter":
93 --soloCellFilter None
94 #end if
95 ## Splice junctions are always under "raw" directory
96
97 --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}'
98 ## Rename the the selected features directory
99 && mv Solo.out/${solo.soloFeatures} Solo.out/soloFeatures
100 ## put the barcodes and features stats into a single file
101 && cat <(echo "Barcodes:") Solo.out/Barcodes.stats <(echo "Genes:") Solo.out/soloFeatures/Features.stats > '${output_stats}'
81 102
82 ## BAM sorting (logic copied from samtools_sort wrapper) 103 ## BAM sorting (logic copied from samtools_sort wrapper)
83 ## choosing BAM SortedByCoord appeared once to give fewer reads 104 ## choosing BAM SortedByCoord appeared once to give fewer reads
84 ## than BAM Unsorted followed by a samtools sort 105 ## than BAM Unsorted followed by a samtools sort
85 ## so better go with the latter? 106 ## so better go with the latter?
90 ##compute the amount of memory available to samtools sort (-m) 111 ##compute the amount of memory available to samtools sort (-m)
91 ##use only 75% of available: https://github.com/samtools/samtools/issues/831 112 ##use only 75% of available: https://github.com/samtools/samtools/issues/831
92 addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && 113 addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
93 ((addmemory=addmemory*75/100)) && 114 ((addmemory=addmemory*75/100)) &&
94 samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam 115 samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
95
96 ]]></command> 116 ]]></command>
117 <configfiles>
118 <configfile name="manifest_file" >
119 #if str($sc.solo_type) == "SmartSeq":
120 #set $cellids_fh = open(str($sc.cell_ids), 'r')
121 #set $cellids = [str(x.strip()) for x in $cellids_fh.readlines()]
122 #silent $cellids_fh.close()
123 #set $samples = []
124 #if str($sc.input_types_smart_seq.use) == "list_single_end":
125 #assert len($cellids) == len($sc.input_types_smart_seq.single_end_collection.keys())
126 #for $i,$r1 in enumerate($sc.input_types_smart_seq.single_end_collection):
127 #silent $samples.append('\t'.join([str($r1), '-', 'ID:' + $cellids[$i]]))
128 #end for
129 #elif str($sc.input_types_smart_seq.use) == "list_paired_end":
130 #assert len($cellids) == len($sc.input_types_smart_seq.paired_end_collection.keys())
131 #for $i,($r1,$r2) in enumerate($sc.input_types_smart_seq.paired_end_collection):
132 #silent $samples.append('\t'.join([str($r1), str($r2), 'ID:' + $cellids[$i]]))
133 #end for
134 #end if
135 #echo '\n'.join($samples)
136 #end if
137 </configfile>
138 </configfiles>
97 <inputs> 139 <inputs>
98 <conditional name="input_types" >
99 <param name="use" type="select" label="Input Type" >
100 <option value="repeat" >Separate barcode and cDNA reads</option>
101 <option value="list_paired" >Paired collection of barcode and cDNA reads</option>
102 </param>
103 <when value="repeat">
104 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true"
105 label="RNA-Seq FASTQ/FASTA file, Barcode reads" />
106 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true"
107 label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
108 </when>
109 <when value="list_paired">
110 <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" />
111 </when>
112 </conditional>
113 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" />
114
115 <!-- Genome source. --> 140 <!-- Genome source. -->
116 <conditional name="refGenomeSource"> 141 <conditional name="refGenomeSource">
117 <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options"> 142 <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
118 <option value="indexed" selected="true">Use a built-in index</option> 143 <option value="indexed" selected="true">Use a built-in index</option>
119 <option value="history">Use reference genome from history and create temporary index</option> 144 <option value="history">Use reference genome from history and create temporary index</option>
138 <when value="history"> 163 <when value="history">
139 <expand macro="ref_selection" /> 164 <expand macro="ref_selection" />
140 <expand macro="@SJDBOPTIONS@" optional="false"/> 165 <expand macro="@SJDBOPTIONS@" optional="false"/>
141 </when> 166 </when>
142 </conditional> 167 </conditional>
143 168 <conditional name="sc" >
169 <param name="solo_type" type="select" label="Type of single-cell RNA-seq" >
170 <option value="CB_UMI_Simple">Drop-seq or 10X Chromium</option>
171 <option value="CB_UMI_Complex">inDrop</option>
172 <option value="SmartSeq">Smart-Seq</option>
173 </param>
174 <when value="CB_UMI_Simple">
175 <expand macro="input_selection" />
176 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
177 <conditional name="params" >
178 <param name="chemistry" type="select" label="Configure Chemistry Options">
179 <option value="CR2" selected="true">Cell Ranger v2</option>
180 <option value="CR3">Cell Ranger v3</option>
181 <option value="custom">Custom</option>
182 </param>
183 <when value="CR2" />
184 <when value="CR3" />
185 <when value="custom" >
186 <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
187 <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
188 <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" />
189 <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" />
190 </when>
191 </conditional>
192 <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." />
193 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
194 <expand macro="umidedup_options" />
195 <option value="Exact" >Exact</option>
196 </param>
197 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
198 CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.">
199 <expand macro="cb_match_wl_common" />
200 <expand macro="cb_match_wl_cellranger" />
201 </param>
202 </when>
203 <when value="CB_UMI_Complex">
204 <expand macro="input_selection" />
205 <repeat name="cb_whitelists" title="Cell barcode whitelist information" max="2" >
206 <param name="whitelist_file" format="txt,tsv" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
207 <param name="cb_start_anchor" type="select" label="Start anchor base for cell barcode">
208 <expand macro="anchor_types" />
209 </param>
210 <param name="cb_start_anchor_pos" type="integer" value="0" label="0-based position of the CB start with respect to the anchor base" />
211 <param name="cb_end_anchor" type="select" label="End anchor base for cell barcode">
212 <expand macro="anchor_types" />
213 </param>
214 <param name="cb_end_anchor_pos" type="integer" value="0" label="0-based position of the CB end with respect to the anchor base" />
215 </repeat>
216 <param name="umi_start_anchor" type="select" label="Start anchor base for UMI">
217 <expand macro="anchor_types" />
218 </param>
219 <param name="umi_start_anchor_pos" type="integer" value="0" label="0-based position of the UMI start with respect to the anchor base" />
220 <param name="umi_end_anchor" type="select" label="End anchor base for UMI">
221 <expand macro="anchor_types" />
222 </param>
223 <param name="umi_end_anchor_pos" type="integer" value="0" label="0-based position of the UMI end with respect to the anchor base" />
224 <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." />
225 <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence" />
226 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
227 <expand macro="umidedup_options" />
228 <option value="Exact" >Exact</option>
229 </param>
230 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
231 CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.">
232 <expand macro="cb_match_wl_common" />
233 </param>
234 </when>
235 <when value="SmartSeq">
236 <expand macro="input_selection_smart_seq" />
237 <param name="cell_ids" type="data" label="File containing cell IDs of the samples. One ID per line in order of samples in the above collection."/>
238 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
239 <option value="Exact" >Exact</option>
240 <option value="NoDedup">Do not deduplicate UMIs</option>
241 </param>
242 </when>
243 </conditional>
144 <section name="solo" title="Advanced Settings" expanded="true"> 244 <section name="solo" title="Advanced Settings" expanded="true">
145 <conditional name="params">
146 <param name="chemistry" type="select" label="Configure Chemistry Options">
147 <option value="CR2" selected="true">Cell Ranger v2</option>
148 <option value="CR3">Cell Ranger v3</option>
149 <option value="custom">Custom</option>
150 </param>
151 <when value="CR2" />
152 <when value="CR3" />
153 <when value="custom" >
154 <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
155 <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
156 <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" />
157 <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" />
158 </when>
159 </conditional>
160 <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule"> 245 <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule">
161 <option value="Unstranded" /> 246 <option value="Unstranded" />
162 <option value="Forward" selected="true" /> 247 <option value="Forward" selected="true" />
163 <option value="Reverse" /> 248 <option value="Reverse" />
164 </param> 249 </param>
165 <param argument="--soloFeatures" type="select" label="Collect UMI counts for these genomic features" > 250 <param argument="--soloFeatures" type="select" label="Collect UMI counts for these genomic features" >
166 <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option> 251 <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option>
167 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option> 252 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option>
168 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option> 253 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option>
169 </param> 254 </param>
170 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, None has UMIs with 1 mismatch distance to others not collapsed"> 255 <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
171 <option value="1MM_All" selected="true">All</option> 256 <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
172 <option value="1MM_Directional" >Directional</option> 257 <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene ((introduced in CellRanger 3.x.x)</option>
173 <option value="1MM_NotCollapsed" >None</option>
174 </param> 258 </param>
175 <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." /> 259 <conditional name="filter" >
260 <param name="filter_type" type="select" label="Cell filtering type and parameters" >
261 <option value="cellranger2" selected="true" >Simple filtering of CellRanger v2</option>
262 <option value="topcells" >Filter top N cells</option>
263 <option value="no_filter" >Do not filter</option>
264 </param>
265 <when value="cellranger2" >
266 <param name="n_expected" type="integer" min="1" value="3000" label="Number of expected cells" />
267 <param name="max_perc" type="float" min="0" max="1" value="0.99" label="Robust maximum percentile for UMI count" />
268 <param name="max_min_ratio" type="float" min="1" value="10" label="Maximum to minimum ratio for UMI count" />
269 </when>
270 <when value="topcells" >
271 <param name="n_cells" type="integer" min="1" value="3000" label="Number of top cells to report sorted by UMI count" />
272 </when>
273 <when value="no_filter" />
274 </conditional>
275 <param argument="--soloOutFormatFeaturesGeneField3" type="text" value="Gene Expression" label="Field 3 in the Genes output." help="Input '-' to remove the 3rd column from the output." />
176 </section> 276 </section>
177 </inputs> 277 </inputs>
178 <outputs> 278 <outputs>
179 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out"> 279 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
180 <expand macro="dbKeyActions" /> 280 <expand macro="dbKeyActions" />
181 </data> 281 </data>
182 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" 282 <!--
183 from_work_dir="Solo.out/Gene/filtered/features.tsv" /> 283 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" />
184 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" 284 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" />
185 from_work_dir="Solo.out/Gene/filtered/barcodes.tsv" /> 285 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" >
186 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts"
187 from_work_dir="Solo.out/Gene/filtered/matrix.mtx" >
188 <filter>solo['soloFeatures'] == "Gene" </filter>
189 <expand macro="dbKeyActions" /> 286 <expand macro="dbKeyActions" />
190 </data> 287 </data>
288 -->
289 <!-- soloCellFilter set to None, if SJ is selected for soloFeatures -->
290 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes raw"
291 from_work_dir="Solo.out/soloFeatures/raw/features.tsv" >
292 <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter>
293 </data>
294 <data format="tsv" name="output_genes_filtered" label="${tool.name} on ${on_string}: Genes filtered"
295 from_work_dir="Solo.out/soloFeatures/filtered/features.tsv" >
296 <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter>
297 </data>
298 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw"
299 from_work_dir="Solo.out/soloFeatures/raw/barcodes.tsv" >
300 <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter>
301 </data>
302 <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered"
303 from_work_dir="Solo.out/soloFeatures/filtered/barcodes.tsv" >
304 <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter>
305 </data>
306 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts raw"
307 from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
308 <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] == "no_filter" </filter>
309 <expand macro="dbKeyActions" />
310 </data>
311 <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Gene Counts filtered"
312 from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
313 <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] != "no_filter" </filter>
314 <expand macro="dbKeyActions" />
315 </data>
191 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" 316 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts"
192 from_work_dir="Solo.out/Gene/filtered/matrixSJ.mtx" > 317 from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
193 <filter>solo['soloFeatures'] == "SJ" </filter> 318 <filter>solo['soloFeatures'] == "SJ" </filter>
194 <expand macro="dbKeyActions" /> 319 <expand macro="dbKeyActions" />
195 </data> 320 </data>
196 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts" 321 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts raw"
197 from_work_dir="Solo.out/Gene/filtered/matrixGeneFull.mtx" > 322 from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
198 <filter>solo['soloFeatures'] == "GeneFull" </filter> 323 <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] == "no_filter" </filter>
324 <expand macro="dbKeyActions" />
325 </data>
326 <data format="mtx" name="output_matrixGeneFull_filtered" label="${tool.name} on ${on_string}: Matrix Full Gene Counts filtered"
327 from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
328 <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] != "no_filter" </filter>
199 <expand macro="dbKeyActions" /> 329 <expand macro="dbKeyActions" />
200 </data> 330 </data>
201 <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" > 331 <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" >
202 <expand macro="dbKeyActions" /> 332 <expand macro="dbKeyActions" />
203 </data> 333 </data>
204 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries" 334 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/>
205 from_work_dir="Solo.out/Gene/Features.stats" />
206 </outputs> 335 </outputs>
207 <!-- Generating test data that is big enough for STARsolo to detect and small enough 336 <!-- Generating test data that is big enough for STARsolo to detect and small enough
208 for Galaxy to test requires careful modification of input FASTA and GTF data, 337 for Galaxy to test requires careful modification of input FASTA and GTF data,
209 where the length of FASTA cannot exceed the largest position in the GTF file, 338 where the length of FASTA cannot exceed the largest position in the GTF file,
210 regardless of the FASTA starting sequence position. 339 regardless of the FASTA starting sequence position.
212 A full writeup of how to subset single cell data for use in STARsolo is given 341 A full writeup of how to subset single cell data for use in STARsolo is given
213 here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d 342 here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d
214 --> 343 -->
215 <tests> 344 <tests>
216 <test expect_num_outputs="6"> 345 <test expect_num_outputs="6">
217 <conditional name="input_types">
218 <param name="use" value="repeat" />
219 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
220 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
221 </conditional>
222 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
223 <conditional name="refGenomeSource"> 346 <conditional name="refGenomeSource">
224 <param name="geneSource" value="history" /> 347 <param name="geneSource" value="history" />
225 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> 348 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
226 <param name="genomeSAindexNbases" value="4" /> 349 <param name="genomeSAindexNbases" value="4" />
227 <param name="sjdbOverhang" value="100" /> 350 <param name="sjdbOverhang" value="100" />
228 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> 351 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
229 </conditional> 352 </conditional>
230 <section name="solo" > 353 <conditional name="sc" >
354 <param name="solo_type" value="CB_UMI_Simple" />
355 <conditional name="input_types">
356 <param name="use" value="repeat" />
357 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
358 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
359 </conditional>
360 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
231 <conditional name="params"> 361 <conditional name="params">
232 <param name="chemistry" value="CR3" /> 362 <param name="chemistry" value="CR3" />
233 </conditional> 363 </conditional>
364 <param name="soloUMIdedup" value="1MM_All" />
365 </conditional>
366 <section name="solo" >
367 <conditional name="filter">
368 <param name="filter_type" value="no_filter" />
369 </conditional>
234 <param name="soloStrand" value="Forward" /> 370 <param name="soloStrand" value="Forward" />
235 <param name="soloFeatures" value="Gene" /> 371 <param name="soloFeatures" value="Gene" />
236 <param name="soloUMIdedup" value="1MM_All" />
237 </section> 372 </section>
238 <output name="output_barcodes" > 373 <output name="output_barcodes" >
239 <assert_contents> 374 <assert_contents>
240 <!-- first and last line --> 375 <!-- first and last line -->
241 <has_line line="ACACCGGTCTAACGGT" /> 376 <has_line line="AAACCTGAGCGCTCCA" />
242 <has_line line="TTCTCAATCCACGTTC" /> 377 <has_line line="TTTGGTTAGTGGGCTA" />
243 </assert_contents> 378 </assert_contents>
244 </output> 379 </output>
245 <output name="output_genes"> 380 <output name="output_genes">
246 <assert_contents> 381 <assert_contents>
247 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> 382 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
248 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> 383 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
249 </assert_contents> 384 </assert_contents>
250 </output> 385 </output>
251 <output name="output_matrix" > 386 <output name="output_matrix" >
252 <assert_contents> 387 <assert_contents>
253 <has_line_matching expression="14\s+7\s+7" /> 388 <has_line_matching expression="14\s+394\s+7" />
254 <has_line_matching expression="4\s+7\s+1" /> 389 <has_line_matching expression="4\s+381\s+1" />
255 </assert_contents> 390 </assert_contents>
256 </output> 391 </output>
257 <output name="output_stats" > 392 <output name="output_stats" >
258 <assert_contents> 393 <assert_contents>
259 <has_line_matching expression="\s+nUnmapped\s+5823" /> 394 <has_line_matching expression="\s+nUnmapped\s+5823" />
261 </assert_contents> 396 </assert_contents>
262 </output> 397 </output>
263 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> 398 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
264 </test> 399 </test>
265 <test expect_num_outputs="6"><!-- same as above, but using custom --> 400 <test expect_num_outputs="6"><!-- same as above, but using custom -->
266 <conditional name="input_types">
267 <param name="use" value="repeat" />
268 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
269 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
270 </conditional>
271 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
272 <conditional name="refGenomeSource"> 401 <conditional name="refGenomeSource">
273 <param name="geneSource" value="history" /> 402 <param name="geneSource" value="history" />
274 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> 403 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
275 <param name="genomeSAindexNbases" value="4" /> 404 <param name="genomeSAindexNbases" value="4" />
276 <param name="sjdbOverhang" value="100" /> 405 <param name="sjdbOverhang" value="100" />
277 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> 406 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
278 </conditional> 407 </conditional>
279 <section name="solo" > 408 <conditional name="sc" >
409 <param name="solo_type" value="CB_UMI_Simple" />
410 <conditional name="input_types">
411 <param name="use" value="repeat" />
412 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
413 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
414 </conditional>
415 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
280 <conditional name="params"> 416 <conditional name="params">
281 <param name="chemistry" value="custom" /> 417 <param name="chemistry" value="custom" />
282 <param name="soloCBstart" value="1" /> 418 <param name="soloCBstart" value="1" />
283 <param name="soloCBlen" value="16" /> 419 <param name="soloCBlen" value="16" />
284 <param name="soloUMIstart" value="17" /> 420 <param name="soloUMIstart" value="17" />
285 <param name="soloUMIlen" value="12" /> 421 <param name="soloUMIlen" value="12" />
286 </conditional> 422 </conditional>
423 <param name="soloUMIdedup" value="1MM_All" />
424 </conditional>
425 <section name="solo" >
287 <param name="soloStrand" value="Forward" /> 426 <param name="soloStrand" value="Forward" />
288 <param name="soloFeatures" value="Gene" /> 427 <param name="soloFeatures" value="Gene" />
289 <param name="soloUMIdedup" value="1MM_All" />
290 </section> 428 </section>
291 <output name="output_barcodes" > 429 <output name="output_barcodes_filtered" >
292 <assert_contents> 430 <assert_contents>
293 <has_line line="ACACCGGTCTAACGGT" /> 431 <has_line line="ACACCGGTCTAACGGT" />
294 <has_line line="TTCTCAATCCACGTTC" /> 432 <has_line line="TTCTCAATCCACGTTC" />
295 </assert_contents> 433 </assert_contents>
296 </output> 434 </output>
297 <output name="output_genes"> 435 <output name="output_genes_filtered">
298 <assert_contents> 436 <assert_contents>
299 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> 437 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
300 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> 438 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
301 </assert_contents> 439 </assert_contents>
302 </output> 440 </output>
303 <output name="output_matrix" > 441 <output name="output_matrix_filtered" >
304 <assert_contents> 442 <assert_contents>
305 <has_line_matching expression="14\s+7\s+7" /> 443 <has_line_matching expression="14\s+7\s+7" />
306 <has_line_matching expression="4\s+7\s+1" /> 444 <has_line_matching expression="4\s+7\s+1" />
307 </assert_contents> 445 </assert_contents>
308 </output> 446 </output>
313 </assert_contents> 451 </assert_contents>
314 </output> 452 </output>
315 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> 453 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
316 </test> 454 </test>
317 <test expect_num_outputs="6"><!-- Multiple repeats test --> 455 <test expect_num_outputs="6"><!-- Multiple repeats test -->
318 <conditional name="input_types">
319 <param name="use" value="repeat" />
320 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
321 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
322 </conditional>
323 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
324 <conditional name="refGenomeSource"> 456 <conditional name="refGenomeSource">
325 <param name="geneSource" value="history" /> 457 <param name="geneSource" value="history" />
326 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> 458 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
327 <param name="genomeSAindexNbases" value="4" /> 459 <param name="genomeSAindexNbases" value="4" />
328 <param name="sjdbOverhang" value="100" /> 460 <param name="sjdbOverhang" value="100" />
329 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> 461 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
330 </conditional> 462 </conditional>
331 <section name="solo" > 463 <conditional name="sc" >
464 <param name="solo_type" value="CB_UMI_Simple" />
465 <conditional name="input_types">
466 <param name="use" value="repeat" />
467 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
468 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
469 </conditional>
470 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
332 <conditional name="params"> 471 <conditional name="params">
333 <param name="chemistry" value="CR3" /> 472 <param name="chemistry" value="CR3" />
334 </conditional> 473 </conditional>
474 <param name="soloUMIdedup" value="1MM_All" />
475 </conditional>
476 <section name="solo" >
335 <param name="soloStrand" value="Forward" /> 477 <param name="soloStrand" value="Forward" />
336 <param name="soloFeatures" value="Gene" /> 478 <param name="soloFeatures" value="Gene" />
337 <param name="soloUMIdedup" value="1MM_All" />
338 </section> 479 </section>
339 <output name="output_barcodes" > 480 <output name="output_barcodes_filtered" >
340 <assert_contents> 481 <assert_contents>
341 <has_line line="ACACCGGTCTAACGGT" /> 482 <has_line line="ACACCGGTCTAACGGT" />
342 <has_line line="TTCTCAATCCACGTTC" /> 483 <has_line line="TTCTCAATCCACGTTC" />
343 </assert_contents> 484 </assert_contents>
344 </output> 485 </output>
345 <!-- BAM output is huge, we don't need to test here --> 486 <!-- BAM output is huge, we don't need to test here -->
346 </test> 487 </test>
347 <test expect_num_outputs="6"> 488 <test expect_num_outputs="6">
348 <!-- Test with paired collection --> 489 <!-- Test with paired collection -->
349 <conditional name="input_types">
350 <param name="use" value="list_paired" />
351 <param name="input_collection" >
352 <collection type="paired">
353 <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
354 <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
355 </collection>
356 </param>
357 </conditional>
358 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
359 <conditional name="refGenomeSource"> 490 <conditional name="refGenomeSource">
360 <param name="geneSource" value="history" /> 491 <param name="geneSource" value="history" />
361 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> 492 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
362 <param name="genomeSAindexNbases" value="4" /> 493 <param name="genomeSAindexNbases" value="4" />
363 <param name="sjdbOverhang" value="100" /> 494 <param name="sjdbOverhang" value="100" />
364 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> 495 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
365 </conditional> 496 </conditional>
366 <section name="solo" > 497 <conditional name="sc" >
498 <param name="solo_type" value="CB_UMI_Simple" />
499 <conditional name="input_types">
500 <param name="use" value="list_paired" />
501 <param name="input_collection" >
502 <collection type="paired">
503 <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
504 <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
505 </collection>
506 </param>
507 </conditional>
508 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
367 <conditional name="params"> 509 <conditional name="params">
368 <param name="chemistry" value="CR3" /> 510 <param name="chemistry" value="CR3" />
369 </conditional> 511 </conditional>
512 <param name="soloUMIdedup" value="1MM_All" />
513 </conditional>
514 <section name="solo" >
370 <param name="soloStrand" value="Forward" /> 515 <param name="soloStrand" value="Forward" />
371 <param name="soloFeatures" value="Gene" /> 516 <param name="soloFeatures" value="Gene" />
372 <param name="soloUMIdedup" value="1MM_All" />
373 </section> 517 </section>
374 <output name="output_barcodes" > 518 <output name="output_barcodes_filtered" >
375 <assert_contents> 519 <assert_contents>
376 <has_line line="ACACCGGTCTAACGGT" /> 520 <has_line line="ACACCGGTCTAACGGT" />
377 <has_line line="TTCTCAATCCACGTTC" /> 521 <has_line line="TTCTCAATCCACGTTC" />
378 </assert_contents> 522 </assert_contents>
379 </output> 523 </output>
380 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> 524 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
525 </test>
526 <test expect_num_outputs="6">
527 <!-- Test soloFeatures, soloCBmatchWLtype, soloCellFilter, soloOutFormatFeaturesGeneField3, soloUMIfiltering -->
528 <conditional name="refGenomeSource">
529 <param name="geneSource" value="history" />
530 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
531 <param name="genomeSAindexNbases" value="4" />
532 <param name="sjdbOverhang" value="100" />
533 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
534 </conditional>
535 <conditional name="sc" >
536 <param name="solo_type" value="CB_UMI_Simple" />
537 <conditional name="input_types">
538 <param name="use" value="repeat" />
539 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
540 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
541 </conditional>
542 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
543 <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" />
544 <conditional name="params">
545 <param name="chemistry" value="CR3" />
546 </conditional>
547 <param name="soloUMIdedup" value="1MM_All" />
548 </conditional>
549 <section name="solo" >
550 <param name="soloUMIfiltering" value="MultiGeneUMI" />
551 <param name="soloStrand" value="Forward" />
552 <param name="soloFeatures" value="GeneFull" />
553 <conditional name="filter">
554 <param name="filter_type" value="topcells" />
555 <param name="n_cells" value="5" />
556 </conditional>
557 <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" />
558 </section>
559 <output name="output_barcodes_filtered" >
560 <assert_contents>
561 <!-- first and last line -->
562 <has_line line="AGACGTTCAAGGCTCC" />
563 <has_line line="TCAACGAAGCTAGTGG" />
564 </assert_contents>
565 </output>
566 <output name="output_genes_filtered" >
567 <assert_contents>
568 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" />
569 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" />
570 </assert_contents>
571 </output>
572 <output name="output_matrixGeneFull_filtered" >
573 <assert_contents>
574 <has_line_matching expression="14\s+6\s+14" />
575 <has_line_matching expression="10\s+6\s+1" />
576 </assert_contents>
577 </output>
578 </test>
579 <test expect_num_outputs="6">
580 <!-- Test soloType CB_UMI_Complex -->
581 <conditional name="refGenomeSource">
582 <param name="geneSource" value="history" />
583 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
584 <param name="genomeSAindexNbases" value="4" />
585 <param name="sjdbOverhang" value="100" />
586 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
587 </conditional>
588 <conditional name="sc" >
589 <param name="solo_type" value="CB_UMI_Complex" />
590 <conditional name="input_types">
591 <param name="use" value="repeat" />
592 <param name="input1" value="indrop.R1.fastq.gz" ftype="fastqsanger.gz" />
593 <param name="input2" value="indrop.R2.fastq.gz" ftype="fastqsanger.gz" />
594 </conditional>
595 <repeat name="cb_whitelists" >
596 <param name="whitelist_file" value="indrop.barcodes1.txt"/>
597 <param name="cb_start_anchor" value="0" />
598 <param name="cb_start_anchor_pos" value="0" />
599 <param name="cb_end_anchor" value="2" />
600 <param name="cb_end_anchor_pos" value="-1" />
601 </repeat>
602 <repeat name="cb_whitelists" >
603 <param name="whitelist_file" value="indrop.barcodes2.txt"/>
604 <param name="cb_start_anchor" value="3" />
605 <param name="cb_start_anchor_pos" value="1" />
606 <param name="cb_end_anchor" value="3" />
607 <param name="cb_end_anchor_pos" value="8" />
608 </repeat>
609 <param name="umi_start_anchor" value="3" />
610 <param name="umi_start_anchor_pos" value="9" />
611 <param name="umi_end_anchor" value="3" />
612 <param name="umi_end_anchor_pos" value="14" />
613 <param name="soloAdapterSequence" value="GAGTGATTGCTTGTGACGCCTT" />
614 <param name="soloAdapterMismatchesNmax" value="1" />
615 <param name="soloUMIdedup" value="1MM_All" />
616 <param name="soloCBmatchWLtype" value="1MM" />
617 </conditional>
618 <output name="output_barcodes_filtered" >
619 <assert_contents>
620 <!-- first and last line -->
621 <has_line line="ACAACGTGG_AAACCTCC" />
622 <has_line line="ATTCCAGAC_TTCGCTGG" />
623 </assert_contents>
624 </output>
625 <output name="output_genes_filtered">
626 <assert_contents>
627 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
628 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
629 </assert_contents>
630 </output>
631 <output name="output_matrix_filtered" >
632 <assert_contents>
633 <has_line_matching expression="14\s+33\s+36" />
634 <has_line_matching expression="2\s+33\s+1" />
635 </assert_contents>
636 </output>
637 <output name="output_stats" >
638 <assert_contents>
639 <has_line_matching expression="\s+nExactMatch\s+791" />
640 <has_line_matching expression="\s+nUMIs\s+36" />
641 </assert_contents>
642 </output>
643 </test>
644 <test expect_num_outputs="6">
645 <!-- Test soloType SmartSeq -->
646 <conditional name="refGenomeSource">
647 <param name="geneSource" value="history" />
648 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
649 <param name="genomeSAindexNbases" value="4" />
650 <param name="sjdbOverhang" value="100" />
651 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
652 </conditional>
653 <conditional name="sc" >
654 <param name="solo_type" value="SmartSeq" />
655 <conditional name="input_types_smart_seq">
656 <param name="use" value="list_paired_end" />
657 <param name="paired_end_collection" >
658 <collection type="list:paired">
659 <element name="pair1">
660 <collection type="paired">
661 <element name="forward" value="smartseq1.R1.fastq.gz" ftype="fastqsanger.gz" />
662 <element name="reverse" value="smartseq1.R2.fastq.gz" ftype="fastqsanger.gz" />
663 </collection>
664 </element>
665 <element name="pair2">
666 <collection type="paired">
667 <element name="forward" value="smartseq2.R1.fastq.gz" ftype="fastqsanger.gz" />
668 <element name="reverse" value="smartseq2.R2.fastq.gz" ftype="fastqsanger.gz" />
669 </collection>
670 </element>
671 <element name="pair3">
672 <collection type="paired">
673 <element name="forward" value="smartseq3.R1.fastq.gz" ftype="fastqsanger.gz" />
674 <element name="reverse" value="smartseq3.R2.fastq.gz" ftype="fastqsanger.gz" />
675 </collection>
676 </element>
677 <element name="pair4">
678 <collection type="paired">
679 <element name="forward" value="smartseq4.R1.fastq.gz" ftype="fastqsanger.gz" />
680 <element name="reverse" value="smartseq4.R2.fastq.gz" ftype="fastqsanger.gz" />
681 </collection>
682 </element>
683 <element name="pair5">
684 <collection type="paired">
685 <element name="forward" value="smartseq5.R1.fastq.gz" ftype="fastqsanger.gz" />
686 <element name="reverse" value="smartseq5.R2.fastq.gz" ftype="fastqsanger.gz" />
687 </collection>
688 </element>
689 <element name="pair6">
690 <collection type="paired">
691 <element name="forward" value="smartseq6.R1.fastq.gz" ftype="fastqsanger.gz" />
692 <element name="reverse" value="smartseq6.R2.fastq.gz" ftype="fastqsanger.gz" />
693 </collection>
694 </element>
695 <element name="pair7">
696 <collection type="paired">
697 <element name="forward" value="smartseq7.R1.fastq.gz" ftype="fastqsanger.gz" />
698 <element name="reverse" value="smartseq7.R2.fastq.gz" ftype="fastqsanger.gz" />
699 </collection>
700 </element>
701 <element name="pair8">
702 <collection type="paired">
703 <element name="forward" value="smartseq8.R1.fastq.gz" ftype="fastqsanger.gz" />
704 <element name="reverse" value="smartseq8.R2.fastq.gz" ftype="fastqsanger.gz" />
705 </collection>
706 </element>
707 <element name="pair9">
708 <collection type="paired">
709 <element name="forward" value="smartseq9.R1.fastq.gz" ftype="fastqsanger.gz" />
710 <element name="reverse" value="smartseq9.R2.fastq.gz" ftype="fastqsanger.gz" />
711 </collection>
712 </element>
713 </collection>
714 </param>
715 </conditional>
716 <param name="cell_ids" value="smartseq.cellids.txt" />
717 <param name="soloUMIdedup" value="Exact" />
718 </conditional>
719 <section name="solo" >
720 <param name="soloStrand" value="Unstranded" />
721 <conditional name="filter">
722 <param name="filter_type" value="topcells" />
723 <param name="n_cells" value="2" />
724 </conditional>
725 </section>
726 <output name="output_barcodes_filtered" >
727 <assert_contents>
728 <has_line line="CSC6_D02" />
729 <not_has_text text="MGH26_A02" />
730 </assert_contents>
731 </output>
732 <output name="output_genes_filtered">
733 <assert_contents>
734 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
735 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
736 </assert_contents>
737 </output>
738 <output name="output_matrix_filtered" >
739 <assert_contents>
740 <has_line_matching expression="14\s+3\s+10" />
741 <has_line_matching expression="12\s+3\s+1" />
742 </assert_contents>
743 </output>
744 <output name="output_stats" >
745 <assert_contents>
746 <has_line_matching expression="\s+nExactMatch\s+9000" />
747 <has_line_matching expression="\s+nUMIs\s+32" />
748 </assert_contents>
749 </output>
381 </test> 750 </test>
382 </tests> 751 </tests>
383 <help><![CDATA[ 752 <help><![CDATA[
384 **What it does** 753 **What it does**
385 754