Mercurial > repos > iuc > rna_starsolo
comparison rg_rnaStarSolo.xml @ 9:ec9cbd6b9a49 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 00c545ddbf0f008903f4b4c11d476e6089c3f531"
author | iuc |
---|---|
date | Fri, 15 Jan 2021 17:39:11 +0000 |
parents | 00fbfac99d39 |
children | a6fba3d92531 |
comparison
equal
deleted
inserted
replaced
8:00fbfac99d39 | 9:ec9cbd6b9a49 |
---|---|
15 <command><![CDATA[ | 15 <command><![CDATA[ |
16 @TEMPINDEX@ | 16 @TEMPINDEX@ |
17 STAR | 17 STAR |
18 @REFGENOMEHANDLING@ | 18 @REFGENOMEHANDLING@ |
19 | 19 |
20 --readFilesIn | 20 ## Supports Drop-seq, 10X Chromium, inDrop and Smart-Seq |
21 ## Check that the input pairs are of the same type | 21 --soloType $sc.solo_type |
22 ## otherwise STARsolo will run for a long time and then error out. | 22 |
23 ## We consume either repeats of two inputs R1 + R2 | 23 #if str($sc.solo_type) == "CB_UMI_Simple": |
24 ## or a collection of paired reads. | 24 @READSHANDLING@ |
25 | 25 --soloCBwhitelist '$sc.soloCBwhitelist' |
26 #if str($input_types.use) == "repeat": | |
27 #set $reads1 = [] | |
28 #set $reads2 = [] | |
29 #for $r1, $r2 in zip($input_types.input1, $input_types.input2): | |
30 #assert $r1.datatype == $r2.datatype | |
31 #silent $reads1.append(str($r1)) | |
32 #silent $reads2.append(str($r2)) | |
33 #end for | |
34 #set $reads1 = ','.join($reads1) | |
35 #set $reads2 = ','.join($reads2) | |
36 #elif str($input_types.use) == "list_paired": | |
37 #set $r1 = $input_types.input_collection.forward | |
38 #set $r2 = $input_types.input_collection.reverse | |
39 #set $reads1 = $r1 | |
40 #set $reads2 = $r2 | |
41 #end if | |
42 | |
43 ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1] | |
44 ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs | |
45 $reads2 $reads1 | |
46 | |
47 #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'): | |
48 @FASTQ_GZ_OPTION@ | |
49 #end if | |
50 | |
51 ## Droplet is the only mode available for now | |
52 --soloType Droplet | |
53 | |
54 ## 1 - check length of barcode, 0 - do not check | 26 ## 1 - check length of barcode, 0 - do not check |
55 ## Good for checking custom chemistries | 27 ## Good for checking custom chemistries |
56 --soloCBwhitelist '$soloCBwhitelist' | 28 --soloBarcodeReadLength $sc.soloBarcodeReadLength |
57 --soloBarcodeReadLength $solo.soloBarcodeReadLength | 29 #if str($sc.params.chemistry) == "CR2": |
58 | |
59 #if str($solo.params.chemistry) == "CR2": | |
60 --soloCBstart 1 | 30 --soloCBstart 1 |
61 --soloCBlen 16 | 31 --soloCBlen 16 |
62 --soloUMIstart 17 | 32 --soloUMIstart 17 |
63 --soloUMIlen 10 | 33 --soloUMIlen 10 |
64 #else if str($solo.params.chemistry) == "CR3": | 34 #else if str($sc.params.chemistry) == "CR3": |
65 --soloCBstart 1 | 35 --soloCBstart 1 |
66 --soloCBlen 16 | 36 --soloCBlen 16 |
67 --soloUMIstart 17 | 37 --soloUMIstart 17 |
68 --soloUMIlen 12 | 38 --soloUMIlen 12 |
69 #else if str($solo.params.chemistry) == "custom": | 39 #else if str($sc.params.chemistry) == "custom": |
70 --soloCBstart $solo.params.soloCBstart | 40 --soloCBstart $sc.params.soloCBstart |
71 --soloCBlen $solo.params.soloCBlen | 41 --soloCBlen $sc.params.soloCBlen |
72 --soloUMIstart $solo.params.soloUMIstart | 42 --soloUMIstart $sc.params.soloUMIstart |
73 --soloUMIlen $solo.params.soloUMIlen | 43 --soloUMIlen $sc.params.soloUMIlen |
74 #end if | 44 #end if |
75 | 45 |
46 #elif str($sc.solo_type) == "CB_UMI_Complex": | |
47 @READSHANDLING@ | |
48 ## inDrop supports multiple cell barcodes of varying length | |
49 #set $cb_whitelist = [] | |
50 #set $cb_pos = [] | |
51 #for $cb in $sc.cb_whitelists: | |
52 #silent $cb_whitelist.append(str($cb.whitelist_file)) | |
53 #silent $cb_pos.append('_'.join([str($cb.cb_start_anchor), str($cb.cb_start_anchor_pos),str($cb.cb_end_anchor), str($cb.cb_end_anchor_pos)])) | |
54 #end for | |
55 #set $cb_whitelist = ' '.join($cb_whitelist) | |
56 --soloCBwhitelist $cb_whitelist | |
57 #set $cb_pos = ' '.join($cb_pos) | |
58 --soloCBposition $cb_pos | |
59 #set $umi_pos = '_'.join([str($sc.umi_start_anchor), str($sc.umi_start_anchor_pos), str($sc.umi_end_anchor), str($sc.umi_end_anchor_pos)]) | |
60 --soloUMIposition $umi_pos | |
61 --soloAdapterSequence $sc.soloAdapterSequence | |
62 --soloAdapterMismatchesNmax $sc.soloAdapterMismatchesNmax | |
63 | |
64 #elif str($sc.solo_type) == "SmartSeq": | |
65 ## Create a manifest file with fastq files and their corresponding cell-ids | |
66 ## For Smart-Seq [R1] is followed by [R2] | |
67 --readFilesManifest '$manifest_file' | |
68 #set $read_files_command = "" | |
69 #if str($sc.input_types_smart_seq.use) == "list_single_end": | |
70 #if $sc.input_types_smart_seq.single_end_collection[0].is_of_type('fastq.gz', 'fastqsanger.gz'): | |
71 @FASTQ_GZ_OPTION@ | |
72 #end if | |
73 #elif str($sc.input_types_smart_seq.use) == "list_paired_end": | |
74 #if $sc.input_types_smart_seq.paired_end_collection[0].forward.is_of_type('fastq.gz', 'fastqsanger.gz'): | |
75 @FASTQ_GZ_OPTION@ | |
76 #end if | |
77 #end if | |
78 --soloCBwhitelist None | |
79 #end if | |
80 | |
81 --soloUMIfiltering $solo.soloUMIfiltering | |
76 --soloStrand $solo.soloStrand | 82 --soloStrand $solo.soloStrand |
77 --soloFeatures $solo.soloFeatures | 83 --soloFeatures $solo.soloFeatures |
78 --soloUMIdedup $solo.soloUMIdedup | 84 --soloUMIdedup $sc.soloUMIdedup |
79 --quantMode TranscriptomeSAM | 85 --quantMode TranscriptomeSAM |
80 --outSAMtype BAM Unsorted | 86 --outSAMtype BAM Unsorted |
87 | |
88 #if str($solo.filter.filter_type) == "cellranger2": | |
89 --soloCellFilter CellRanger2.2 $solo.filter.n_expected $solo.filter.max_perc $solo.filter.max_min_ratio | |
90 #else if str($solo.filter.filter_type) == "topcells": | |
91 --soloCellFilter TopCells $solo.filter.n_cells | |
92 #else if str($solo.filter.filter_type) == "no_filter": | |
93 --soloCellFilter None | |
94 #end if | |
95 ## Splice junctions are always under "raw" directory | |
96 | |
97 --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}' | |
98 ## Rename the selected features directory | |
99 && mv Solo.out/${solo.soloFeatures} Solo.out/soloFeatures | |
100 ## put the barcodes and features stats into a single file | |
101 && cat <(echo "Barcodes:") Solo.out/Barcodes.stats <(echo "Genes:") Solo.out/soloFeatures/Features.stats > '${output_stats}' | |
81 | 102 |
82 ## BAM sorting (logic copied from samtools_sort wrapper) | 103 ## BAM sorting (logic copied from samtools_sort wrapper) |
83 ## choosing BAM SortedByCoord appeared once to give fewer reads | 104 ## choosing BAM SortedByCoord appeared once to give fewer reads |
84 ## than BAM Unsorted followed by a samtools sort | 105 ## than BAM Unsorted followed by a samtools sort |
85 ## so better go with the latter? | 106 ## so better go with the latter? |
90 ##compute the amount of memory available to samtools sort (-m) | 111 ##compute the amount of memory available to samtools sort (-m) |
91 ##use only 75% of available: https://github.com/samtools/samtools/issues/831 | 112 ##use only 75% of available: https://github.com/samtools/samtools/issues/831 |
92 addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && | 113 addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && |
93 ((addmemory=addmemory*75/100)) && | 114 ((addmemory=addmemory*75/100)) && |
94 samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam | 115 samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam |
95 | |
96 ]]></command> | 116 ]]></command> |
117 <configfiles> | |
118 <configfile name="manifest_file" > | |
119 #if str($sc.solo_type) == "SmartSeq": | |
120 #set $cellids_fh = open(str($sc.cell_ids), 'r') | |
121 #set $cellids = [str(x.strip()) for x in $cellids_fh.readlines()] | |
122 #silent $cellids_fh.close() | |
123 #set $samples = [] | |
124 #if str($sc.input_types_smart_seq.use) == "list_single_end": | |
125 #assert len($cellids) == len($sc.input_types_smart_seq.single_end_collection.keys()) | |
126 #for $i,$r1 in enumerate($sc.input_types_smart_seq.single_end_collection): | |
127 #silent $samples.append('\t'.join([str($r1), '-', 'ID:' + $cellids[$i]])) | |
128 #end for | |
129 #elif str($sc.input_types_smart_seq.use) == "list_paired_end": | |
130 #assert len($cellids) == len($sc.input_types_smart_seq.paired_end_collection.keys()) | |
131 #for $i,($r1,$r2) in enumerate($sc.input_types_smart_seq.paired_end_collection): | |
132 #silent $samples.append('\t'.join([str($r1), str($r2), 'ID:' + $cellids[$i]])) | |
133 #end for | |
134 #end if | |
135 #echo '\n'.join($samples) | |
136 #end if | |
137 </configfile> | |
138 </configfiles> | |
97 <inputs> | 139 <inputs> |
98 <conditional name="input_types" > | |
99 <param name="use" type="select" label="Input Type" > | |
100 <option value="repeat" >Separate barcode and cDNA reads</option> | |
101 <option value="list_paired" >Paired collection of barcode and cDNA reads</option> | |
102 </param> | |
103 <when value="repeat"> | |
104 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" | |
105 label="RNA-Seq FASTQ/FASTA file, Barcode reads" /> | |
106 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" | |
107 label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> | |
108 </when> | |
109 <when value="list_paired"> | |
110 <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> | |
111 </when> | |
112 </conditional> | |
113 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" /> | |
114 | |
115 <!-- Genome source. --> | 140 <!-- Genome source. --> |
116 <conditional name="refGenomeSource"> | 141 <conditional name="refGenomeSource"> |
117 <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options"> | 142 <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options"> |
118 <option value="indexed" selected="true">Use a built-in index</option> | 143 <option value="indexed" selected="true">Use a built-in index</option> |
119 <option value="history">Use reference genome from history and create temporary index</option> | 144 <option value="history">Use reference genome from history and create temporary index</option> |
138 <when value="history"> | 163 <when value="history"> |
139 <expand macro="ref_selection" /> | 164 <expand macro="ref_selection" /> |
140 <expand macro="@SJDBOPTIONS@" optional="false"/> | 165 <expand macro="@SJDBOPTIONS@" optional="false"/> |
141 </when> | 166 </when> |
142 </conditional> | 167 </conditional> |
143 | 168 <conditional name="sc" > |
169 <param name="solo_type" type="select" label="Type of single-cell RNA-seq" > | |
170 <option value="CB_UMI_Simple">Drop-seq or 10X Chromium</option> | |
171 <option value="CB_UMI_Complex">inDrop</option> | |
172 <option value="SmartSeq">Smart-Seq</option> | |
173 </param> | |
174 <when value="CB_UMI_Simple"> | |
175 <expand macro="input_selection" /> | |
176 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist"/> | |
177 <conditional name="params" > | |
178 <param name="chemistry" type="select" label="Configure Chemistry Options"> | |
179 <option value="CR2" selected="true">Cell Ranger v2</option> | |
180 <option value="CR3">Cell Ranger v3</option> | |
181 <option value="custom">Custom</option> | |
182 </param> | |
183 <when value="CR2" /> | |
184 <when value="CR3" /> | |
185 <when value="custom" > | |
186 <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" /> | |
187 <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" /> | |
188 <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" /> | |
189 <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" /> | |
190 </when> | |
191 </conditional> | |
192 <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." /> | |
193 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs."> | |
194 <expand macro="umidedup_options" /> | |
195 <option value="Exact" >Exact</option> | |
196 </param> | |
197 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed | |
198 CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes."> | |
199 <expand macro="cb_match_wl_common" /> | |
200 <expand macro="cb_match_wl_cellranger" /> | |
201 </param> | |
202 </when> | |
203 <when value="CB_UMI_Complex"> | |
204 <expand macro="input_selection" /> | |
205 <repeat name="cb_whitelists" title="Cell barcode whitelist information" max="2" > | |
206 <param name="whitelist_file" format="txt,tsv" type="data" label="RNA-Seq Cell Barcode Whitelist"/> | |
207 <param name="cb_start_anchor" type="select" label="Start anchor base for cell barcode"> | |
208 <expand macro="anchor_types" /> | |
209 </param> | |
210 <param name="cb_start_anchor_pos" type="integer" value="0" label="0-based position of the CB start with respect to the anchor base" /> | |
211 <param name="cb_end_anchor" type="select" label="End anchor base for cell barcode"> | |
212 <expand macro="anchor_types" /> | |
213 </param> | |
214 <param name="cb_end_anchor_pos" type="integer" value="0" label="0-based position of the CB end with respect to the anchor base" /> | |
215 </repeat> | |
216 <param name="umi_start_anchor" type="select" label="Start anchor base for UMI"> | |
217 <expand macro="anchor_types" /> | |
218 </param> | |
219 <param name="umi_start_anchor_pos" type="integer" value="0" label="0-based position of the UMI start with respect to the anchor base" /> | |
220 <param name="umi_end_anchor" type="select" label="End anchor base for UMI"> | |
221 <expand macro="anchor_types" /> | |
222 </param> | |
223 <param name="umi_end_anchor_pos" type="integer" value="0" label="0-based position of the UMI end with respect to the anchor base" /> | |
224 <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." /> | |
225 <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence" /> | |
226 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs."> | |
227 <expand macro="umidedup_options" /> | |
228 <option value="Exact" >Exact</option> | |
229 </param> | |
230 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed | |
231 CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes."> | |
232 <expand macro="cb_match_wl_common" /> | |
233 </param> | |
234 </when> | |
235 <when value="SmartSeq"> | |
236 <expand macro="input_selection_smart_seq" /> | |
237 <param name="cell_ids" type="data" label="File containing cell IDs of the samples. One ID per line in order of samples in the above collection."/> | |
238 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs."> | |
239 <option value="Exact" >Exact</option> | |
240 <option value="NoDedup">Do not deduplicate UMIs</option> | |
241 </param> | |
242 </when> | |
243 </conditional> | |
144 <section name="solo" title="Advanced Settings" expanded="true"> | 244 <section name="solo" title="Advanced Settings" expanded="true"> |
145 <conditional name="params"> | |
146 <param name="chemistry" type="select" label="Configure Chemistry Options"> | |
147 <option value="CR2" selected="true">Cell Ranger v2</option> | |
148 <option value="CR3">Cell Ranger v3</option> | |
149 <option value="custom">Custom</option> | |
150 </param> | |
151 <when value="CR2" /> | |
152 <when value="CR3" /> | |
153 <when value="custom" > | |
154 <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" /> | |
155 <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" /> | |
156 <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" /> | |
157 <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" /> | |
158 </when> | |
159 </conditional> | |
160 <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule"> | 245 <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule"> |
161 <option value="Unstranded" /> | 246 <option value="Unstranded" /> |
162 <option value="Forward" selected="true" /> | 247 <option value="Forward" selected="true" /> |
163 <option value="Reverse" /> | 248 <option value="Reverse" /> |
164 </param> | 249 </param> |
165 <param argument="--soloFeatures" type="select" label="Collect UMI counts for these genomic features" > | 250 <param argument="--soloFeatures" type="select" label="Collect UMI counts for these genomic features" > |
166 <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option> | 251 <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option> |
167 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option> | 252 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option> |
168 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option> | 253 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option> |
169 </param> | 254 </param> |
170 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, None has UMIs with 1 mismatch distance to others not collapsed"> | 255 <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" > |
171 <option value="1MM_All" selected="true">All</option> | 256 <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option> |
172 <option value="1MM_Directional" >Directional</option> | 257 <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene (introduced in CellRanger 3.x.x)</option> |
173 <option value="1MM_NotCollapsed" >None</option> | |
174 </param> | 258 </param> |
175 <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." /> | 259 <conditional name="filter" > |
260 <param name="filter_type" type="select" label="Cell filtering type and parameters" > | |
261 <option value="cellranger2" selected="true" >Simple filtering of CellRanger v2</option> | |
262 <option value="topcells" >Filter top N cells</option> | |
263 <option value="no_filter" >Do not filter</option> | |
264 </param> | |
265 <when value="cellranger2" > | |
266 <param name="n_expected" type="integer" min="1" value="3000" label="Number of expected cells" /> | |
267 <param name="max_perc" type="float" min="0" max="1" value="0.99" label="Robust maximum percentile for UMI count" /> | |
268 <param name="max_min_ratio" type="float" min="1" value="10" label="Maximum to minimum ratio for UMI count" /> | |
269 </when> | |
270 <when value="topcells" > | |
271 <param name="n_cells" type="integer" min="1" value="3000" label="Number of top cells to report sorted by UMI count" /> | |
272 </when> | |
273 <when value="no_filter" /> | |
274 </conditional> | |
275 <param argument="--soloOutFormatFeaturesGeneField3" type="text" value="Gene Expression" label="Field 3 in the Genes output." help="Input '-' to remove the 3rd column from the output." /> | |
176 </section> | 276 </section> |
177 </inputs> | 277 </inputs> |
178 <outputs> | 278 <outputs> |
179 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out"> | 279 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out"> |
180 <expand macro="dbKeyActions" /> | 280 <expand macro="dbKeyActions" /> |
181 </data> | 281 </data> |
182 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" | 282 <!-- |
183 from_work_dir="Solo.out/Gene/filtered/features.tsv" /> | 283 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" /> |
184 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" | 284 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" /> |
185 from_work_dir="Solo.out/Gene/filtered/barcodes.tsv" /> | 285 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" > |
186 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" | |
187 from_work_dir="Solo.out/Gene/filtered/matrix.mtx" > | |
188 <filter>solo['soloFeatures'] == "Gene" </filter> | |
189 <expand macro="dbKeyActions" /> | 286 <expand macro="dbKeyActions" /> |
190 </data> | 287 </data> |
288 --> | |
289 <!-- soloCellFilter set to None, if SJ is selected for soloFeatures --> | |
290 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes raw" | |
291 from_work_dir="Solo.out/soloFeatures/raw/features.tsv" > | |
292 <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter> | |
293 </data> | |
294 <data format="tsv" name="output_genes_filtered" label="${tool.name} on ${on_string}: Genes filtered" | |
295 from_work_dir="Solo.out/soloFeatures/filtered/features.tsv" > | |
296 <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter> | |
297 </data> | |
298 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw" | |
299 from_work_dir="Solo.out/soloFeatures/raw/barcodes.tsv" > | |
300 <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter> | |
301 </data> | |
302 <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered" | |
303 from_work_dir="Solo.out/soloFeatures/filtered/barcodes.tsv" > | |
304 <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter> | |
305 </data> | |
306 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts raw" | |
307 from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" > | |
308 <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] == "no_filter" </filter> | |
309 <expand macro="dbKeyActions" /> | |
310 </data> | |
311 <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Gene Counts filtered" | |
312 from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" > | |
313 <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] != "no_filter" </filter> | |
314 <expand macro="dbKeyActions" /> | |
315 </data> | |
191 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" | 316 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" |
192 from_work_dir="Solo.out/Gene/filtered/matrixSJ.mtx" > | 317 from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" > |
193 <filter>solo['soloFeatures'] == "SJ" </filter> | 318 <filter>solo['soloFeatures'] == "SJ" </filter> |
194 <expand macro="dbKeyActions" /> | 319 <expand macro="dbKeyActions" /> |
195 </data> | 320 </data> |
196 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts" | 321 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts raw" |
197 from_work_dir="Solo.out/Gene/filtered/matrixGeneFull.mtx" > | 322 from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" > |
198 <filter>solo['soloFeatures'] == "GeneFull" </filter> | 323 <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] == "no_filter" </filter> |
324 <expand macro="dbKeyActions" /> | |
325 </data> | |
326 <data format="mtx" name="output_matrixGeneFull_filtered" label="${tool.name} on ${on_string}: Matrix Full Gene Counts filtered" | |
327 from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" > | |
328 <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] != "no_filter" </filter> | |
199 <expand macro="dbKeyActions" /> | 329 <expand macro="dbKeyActions" /> |
200 </data> | 330 </data> |
201 <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" > | 331 <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" > |
202 <expand macro="dbKeyActions" /> | 332 <expand macro="dbKeyActions" /> |
203 </data> | 333 </data> |
204 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries" | 334 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/> |
205 from_work_dir="Solo.out/Gene/Features.stats" /> | |
206 </outputs> | 335 </outputs> |
207 <!-- Generating test data that is big enough for STARsolo to detect and small enough | 336 <!-- Generating test data that is big enough for STARsolo to detect and small enough |
208 for Galaxy to test requires careful modification of input FASTA and GTF data, | 337 for Galaxy to test requires careful modification of input FASTA and GTF data, |
209 where the length of FASTA cannot exceed the largest position in the GTF file, | 338 where the length of FASTA cannot exceed the largest position in the GTF file, |
210 regardless of the FASTA starting sequence position. | 339 regardless of the FASTA starting sequence position. |
212 A full writeup of how to subset single cell data for use in STARsolo is given | 341 A full writeup of how to subset single cell data for use in STARsolo is given |
213 here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d | 342 here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d |
214 --> | 343 --> |
215 <tests> | 344 <tests> |
216 <test expect_num_outputs="6"> | 345 <test expect_num_outputs="6"> |
217 <conditional name="input_types"> | |
218 <param name="use" value="repeat" /> | |
219 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
220 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
221 </conditional> | |
222 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
223 <conditional name="refGenomeSource"> | 346 <conditional name="refGenomeSource"> |
224 <param name="geneSource" value="history" /> | 347 <param name="geneSource" value="history" /> |
225 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | 348 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
226 <param name="genomeSAindexNbases" value="4" /> | 349 <param name="genomeSAindexNbases" value="4" /> |
227 <param name="sjdbOverhang" value="100" /> | 350 <param name="sjdbOverhang" value="100" /> |
228 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | 351 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
229 </conditional> | 352 </conditional> |
230 <section name="solo" > | 353 <conditional name="sc" > |
354 <param name="solo_type" value="CB_UMI_Simple" /> | |
355 <conditional name="input_types"> | |
356 <param name="use" value="repeat" /> | |
357 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
358 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
359 </conditional> | |
360 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
231 <conditional name="params"> | 361 <conditional name="params"> |
232 <param name="chemistry" value="CR3" /> | 362 <param name="chemistry" value="CR3" /> |
233 </conditional> | 363 </conditional> |
364 <param name="soloUMIdedup" value="1MM_All" /> | |
365 </conditional> | |
366 <section name="solo" > | |
367 <conditional name="filter"> | |
368 <param name="filter_type" value="no_filter" /> | |
369 </conditional> | |
234 <param name="soloStrand" value="Forward" /> | 370 <param name="soloStrand" value="Forward" /> |
235 <param name="soloFeatures" value="Gene" /> | 371 <param name="soloFeatures" value="Gene" /> |
236 <param name="soloUMIdedup" value="1MM_All" /> | |
237 </section> | 372 </section> |
238 <output name="output_barcodes" > | 373 <output name="output_barcodes" > |
239 <assert_contents> | 374 <assert_contents> |
240 <!-- first and last line --> | 375 <!-- first and last line --> |
241 <has_line line="ACACCGGTCTAACGGT" /> | 376 <has_line line="AAACCTGAGCGCTCCA" /> |
242 <has_line line="TTCTCAATCCACGTTC" /> | 377 <has_line line="TTTGGTTAGTGGGCTA" /> |
243 </assert_contents> | 378 </assert_contents> |
244 </output> | 379 </output> |
245 <output name="output_genes"> | 380 <output name="output_genes"> |
246 <assert_contents> | 381 <assert_contents> |
247 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> | 382 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> |
248 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> | 383 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> |
249 </assert_contents> | 384 </assert_contents> |
250 </output> | 385 </output> |
251 <output name="output_matrix" > | 386 <output name="output_matrix" > |
252 <assert_contents> | 387 <assert_contents> |
253 <has_line_matching expression="14\s+7\s+7" /> | 388 <has_line_matching expression="14\s+394\s+7" /> |
254 <has_line_matching expression="4\s+7\s+1" /> | 389 <has_line_matching expression="4\s+381\s+1" /> |
255 </assert_contents> | 390 </assert_contents> |
256 </output> | 391 </output> |
257 <output name="output_stats" > | 392 <output name="output_stats" > |
258 <assert_contents> | 393 <assert_contents> |
259 <has_line_matching expression="\s+nUnmapped\s+5823" /> | 394 <has_line_matching expression="\s+nUnmapped\s+5823" /> |
261 </assert_contents> | 396 </assert_contents> |
262 </output> | 397 </output> |
263 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> | 398 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> |
264 </test> | 399 </test> |
265 <test expect_num_outputs="6"><!-- same as above, but using custom --> | 400 <test expect_num_outputs="6"><!-- same as above, but using custom --> |
266 <conditional name="input_types"> | |
267 <param name="use" value="repeat" /> | |
268 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
269 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
270 </conditional> | |
271 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
272 <conditional name="refGenomeSource"> | 401 <conditional name="refGenomeSource"> |
273 <param name="geneSource" value="history" /> | 402 <param name="geneSource" value="history" /> |
274 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | 403 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
275 <param name="genomeSAindexNbases" value="4" /> | 404 <param name="genomeSAindexNbases" value="4" /> |
276 <param name="sjdbOverhang" value="100" /> | 405 <param name="sjdbOverhang" value="100" /> |
277 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | 406 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
278 </conditional> | 407 </conditional> |
279 <section name="solo" > | 408 <conditional name="sc" > |
409 <param name="solo_type" value="CB_UMI_Simple" /> | |
410 <conditional name="input_types"> | |
411 <param name="use" value="repeat" /> | |
412 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
413 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
414 </conditional> | |
415 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
280 <conditional name="params"> | 416 <conditional name="params"> |
281 <param name="chemistry" value="custom" /> | 417 <param name="chemistry" value="custom" /> |
282 <param name="soloCBstart" value="1" /> | 418 <param name="soloCBstart" value="1" /> |
283 <param name="soloCBlen" value="16" /> | 419 <param name="soloCBlen" value="16" /> |
284 <param name="soloUMIstart" value="17" /> | 420 <param name="soloUMIstart" value="17" /> |
285 <param name="soloUMIlen" value="12" /> | 421 <param name="soloUMIlen" value="12" /> |
286 </conditional> | 422 </conditional> |
423 <param name="soloUMIdedup" value="1MM_All" /> | |
424 </conditional> | |
425 <section name="solo" > | |
287 <param name="soloStrand" value="Forward" /> | 426 <param name="soloStrand" value="Forward" /> |
288 <param name="soloFeatures" value="Gene" /> | 427 <param name="soloFeatures" value="Gene" /> |
289 <param name="soloUMIdedup" value="1MM_All" /> | |
290 </section> | 428 </section> |
291 <output name="output_barcodes" > | 429 <output name="output_barcodes_filtered" > |
292 <assert_contents> | 430 <assert_contents> |
293 <has_line line="ACACCGGTCTAACGGT" /> | 431 <has_line line="ACACCGGTCTAACGGT" /> |
294 <has_line line="TTCTCAATCCACGTTC" /> | 432 <has_line line="TTCTCAATCCACGTTC" /> |
295 </assert_contents> | 433 </assert_contents> |
296 </output> | 434 </output> |
297 <output name="output_genes"> | 435 <output name="output_genes_filtered"> |
298 <assert_contents> | 436 <assert_contents> |
299 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> | 437 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> |
300 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> | 438 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> |
301 </assert_contents> | 439 </assert_contents> |
302 </output> | 440 </output> |
303 <output name="output_matrix" > | 441 <output name="output_matrix_filtered" > |
304 <assert_contents> | 442 <assert_contents> |
305 <has_line_matching expression="14\s+7\s+7" /> | 443 <has_line_matching expression="14\s+7\s+7" /> |
306 <has_line_matching expression="4\s+7\s+1" /> | 444 <has_line_matching expression="4\s+7\s+1" /> |
307 </assert_contents> | 445 </assert_contents> |
308 </output> | 446 </output> |
313 </assert_contents> | 451 </assert_contents> |
314 </output> | 452 </output> |
315 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> | 453 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> |
316 </test> | 454 </test> |
317 <test expect_num_outputs="6"><!-- Multiple repeats test --> | 455 <test expect_num_outputs="6"><!-- Multiple repeats test --> |
318 <conditional name="input_types"> | |
319 <param name="use" value="repeat" /> | |
320 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
321 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
322 </conditional> | |
323 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
324 <conditional name="refGenomeSource"> | 456 <conditional name="refGenomeSource"> |
325 <param name="geneSource" value="history" /> | 457 <param name="geneSource" value="history" /> |
326 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | 458 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
327 <param name="genomeSAindexNbases" value="4" /> | 459 <param name="genomeSAindexNbases" value="4" /> |
328 <param name="sjdbOverhang" value="100" /> | 460 <param name="sjdbOverhang" value="100" /> |
329 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | 461 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
330 </conditional> | 462 </conditional> |
331 <section name="solo" > | 463 <conditional name="sc" > |
464 <param name="solo_type" value="CB_UMI_Simple" /> | |
465 <conditional name="input_types"> | |
466 <param name="use" value="repeat" /> | |
467 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
468 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
469 </conditional> | |
470 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
332 <conditional name="params"> | 471 <conditional name="params"> |
333 <param name="chemistry" value="CR3" /> | 472 <param name="chemistry" value="CR3" /> |
334 </conditional> | 473 </conditional> |
474 <param name="soloUMIdedup" value="1MM_All" /> | |
475 </conditional> | |
476 <section name="solo" > | |
335 <param name="soloStrand" value="Forward" /> | 477 <param name="soloStrand" value="Forward" /> |
336 <param name="soloFeatures" value="Gene" /> | 478 <param name="soloFeatures" value="Gene" /> |
337 <param name="soloUMIdedup" value="1MM_All" /> | |
338 </section> | 479 </section> |
339 <output name="output_barcodes" > | 480 <output name="output_barcodes_filtered" > |
340 <assert_contents> | 481 <assert_contents> |
341 <has_line line="ACACCGGTCTAACGGT" /> | 482 <has_line line="ACACCGGTCTAACGGT" /> |
342 <has_line line="TTCTCAATCCACGTTC" /> | 483 <has_line line="TTCTCAATCCACGTTC" /> |
343 </assert_contents> | 484 </assert_contents> |
344 </output> | 485 </output> |
345 <!-- BAM output is huge, we don't need to test here --> | 486 <!-- BAM output is huge, we don't need to test here --> |
346 </test> | 487 </test> |
347 <test expect_num_outputs="6"> | 488 <test expect_num_outputs="6"> |
348 <!-- Test with paired collection --> | 489 <!-- Test with paired collection --> |
349 <conditional name="input_types"> | |
350 <param name="use" value="list_paired" /> | |
351 <param name="input_collection" > | |
352 <collection type="paired"> | |
353 <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
354 <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
355 </collection> | |
356 </param> | |
357 </conditional> | |
358 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
359 <conditional name="refGenomeSource"> | 490 <conditional name="refGenomeSource"> |
360 <param name="geneSource" value="history" /> | 491 <param name="geneSource" value="history" /> |
361 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | 492 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> |
362 <param name="genomeSAindexNbases" value="4" /> | 493 <param name="genomeSAindexNbases" value="4" /> |
363 <param name="sjdbOverhang" value="100" /> | 494 <param name="sjdbOverhang" value="100" /> |
364 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | 495 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> |
365 </conditional> | 496 </conditional> |
366 <section name="solo" > | 497 <conditional name="sc" > |
498 <param name="solo_type" value="CB_UMI_Simple" /> | |
499 <conditional name="input_types"> | |
500 <param name="use" value="list_paired" /> | |
501 <param name="input_collection" > | |
502 <collection type="paired"> | |
503 <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
504 <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
505 </collection> | |
506 </param> | |
507 </conditional> | |
508 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
367 <conditional name="params"> | 509 <conditional name="params"> |
368 <param name="chemistry" value="CR3" /> | 510 <param name="chemistry" value="CR3" /> |
369 </conditional> | 511 </conditional> |
512 <param name="soloUMIdedup" value="1MM_All" /> | |
513 </conditional> | |
514 <section name="solo" > | |
370 <param name="soloStrand" value="Forward" /> | 515 <param name="soloStrand" value="Forward" /> |
371 <param name="soloFeatures" value="Gene" /> | 516 <param name="soloFeatures" value="Gene" /> |
372 <param name="soloUMIdedup" value="1MM_All" /> | |
373 </section> | 517 </section> |
374 <output name="output_barcodes" > | 518 <output name="output_barcodes_filtered" > |
375 <assert_contents> | 519 <assert_contents> |
376 <has_line line="ACACCGGTCTAACGGT" /> | 520 <has_line line="ACACCGGTCTAACGGT" /> |
377 <has_line line="TTCTCAATCCACGTTC" /> | 521 <has_line line="TTCTCAATCCACGTTC" /> |
378 </assert_contents> | 522 </assert_contents> |
379 </output> | 523 </output> |
380 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> | 524 <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> |
525 </test> | |
526 <test expect_num_outputs="6"> | |
527 <!-- Test soloFeatures, soloCBmatchWLtype, soloCellFilter, soloOutFormatFeaturesGeneField3, soloUMIfiltering --> | |
528 <conditional name="refGenomeSource"> | |
529 <param name="geneSource" value="history" /> | |
530 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | |
531 <param name="genomeSAindexNbases" value="4" /> | |
532 <param name="sjdbOverhang" value="100" /> | |
533 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | |
534 </conditional> | |
535 <conditional name="sc" > | |
536 <param name="solo_type" value="CB_UMI_Simple" /> | |
537 <conditional name="input_types"> | |
538 <param name="use" value="repeat" /> | |
539 <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
540 <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> | |
541 </conditional> | |
542 <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> | |
543 <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" /> | |
544 <conditional name="params"> | |
545 <param name="chemistry" value="CR3" /> | |
546 </conditional> | |
547 <param name="soloUMIdedup" value="1MM_All" /> | |
548 </conditional> | |
549 <section name="solo" > | |
550 <param name="soloUMIfiltering" value="MultiGeneUMI" /> | |
551 <param name="soloStrand" value="Forward" /> | |
552 <param name="soloFeatures" value="GeneFull" /> | |
553 <conditional name="filter"> | |
554 <param name="filter_type" value="topcells" /> | |
555 <param name="n_cells" value="5" /> | |
556 </conditional> | |
557 <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" /> | |
558 </section> | |
559 <output name="output_barcodes_filtered" > | |
560 <assert_contents> | |
561 <!-- first and last line --> | |
562 <has_line line="AGACGTTCAAGGCTCC" /> | |
563 <has_line line="TCAACGAAGCTAGTGG" /> | |
564 </assert_contents> | |
565 </output> | |
566 <output name="output_genes_filtered" > | |
567 <assert_contents> | |
568 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" /> | |
569 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" /> | |
570 </assert_contents> | |
571 </output> | |
572 <output name="output_matrixGeneFull_filtered" > | |
573 <assert_contents> | |
574 <has_line_matching expression="14\s+6\s+14" /> | |
575 <has_line_matching expression="10\s+6\s+1" /> | |
576 </assert_contents> | |
577 </output> | |
578 </test> | |
579 <test expect_num_outputs="6"> | |
580 <!-- Test soloType CB_UMI_Complex --> | |
581 <conditional name="refGenomeSource"> | |
582 <param name="geneSource" value="history" /> | |
583 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | |
584 <param name="genomeSAindexNbases" value="4" /> | |
585 <param name="sjdbOverhang" value="100" /> | |
586 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | |
587 </conditional> | |
588 <conditional name="sc" > | |
589 <param name="solo_type" value="CB_UMI_Complex" /> | |
590 <conditional name="input_types"> | |
591 <param name="use" value="repeat" /> | |
592 <param name="input1" value="indrop.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
593 <param name="input2" value="indrop.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
594 </conditional> | |
595 <repeat name="cb_whitelists" > | |
596 <param name="whitelist_file" value="indrop.barcodes1.txt"/> | |
597 <param name="cb_start_anchor" value="0" /> | |
598 <param name="cb_start_anchor_pos" value="0" /> | |
599 <param name="cb_end_anchor" value="2" /> | |
600 <param name="cb_end_anchor_pos" value="-1" /> | |
601 </repeat> | |
602 <repeat name="cb_whitelists" > | |
603 <param name="whitelist_file" value="indrop.barcodes2.txt"/> | |
604 <param name="cb_start_anchor" value="3" /> | |
605 <param name="cb_start_anchor_pos" value="1" /> | |
606 <param name="cb_end_anchor" value="3" /> | |
607 <param name="cb_end_anchor_pos" value="8" /> | |
608 </repeat> | |
609 <param name="umi_start_anchor" value="3" /> | |
610 <param name="umi_start_anchor_pos" value="9" /> | |
611 <param name="umi_end_anchor" value="3" /> | |
612 <param name="umi_end_anchor_pos" value="14" /> | |
613 <param name="soloAdapterSequence" value="GAGTGATTGCTTGTGACGCCTT" /> | |
614 <param name="soloAdapterMismatchesNmax" value="1" /> | |
615 <param name="soloUMIdedup" value="1MM_All" /> | |
616 <param name="soloCBmatchWLtype" value="1MM" /> | |
617 </conditional> | |
618 <output name="output_barcodes_filtered" > | |
619 <assert_contents> | |
620 <!-- first and last line --> | |
621 <has_line line="ACAACGTGG_AAACCTCC" /> | |
622 <has_line line="ATTCCAGAC_TTCGCTGG" /> | |
623 </assert_contents> | |
624 </output> | |
625 <output name="output_genes_filtered"> | |
626 <assert_contents> | |
627 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> | |
628 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> | |
629 </assert_contents> | |
630 </output> | |
631 <output name="output_matrix_filtered" > | |
632 <assert_contents> | |
633 <has_line_matching expression="14\s+33\s+36" /> | |
634 <has_line_matching expression="2\s+33\s+1" /> | |
635 </assert_contents> | |
636 </output> | |
637 <output name="output_stats" > | |
638 <assert_contents> | |
639 <has_line_matching expression="\s+nExactMatch\s+791" /> | |
640 <has_line_matching expression="\s+nUMIs\s+36" /> | |
641 </assert_contents> | |
642 </output> | |
643 </test> | |
644 <test expect_num_outputs="6"> | |
645 <!-- Test soloType SmartSeq --> | |
646 <conditional name="refGenomeSource"> | |
647 <param name="geneSource" value="history" /> | |
648 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> | |
649 <param name="genomeSAindexNbases" value="4" /> | |
650 <param name="sjdbOverhang" value="100" /> | |
651 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> | |
652 </conditional> | |
653 <conditional name="sc" > | |
654 <param name="solo_type" value="SmartSeq" /> | |
655 <conditional name="input_types_smart_seq"> | |
656 <param name="use" value="list_paired_end" /> | |
657 <param name="paired_end_collection" > | |
658 <collection type="list:paired"> | |
659 <element name="pair1"> | |
660 <collection type="paired"> | |
661 <element name="forward" value="smartseq1.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
662 <element name="reverse" value="smartseq1.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
663 </collection> | |
664 </element> | |
665 <element name="pair2"> | |
666 <collection type="paired"> | |
667 <element name="forward" value="smartseq2.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
668 <element name="reverse" value="smartseq2.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
669 </collection> | |
670 </element> | |
671 <element name="pair3"> | |
672 <collection type="paired"> | |
673 <element name="forward" value="smartseq3.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
674 <element name="reverse" value="smartseq3.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
675 </collection> | |
676 </element> | |
677 <element name="pair4"> | |
678 <collection type="paired"> | |
679 <element name="forward" value="smartseq4.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
680 <element name="reverse" value="smartseq4.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
681 </collection> | |
682 </element> | |
683 <element name="pair5"> | |
684 <collection type="paired"> | |
685 <element name="forward" value="smartseq5.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
686 <element name="reverse" value="smartseq5.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
687 </collection> | |
688 </element> | |
689 <element name="pair6"> | |
690 <collection type="paired"> | |
691 <element name="forward" value="smartseq6.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
692 <element name="reverse" value="smartseq6.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
693 </collection> | |
694 </element> | |
695 <element name="pair7"> | |
696 <collection type="paired"> | |
697 <element name="forward" value="smartseq7.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
698 <element name="reverse" value="smartseq7.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
699 </collection> | |
700 </element> | |
701 <element name="pair8"> | |
702 <collection type="paired"> | |
703 <element name="forward" value="smartseq8.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
704 <element name="reverse" value="smartseq8.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
705 </collection> | |
706 </element> | |
707 <element name="pair9"> | |
708 <collection type="paired"> | |
709 <element name="forward" value="smartseq9.R1.fastq.gz" ftype="fastqsanger.gz" /> | |
710 <element name="reverse" value="smartseq9.R2.fastq.gz" ftype="fastqsanger.gz" /> | |
711 </collection> | |
712 </element> | |
713 </collection> | |
714 </param> | |
715 </conditional> | |
716 <param name="cell_ids" value="smartseq.cellids.txt" /> | |
717 <param name="soloUMIdedup" value="Exact" /> | |
718 </conditional> | |
719 <section name="solo" > | |
720 <param name="soloStrand" value="Unstranded" /> | |
721 <conditional name="filter"> | |
722 <param name="filter_type" value="topcells" /> | |
723 <param name="n_cells" value="2" /> | |
724 </conditional> | |
725 </section> | |
726 <output name="output_barcodes_filtered" > | |
727 <assert_contents> | |
728 <has_line line="CSC6_D02" /> | |
729 <not_has_text text="MGH26_A02" /> | |
730 </assert_contents> | |
731 </output> | |
732 <output name="output_genes_filtered"> | |
733 <assert_contents> | |
734 <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> | |
735 <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> | |
736 </assert_contents> | |
737 </output> | |
738 <output name="output_matrix_filtered" > | |
739 <assert_contents> | |
740 <has_line_matching expression="14\s+3\s+10" /> | |
741 <has_line_matching expression="12\s+3\s+1" /> | |
742 </assert_contents> | |
743 </output> | |
744 <output name="output_stats" > | |
745 <assert_contents> | |
746 <has_line_matching expression="\s+nExactMatch\s+9000" /> | |
747 <has_line_matching expression="\s+nUMIs\s+32" /> | |
748 </assert_contents> | |
749 </output> | |
381 </test> | 750 </test> |
382 </tests> | 751 </tests> |
383 <help><![CDATA[ | 752 <help><![CDATA[ |
384 **What it does** | 753 **What it does** |
385 | 754 |