Mercurial > repos > wolma > mimodd_aln
comparison snap_caller.xml @ 0:d801b0675eb5 draft
planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit b36048cd608ede0ec6f6559648525c9350caae34-dirty
author | wolma |
---|---|
date | Sat, 11 Nov 2017 18:18:54 -0500 |
parents | |
children | e76e813f615a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d801b0675eb5 |
---|---|
1 <tool id="mimodd_align" name="MiModD Read Alignment" version="@MIMODD_WRAPPER_VERSION@"> | |
2 <description>maps sequence reads to a reference genome using SNAP</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 <macro name="require_metadata"> <!-- mandatory SAM dataset used as the metadata source of a sample; expanded for fastq inputs --> | |
6 <param name="header" type="data" format="sam" | |
7 label="metadata source for this sample" /> | |
8 </macro> | |
9 <macro name="sam_bam_selector" token_format="sam"> <!-- input dataset (format given by the format token, "sam" unless overridden) plus an optional SAM metadata source --> | |
10 <param name="ifile" type="data" format="@FORMAT@" | |
11 label="input file"/> | |
12 <param name="header" type="data" format="sam" optional="true" | |
13 label="(optional) metadata source for this sample" | |
14 help="a SAM format dataset providing information about the sequences in the input data in its header; do NOT provide this dataset if the information is already part of your input dataset unless you want to have the original metadata overwritten. If needed, a metadata source dataset can be generated with the MiModD Run Annotation tool." /> | |
15 </macro> | |
16 </macros> | |
17 <expand macro="requirements" /> | |
18 <expand macro="stdio" /> | |
19 <expand macro="version_command" /> | |
20 <command><![CDATA[ | |
21 ## Currently Galaxy does not autoconvert collections of fastq.gz files. | |
22 ## This tool wrapper fixes that by allowing fastq and fastq.gz as input | |
23 ## collection formats. | |
24 | |
25 mimodd snap-batch -s | |
26 #if str($reference.source) == "cached": | |
27 #set ref_genome = $reference.genome.fields.path | |
28 #else: | |
29 #set ref_genome = $reference.genome | |
30 #end if | |
31 #for $i in $datasets | |
32 ## gz_input flags a fastq.gz input file; it is reset for every dataset so that one gzipped sample cannot force --iformat gz onto the following ones | |
33 #set gz_input = False | |
34 "snap ${i.mode_choose.mode} '$ref_genome' | |
35 #if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) == "fastq": | |
36 #if $str($i.mode_choose.input.pe_source.type) == 'collection': | |
37 ## PE input provided as a paired collection - if the forward | |
38 ## dataset is gzipped we assume the reverse dataset is too. | |
39 '${i.mode_choose.input.pe_source.input_data.forward}' | |
40 '${i.mode_choose.input.pe_source.input_data.reverse}' | |
41 #if $i.mode_choose.input.pe_source.input_data.forward.is_of_type('fastq.gz'): | |
42 #set gz_input = True | |
43 #end if | |
44 #else | |
45 ## PE input provided as separate fastq datasets | |
46 '${i.mode_choose.input.pe_source.ifile1}' | |
47 '${i.mode_choose.input.pe_source.ifile2}' | |
48 #end if | |
49 #else: | |
50 ## Input is either SE data or not in fastq format => | |
51 ## only one input dataset | |
52 '${i.mode_choose.input.ifile}' | |
53 #end if | |
54 #if $gz_input: | |
55 ## a gzipped fastq input dataset was encountered | |
56 --iformat gz | |
57 #else | |
58 --iformat ${i.mode_choose.input.iformat} | |
59 #end if | |
60 --ofile '$ofile' --oformat ${output_options.oformat} | |
61 ${output_options.sort} ${output_options.explicit_mmatch_notation} | |
62 --idx-seedsize $indexing.seedsize | |
63 --idx-slack $indexing.slack | |
64 --idx-overflow $indexing.overflow | |
65 #set $aln_spec = $i.mode_choose.aln_options | |
66 #if $str($i.mode_choose.mode) == "paired": | |
67 #set $aln_global = $alignment.paired | |
68 #set $treat_overlaps = $aln_spec.discard_overlapping_mates or $aln_global.discard_overlapping_mates | |
69 --spacing #if $aln_spec.sp_min then $aln_spec.sp_min else $aln_global.sp_min | |
70 #if $aln_spec.sp_max then $aln_spec.sp_max else $aln_global.sp_max | |
71 #else | |
72 #set $aln_global = $alignment.single | |
73 #set $treat_overlaps = "" | |
74 #end if | |
75 --maxseeds #if $aln_spec.maxseeds then $aln_spec.maxseeds else $aln_global.maxseeds | |
76 --maxhits #if $aln_spec.maxhits then $aln_spec.maxhits else $aln_global.maxhits | |
77 --clipping #if $aln_spec.clipping then $aln_spec.clipping else $aln_global.clipping | |
78 --maxdist #if $aln_spec.maxdist then $aln_spec.maxdist else $aln_global.maxdist | |
79 --confdiff #if $aln_spec.confdiff then $aln_spec.confdiff else $aln_global.confdiff | |
80 --confadapt #if $aln_spec.confadpt then $aln_spec.confadpt else $aln_global.confadpt | |
81 #if $i.mode_choose.input.header: | |
82 --header '${i.mode_choose.input.header}' | |
83 #end if | |
84 --selectivity $output_options.selectivity | |
85 #if $str($output_options.filter_output) != "off": | |
86 --filter-output $output_options.filter_output | |
87 #end if | |
88 #if $treat_overlaps: | |
89 --discard-overlapping-mates | |
90 ## strip all blanks, split on ',' and pass every orientation type as its own quoted argument | |
91 '#echo ("' '".join($treat_overlaps.replace(" ", "").split(',')))#' | |
92 #end if | |
93 --verbose" | |
94 #end for | |
95 ]]></command> | |
96 | |
97 <inputs> | |
98 <conditional name="reference"> <!-- reference genome source: an entry of the all_fasta data table or a fasta dataset from the history --> | |
99 <param name="source" type="select" | |
100 label="Will you select a reference genome from your history or use a built-in genome?"> | |
101 <option value="cached">Use a built-in genome</option> | |
102 <option value="history">Use a genome from my history</option> | |
103 </param> | |
104 <when value="cached"> | |
105 <param name="genome" type="select" | |
106 label="reference genome" | |
107 help="The fasta reference genome that SNAP should align reads against."> | |
108 <options from_data_table="all_fasta" /> | |
109 </param> | |
110 </when> | |
111 <when value="history"> | |
112 <param name="genome" type="data" format="fasta" | |
113 label="reference genome" | |
114 help="The fasta reference genome that SNAP should align reads against."/> | |
115 </when> | |
116 </conditional> | |
117 <section name="indexing" title="Parameters affecting reference genome indexing" expanded="false"> <!-- the command template passes these on as the idx-seedsize, idx-slack and idx-overflow options of every snap call --> | |
118 <param name="seedsize" type="integer" value="20" | |
119 label="seed size (default: 20)" | |
120 help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/> | |
121 <param name="slack" type="float" value="0.3" | |
122 label="hash table slack size (default: 0.3)" | |
123 help="Corresponds to the -h option of SNAP index."/> | |
124 <param name="overflow" type="integer" min="1" max="1000" value="40" | |
125 label="index overflow factor (default: 40)" | |
126 help="Factor (between 1 and 1000) that controls the size of the index build overflow space. For certain genomes you may have to increase this value if you are getting a corresponding error from the tool." /> | |
127 </section> | |
128 <section name="alignment" title="Alignment parameters" expanded="false" | |
129 help="The global alignment parameters in this section will be used for samples for which you do not provide their own sample-specific settings."> | |
130 <section name="single" title="Parameters applied to single-end samples" | |
131 help="These parameters will affect the alignments for any single-end sample | |
132 for which you do not provide sample-specific settings."> | |
133 <param name="maxdist" type="integer" value="8" | |
134 label="edit distance (default: 8)" | |
135 help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/> | |
136 <param name="confdiff" type="integer" value="2" | |
137 label="confidence threshold (default: 2)" | |
138 help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/> | |
139 <param name="confadpt" type="integer" value="7" | |
140 label="adaptive confdiff behaviour (default: 7)" | |
141 help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read (SNAP option -a); helps fine-tuning alignment accuracy in repetitive regions of the genome."/> | |
142 <param name="maxseeds" type="integer" value="25" | |
143 label="maximum seeds per read (default: 25)" | |
144 help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/> | |
145 <param name="maxhits" type="integer" value="250" | |
146 label="maximum hits per seed (default: 250)" | |
147 help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/> | |
148 <param name="clipping" type="select" display="radio" | |
149 label="read clipping (default: from back and front)" | |
150 help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)"> | |
151 <option value="++">from back and front</option> | |
152 <option value="x+">from back only</option> | |
153 <option value="+x">from front only</option> | |
154 <option value="xx">no clipping</option> | |
155 </param> | |
156 </section> | |
157 <section name="paired" title="Parameters applied to paired-end samples" | |
158 help="These parameters will affect the alignments for any paired-end sample | |
159 for which you do not provide sample-specific settings."> | |
160 <param name="sp_min" type="integer" value="100" | |
161 label="minimum spacing to allow between paired ends (default: 100)" | |
162 help="Corresponds to the first value of the SNAP option -s."/> | |
163 <param name="sp_max" type="integer" value="10000" | |
164 label="maximum spacing to allow between paired ends (default: 10000)" | |
165 help="Corresponds to the second value of the SNAP option -s."/> | |
166 <param name="discard_overlapping_mates" type="text" optional="true" | |
167 label="discard overlapping read pairs of type" | |
168 help="Consider overlapping mate pairs of the given orientation type(s) anomalous and discard them; allowed values: RF, FR, FF, RR; multiple types may be specified as a comma-separated list and ALL can be used as a shortcut for discarding all overlapping mate pairs; leave blank to retain all overlapping pairs." /> | |
169 <param name="maxdist" type="integer" value="8" | |
170 label="edit distance (default: 8)" | |
171 help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/> | |
172 <param name="confdiff" type="integer" value="2" | |
173 label="confidence threshold (default: 2)" | |
174 help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/> | |
175 <param name="confadpt" type="integer" value="7" | |
176 label="adaptive confdiff behaviour (default: 7)" | |
177 help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read (SNAP option -a); helps fine-tuning alignment accuracy in repetitive regions of the genome."/> | |
178 <param name="maxseeds" type="integer" value="25" | |
179 label="maximum seeds per read (default: 25)" | |
180 help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/> | |
181 <param name="maxhits" type="integer" value="250" | |
182 label="maximum hits per seed (default: 250)" | |
183 help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/> | |
184 <param name="clipping" type="select" display="radio" | |
185 label="read clipping (default: from back and front)" | |
186 help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)"> | |
187 <option value="++">from back and front</option> | |
188 <option value="x+">from back only</option> | |
189 <option value="+x">from front only</option> | |
190 <option value="xx">no clipping</option> | |
191 </param> | |
192 </section> | |
193 </section> | |
194 <conditional name="output_options"> | |
195 <param name="config" type="select" | |
196 label="Output options" | |
197 help="No matter how many input datasets you specify below and what their formats are, this tool will produce a single output file with the aligned reads from all samples. In this section you can configure some aspects of what the output should look like. Unless you have a really special use case, you can (and probably should) just go with the default settings though."> | |
198 <option value="default">Just go with the defaults</option> | |
199 <option value="custom">Show detailed output options</option> | |
200 </param> | |
201 <when value="default"> | |
202 <param name="oformat" type="hidden" value="bam" /> | |
203 <param name="sort" type="hidden" value=""/> | |
204 <param name="explicit_mmatch_notation" type="hidden" value=""/> | |
205 <param name="filter_output" type="hidden" value="off"/> | |
206 <param name="selectivity" type="hidden" value="1"/> | |
207 </when> | |
208 <when value="custom"> | |
209 <param name="oformat" type="select" display="radio" | |
210 label="Output format"> | |
211 <option value="bam">BAM</option> | |
212 <option value="sam">SAM</option> | |
213 </param> | |
214 <param name="sort" type="boolean" falsevalue="--no-sort" truevalue="" checked="true" | |
215 label="Sort aligned reads in the output by coordinates" | |
216 help="Turn off if you want to retain the read order of the input file(s) (mimodd snap option --no-sort)." /> | |
217 <param name="explicit_mmatch_notation" type="boolean" truevalue="-X" falsevalue="" checked="false" | |
218 label="Use = and X to indicate matches/mismatches in CIGAR strings explicitly instead of using M for both" | |
219 help="Warning: Downstream tools may still rely on the classic M notation! Turn this on at your own risk (mimodd snap option -X)." /> | |
220 <param name="selectivity" type="integer" min="1" value="1" | |
221 label="selectivity (default: 1)" | |
222 help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The default of 1 indicates that all reads should be worked with." /> | |
223 <param name="filter_output" type="select" display="radio" | |
224 label="filter output (default: no filtering)" | |
225 help="filter output (SNAP option -F) to retain only specific classes of reads."> | |
226 <option value="off">no filtering</option> | |
227 <option value="a">aligned only</option> | |
228 <option value="s">single-aligned only</option> | |
229 <option value="u">unaligned only</option> | |
230 </param> | |
231 </when> | |
232 </conditional> | |
233 <repeat name="datasets" title="datasets" default="1" min="1"> | |
234 <conditional name="mode_choose"> | |
235 <param name="mode" type="select" label="choose mode" | |
236 help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!"> | |
237 <option value="single">single-end</option> | |
238 <option value="paired">paired-end</option> | |
239 </param> | |
240 <when value="single"> | |
241 <conditional name="input"> | |
242 <param name="iformat" type="select" label="input file format"> | |
243 <option value="bam">BAM</option> | |
244 <option value="sam">SAM</option> | |
245 <option value="fastq">fastq</option> | |
246 </param> | |
247 <when value="bam"> | |
248 <expand macro="sam_bam_selector" format="bam" /> | |
249 </when> | |
250 <when value="sam"> | |
251 <expand macro="sam_bam_selector" format="sam" /> | |
252 </when> | |
253 <when value="fastq"> | |
254 <param name="ifile" type="data" format="fastq" | |
255 label="input file"/> | |
256 <expand macro="require_metadata" /> | |
257 </when> | |
258 </conditional> | |
259 <section name="aln_options" title="Alignment options for this sample" expanded="false" | |
260 help="Any options you specify here will overwrite the global alignment settings defined for all single-end samples above."> | |
261 <param name="maxdist" type="integer" optional="true" value="" | |
262 label="edit distance" | |
263 help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/> | |
264 <param name="confdiff" type="integer" optional="true" value="" | |
265 label="confidence threshold" | |
266 help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/> | |
267 <param name="confadpt" type="integer" optional="true" value="" | |
268 label="adaptive confdiff behaviour" | |
269 help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read (SNAP option -a); helps fine-tuning alignment accuracy in repetitive regions of the genome."/> | |
270 <param name="maxseeds" type="integer" optional="true" value="" | |
271 label="maximum seeds per read" | |
272 help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/> | |
273 <param name="maxhits" type="integer" optional="true" value="" | |
274 label="maximum hits per seed" | |
275 help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/> | |
276 <param name="clipping" type="select" display="radio" | |
277 label="read clipping (default: from back and front)" | |
278 help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)"> | |
279 <option value="">use global setting</option> | |
280 <option value="++">from back and front</option> | |
281 <option value="x+">from back only</option> | |
282 <option value="+x">from front only</option> | |
283 <option value="xx">no clipping</option> | |
284 </param> | |
285 </section> | |
286 </when> | |
287 <when value="paired"> | |
288 <conditional name="input"> | |
289 <param name="iformat" type="select" label="input file format"> | |
290 <option value="bam">BAM</option> | |
291 <option value="sam">SAM</option> | |
292 <option value="fastq">fastq</option> | |
293 </param> | |
294 <when value="bam"> | |
295 <expand macro="sam_bam_selector" format="bam" /> | |
296 </when> | |
297 <when value="sam"> | |
298 <expand macro="sam_bam_selector" format="sam" /> | |
299 </when> | |
300 <when value="fastq"> | |
301 <conditional name="pe_source"> | |
302 <param name="type" type="select" | |
303 label="the paired-end fastq input is provided as"> | |
304 <option value="individual">Individual datasets</option> | |
305 <option value="collection">a Paired collection</option> | |
306 </param> | |
307 <when value="individual"> | |
308 <param name="ifile1" type="data" format="fastq" | |
309 label="inputfile with the first set of reads of paired-end data"/> | |
310 <param name="ifile2" type="data" format="fastq" | |
311 label="inputfile with the second set of reads of paired-end data"/> | |
312 </when> | |
313 <when value="collection"> | |
314 <param name="input_data" type="data_collection" | |
315 collection_type="paired" format="fastq, fastq.gz" | |
316 label="paired input dataset collection"/> | |
317 </when> | |
318 </conditional> | |
319 <expand macro="require_metadata" /> | |
320 </when> | |
321 </conditional> | |
322 <section name="aln_options" title="Alignment options for this sample" expanded="false" | |
323 help="Any options you specify here will overwrite the global alignment settings defined for all paired-end samples above."> <!-- every field starts out empty; the command template falls back to the global paired-end value for any field left unset --> | |
324 <param name="sp_min" type="integer" optional="true" value="" | |
325 label="minimum spacing to allow between paired ends" | |
326 help="Corresponds to the first value of the SNAP option -s."/> | |
327 <param name="sp_max" type="integer" optional="true" value="" | |
328 label="maximum spacing to allow between paired ends" | |
329 help="Corresponds to the second value of the SNAP option -s."/> | |
330 <param name="discard_overlapping_mates" type="text" optional="true" value="" | |
331 label="discard overlapping read pairs of type" | |
332 help="Consider overlapping mate pairs of the given orientation type(s) anomalous and discard them; allowed values: RF, FR, FF, RR; multiple types may be specified as a comma-separated list and ALL can be used as a shortcut for discarding all overlapping mate pairs; leave blank to retain all overlapping pairs." /> | |
333 <param name="maxdist" type="integer" optional="true" value="" | |
334 label="edit distance" | |
335 help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/> | |
336 <param name="confdiff" type="integer" optional="true" value="" | |
337 label="confidence threshold" | |
338 help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/> | |
339 <param name="confadpt" type="integer" optional="true" value="" | |
340 label="adaptive confdiff behaviour" | |
341 help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read (SNAP option -a); helps fine-tuning alignment accuracy in repetitive regions of the genome."/> | |
342 <param name="maxseeds" type="integer" optional="true" value="" | |
343 label="maximum seeds per read" | |
344 help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/> | |
345 <param name="maxhits" type="integer" optional="true" value="" | |
346 label="maximum hits per seed" | |
347 help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/> | |
348 <param name="clipping" type="select" display="radio" | |
349 label="read clipping (default: from back and front)" | |
350 help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)"> | |
351 <option value="">use global setting</option> | |
352 <option value="++">from back and front</option> | |
353 <option value="x+">from back only</option> | |
354 <option value="+x">from front only</option> | |
355 <option value="xx">no clipping</option> | |
356 </param> | |
357 </section> | |
358 </when> | |
359 </conditional> | |
360 </repeat> | |
361 </inputs> | |
362 | |
363 <outputs> | |
364 <data name="ofile" format="bam" | |
365 label="Aligned reads from ${tool.name} on ${on_string}"> | |
366 <change_format> | |
367 <when input="output_options.oformat" value="sam" format="sam"/> | |
368 </change_format> | |
369 <actions> <!-- for a built-in genome, set the output dbkey from column 1 of the all_fasta row whose column 0 matches the selected genome --> | |
370 <conditional name="reference.source"> | |
371 <when value="cached"> | |
372 <action type="metadata" name="dbkey"> | |
373 <option type="from_data_table" name="all_fasta" column="1" offset="0"> | |
374 <filter type="param_value" ref="reference.genome" column="0" /> | |
375 </option> | |
376 </action> | |
377 </when> | |
378 </conditional> | |
379 </actions> | |
380 </data> | |
381 </outputs> | |
382 | |
383 <tests> | |
384 <test> <!-- single-end BAM input with default settings: the global alignment and indexing values must show up on the command line --> | |
385 <conditional name="reference"> | |
386 <param name="source" value="history" /> | |
387 <param name="genome" value="a.fa" /> | |
388 </conditional> | |
389 <repeat name="datasets"> | |
390 <conditional name="mode_choose"> | |
391 <param name="mode" value="single" /> | |
392 <conditional name="input"> | |
393 <param name="iformat" value="bam" /> | |
394 <param name="ifile" value="a_part1.bam" /> | |
395 </conditional> | |
396 </conditional> | |
397 </repeat> | |
398 <assert_command> | |
399 <has_text text="snap single" /> | |
400 <has_text text="--iformat bam" /> | |
401 <has_text text="--oformat bam" /> | |
402 <has_text text="--idx-seedsize 20" /> | |
403 <has_text text="--idx-slack 0.3" /> | |
404 <has_text text="--idx-overflow 40" /> | |
405 <has_text text="--maxseeds 25" /> | |
406 <has_text text="--maxhits 250" /> | |
407 <has_text text="--clipping ++" /> | |
408 <has_text text="--maxdist 8" /> | |
409 <has_text text="--confdiff 2" /> | |
410 <has_text text="--confadapt 7" /> | |
411 <has_text text="--selectivity 1" /> | |
412 </assert_command> | |
413 </test> | |
414 <test> <!-- a per-sample maxdist of 7 must replace the global default of 8 while all other settings keep their global values --> | |
415 <conditional name="reference"> | |
416 <param name="source" value="history" /> | |
417 <param name="genome" value="a.fa" /> | |
418 </conditional> | |
419 <repeat name="datasets"> | |
420 <conditional name="mode_choose"> | |
421 <param name="mode" value="single" /> | |
422 <conditional name="input"> | |
423 <param name="iformat" value="bam" /> | |
424 <param name="ifile" value="a_part1.bam" /> | |
425 </conditional> | |
426 <section name="aln_options"> | |
427 <param name="maxdist" value="7" /> | |
428 </section> | |
429 </conditional> | |
430 </repeat> | |
431 <assert_command> | |
432 <has_text text="snap single" /> | |
433 <has_text text="--iformat bam" /> | |
434 <has_text text="--oformat bam" /> | |
435 <has_text text="--idx-seedsize 20" /> | |
436 <has_text text="--idx-slack 0.3" /> | |
437 <has_text text="--idx-overflow 40" /> | |
438 <has_text text="--maxseeds 25" /> | |
439 <has_text text="--maxhits 250" /> | |
440 <has_text text="--clipping ++" /> | |
441 <has_text text="--maxdist 7" /> | |
442 <has_text text="--confdiff 2" /> | |
443 <has_text text="--confadapt 7" /> | |
444 <has_text text="--selectivity 1" /> | |
445 </assert_command> | |
446 </test> | |
447 </tests> | |
448 | |
449 <help><![CDATA[ | |
450 .. class:: infomark | |
451 | |
452 **What it does** | |
453 | |
454 The tool aligns the sequenced reads in an arbitrary number of input datasets | |
455 against a common reference genome and stores the results in a single, possibly | |
456 multi-sample output dataset. | |
457 | |
458 Internally, the tool uses the ultrafast, hashtable-based aligner SNAP (http://snap.cs.berkeley.edu). | |
459 | |
460 ---------- | |
461 | |
462 **Notes:** | |
463 | |
464 *Input formats* | |
465 | |
466 - The tool accepts SAM, BAM, fastq and fastq.gz input datasets of sequenced | |
467 reads and supports both single-end and paired-end data. | |
468 | |
469 The recommended approach with MiModD is to store NGS datasets in SAM/BAM | |
470 format with *Run Metadata* (see below) stored in the file header. You can use | |
471 the *MiModD Run Annotation* and *MiModD Convert* tools to convert data from | |
472 fastq format to SAM/BAM format while attaching run metadata to it. | |
473 | |
474 While alignments **directly from fastq format** are supported, this **is less | |
475 reliable** due to less strict specifications of this format. If you find | |
476 the tool complaining about malformed fastq input, it is likely that you can | |
477 fix this problem by converting the data to SAM/BAM format first. | |
478 | |
479 - If you wish to align paired-end data directly from fastq format, the mate | |
480 sequence data has to be split over two datasets as is mostly standard today. | |
481 If you have your paired-end data as a single dataset you may look into the | |
482 *FASTQ splitter* and *FASTQ de-interlacer* tools for Galaxy, which are | |
483 available from the `Fastq Manipulation category`_ of the Galaxy Tool Shed and | |
484 may be able to convert your files to the expected format. | |
485 | |
486 *Run Metadata* | |
487 | |
488 - **Every input file requires accompanying Run Metadata!** Most importantly, | |
489 this includes a *read-group ID* (an identifier of the sequencing run that | |
490 produced the data) and a *sample name* (identifying the | |
491 biological sample sequenced in the run). | |
492 | |
493 - If an input dataset does not provide this information directly (fastq | |
494 datasets never do; SAM/BAM datasets may provide it in their header), you need | |
495 to specify a separate SAM/BAM dataset with an appropriate header as the | |
496 source of the Run Metadata. | |
497 | |
498 You can use the *MiModD Run Annotation* tool to generate such a file. | |
499 | |
500 - If a SAM/BAM input dataset already provides Run Metadata, you can still | |
501 specify a different Run Metadata source, which will then overwrite the | |
502 information already present in the input. This is useful, for example, to | |
503 resolve read-group ID conflicts between multiple input datasets. | |
504 | |
505 - Every input dataset can only contain reads from a single read-group. If you | |
506 would like, for example, to realign the reads in a multi-sample SAM/BAM | |
507 dataset, you should first use the *MiModD Sort* tool to sort the data by read | |
508 names (this step is only necessary for paired-end data), then split the reads | |
509 into new per-read-group datasets using the *MiModD Convert* tool. | |
510 | |
511 - Several input datasets can declare identical read-group IDs and/or sample | |
512 names. | |
513 | |
514 Identical read-group IDs mean that the datasets were produced in the | |
515 same sequencing run, as is the case, for example, with partial fastq | |
516 sequencing data. In the output dataset, the corresponding reads will be | |
517 merged and it will not be possible to trace back their source. | |
518 | |
519 Identical sample names (but different read-group IDs) indicate that the same | |
520 sample has been sequenced multiple times. In the output dataset, the | |
521 corresponding reads will be tagged appropriately and tools like the | |
522 *MiModD Variant Calling* tool will let you decide whether you want to treat | |
523 them together or separately. | |
524 | |
525 ---------- | |
526 | |
527 **Tool Options** | |
528 | |
529 The section *Alignment parameters* lets you configure global settings for the | |
530 alignment job that will be applied to all input datasets. For each input | |
531 dataset, however, you can overwrite some or all of these settings by specifying | |
532 new values in the section *Alignment options for this sample*. Some of the | |
533 alignment parameters may have **big** effects on the alignment quality, but | |
534 these effects are very dependent on the type of input sequences. You are | |
535 strongly encouraged to consult the in-depth `tool documentation`_ for detailed | |
536 explanations of the available options. | |
537 | |
538 .. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531 | |
539 .. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy | |
540 .. _tool documentation: http://mimodd.readthedocs.io/en/@MIMODD_REAL_VERSION@/tool_doc.html#snap | |
541 | |
542 @HELP_FOOTER@ | |
543 ]]></help> | |
544 <expand macro="citations" /> | |
545 </tool> | |
546 |