comparison bwa.xml @ 0:ff1ae217ccc2 draft

Uploaded
author devteam
date Tue, 16 Dec 2014 15:22:56 -0500
parents
children c71dd035971e
comparison
equal deleted inserted replaced
-1:000000000000 0:ff1ae217ccc2
1 <?xml version="1.0"?>
2 <tool id="bwa" name="BWA" version="0.1">
3
4 <requirements> <!-- External packages the command below invokes: bwa (index, aln, samse, sampe) and samtools (view, sort) -->
5 <requirement type="package" version="0.7.10.039ea20639">bwa</requirement>
6 <requirement type="package" version="1.1">samtools</requirement>
7 </requirements>
8 <description>- map short reads (&lt; 100 bp) against reference genome</description>
9 <command>
10
11 #set $reference_fasta_filename = "localref.fa"
12
13 #if str( $reference_source.reference_source_selector ) == "history":
14
15 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
16
17 ## The following shell commands decide which of the BWA indexing algorithms (IS or BWTSW) will be run
18 ## depending on the size of the input FASTA dataset
19
20 ( ## Index the symlinked history FASTA once: IS algorithm below 2000000000 bytes, BWTSW otherwise
21 size=`stat -c %s "${reference_fasta_filename}" 2&gt;/dev/null`; ## Linux (GNU stat -c); fails with a non-zero status on OSX
22 if [ $? -eq 0 ];
23 then
24 if [ "\$size" -lt 2000000000 ];
25 then
26 bwa index -a is "${reference_fasta_filename}";
27 else
28 bwa index -a bwtsw "${reference_fasta_filename}";
29 fi;
30 fi;
31 ## OSX fallback: test st_size itself, because eval of an empty substitution still returns 0 when stat -s fails on Linux
32 eval \$(stat -s "${reference_fasta_filename}" 2&gt;/dev/null); ## OSX
33 if [ -n "\$st_size" ];
34 then
35 if [ "\$st_size" -lt 2000000000 ];
36 then
37 bwa index -a is "${reference_fasta_filename}";
38 echo "Generating BWA index with is algorithm";
39 else
40 bwa index -a bwtsw "${reference_fasta_filename}";
41 echo "Generating BWA index with bwtsw algorithm";
42 fi;
43 fi;
44 ) &amp;&amp;
45
46 #else:
47 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
48 #end if
49
50 ## Begin bwa command line
51
52 ####### Fastq paired
53
54 #if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection":
55 ## Paired fastq: run bwa aln once per mate, then combine first.sai and second.sai with bwa sampe
56 bwa aln
57 -t "\${GALAXY_SLOTS:-1}"
58
59 @command_options@
60
61 "${reference_fasta_filename}"
62 ## Forward reads: collection member or the first selected dataset
63 #if str( $input_type.input_type_selector ) == "paired_collection":
64 "${input_type.fastq_input1.forward}"
65 #else
66 "${input_type.fastq_input1}"
67 #end if
68
69 > first.sai &amp;&amp;
70
71 bwa aln
72 -t "\${GALAXY_SLOTS:-1}"
73
74 @command_options@
75
76 "${reference_fasta_filename}"
77 ## Reverse reads: collection member or the second selected dataset
78 #if str( $input_type.input_type_selector ) == "paired_collection":
79 "${input_type.fastq_input1.reverse}"
80 #else
81 "${input_type.fastq_input2}"
82 #end if
83
84 > second.sai &amp;&amp;
85
86 bwa sampe
87 ## The selector offers the values "set" and "do_not_set", so the advanced flags are emitted only for "set"
88 #if str( $input_type.adv_pe_options.adv_pe_options_selector) == "set":
89
90 -a ${input_type.adv_pe_options.a}
91 -o ${input_type.adv_pe_options.o}
92 -n ${input_type.adv_pe_options.n}
93 -N ${input_type.adv_pe_options.N}
94
95 #end if
96
97 @read_group_options@
98
99 #if str( $input_type.input_type_selector ) == "paired_collection":
100
101 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1.forward}" "${input_type.fastq_input1.reverse}"
102
103 #else:
104
105 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1}" "${input_type.fastq_input2}"
106
107 #end if
108
109 ####### Fastq single
110
111 #elif str( $input_type.input_type_selector ) == "single":
112 ## Single fastq: bwa aln writes first.sai, then bwa samse converts it to SAM on stdout
113 bwa aln
114 -t "\${GALAXY_SLOTS:-1}"
115
116 @command_options@
117
118 "${reference_fasta_filename}"
119 "${input_type.fastq_input1}"
120 > first.sai &amp;&amp;
121
122 bwa samse
123 ## The selector offers the values "set" and "do_not_set", so the advanced flag is emitted only for "set"
124 #if str( $input_type.adv_se_options.adv_se_options_selector) == "set":
125
126 -n ${input_type.adv_se_options.n}
127
128 #end if
129
130 @read_group_options@
131
132 "${reference_fasta_filename}" first.sai "${input_type.fastq_input1}"
133
134 ####### BAM paired
135
136 #elif str( $input_type.input_type_selector ) == "paired_bam":
137 ## Paired BAM: the same BAM is read twice, with -b -1 for the first mates and -b -2 for the second mates
138 bwa aln
139 -t "\${GALAXY_SLOTS:-1}"
140 -b
141 -1
142
143 @command_options@
144
145 "${reference_fasta_filename}"
146 "${input_type.bam_input}"
147 > first.sai &amp;&amp;
148
149 bwa aln
150 -t "\${GALAXY_SLOTS:-1}"
151 -b
152 -2
153 @command_options@
154 "${reference_fasta_filename}"
155 "${input_type.bam_input}"
156 > second.sai &amp;&amp;
157
158 bwa sampe
159 ## The conditional is named adv_bam_pe_options but its selector keeps the macro name adv_pe_options_selector; its values are "set" and "do_not_set"
160 #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "set":
161
162 -a ${input_type.adv_bam_pe_options.a}
163 -o ${input_type.adv_bam_pe_options.o}
164 -n ${input_type.adv_bam_pe_options.n}
165 -N ${input_type.adv_bam_pe_options.N}
166
167 #end if
168
169 @read_group_options@
170
171 "${reference_fasta_filename}" first.sai second.sai "${input_type.bam_input}" "${input_type.bam_input}"
172
173 ####### BAM single
174
175 #elif str( $input_type.input_type_selector ) == "single_bam":
176 ## Single BAM: bwa aln reads the BAM with -b -0, then bwa samse converts first.sai to SAM on stdout
177 bwa aln
178 -t "\${GALAXY_SLOTS:-1}"
179 -b
180 -0
181
182 @command_options@
183
184 "${reference_fasta_filename}"
185 "${input_type.bam_input}"
186 > first.sai &amp;&amp;
187
188 bwa samse
189 ## The conditional is named adv_bam_se_options but its selector keeps the macro name adv_se_options_selector; its values are "set" and "do_not_set"
190 #if str( $input_type.adv_bam_se_options.adv_se_options_selector) == "set":
191
192 -n ${input_type.adv_bam_se_options.n}
193
194 #end if
195
196 @read_group_options@
197
198 "${reference_fasta_filename}" first.sai "${input_type.bam_input}"
199
200 #end if
201 ## The stdout of the bwa samse or bwa sampe call selected above is piped into samtools view and stored as an intermediate BAM
202 | samtools view -Sb - > temporary_bam_file.bam &amp;&amp;
203 ## Sort the intermediate BAM; the result is written to the bam_output dataset
204 samtools sort -f temporary_bam_file.bam ${bam_output}
205
206
207 </command>
208
209 <macros>
210 <import>bwa_macros.xml</import>
211 <token name="@command_options@">
212 #if str( $analysis_type.analysis_type_selector ) == "illumina":
213
214 ## do nothing -> just align with default parameters
215
216 #elif str( $analysis_type.analysis_type_selector ) == "full":
217 ## Full mode: every bwa aln parameter from the analysis_type conditional is passed explicitly
218 -n ${analysis_type.n}
219 -o ${analysis_type.o}
220 -e ${analysis_type.e}
221 -i ${analysis_type.i}
222 -d ${analysis_type.d}
223 -l ${analysis_type.l}
224 -k ${analysis_type.k}
225 -m ${analysis_type.m}
226 -M ${analysis_type.M}
227 -O ${analysis_type.O}
228 -E ${analysis_type.E}
229 -R ${analysis_type.R}
230 -q ${analysis_type.q}
231 ## B and L are optional parameters, so each flag is emitted only when a value was entered
232 #if str( $analysis_type.B ):
233 -B ${analysis_type.B}
234 #end if
235 ## The long-deletion gap penalty belongs to the -L flag (help attribute: aln -L), not to -B
236 #if str( $analysis_type.L ):
237 -L ${analysis_type.L}
238 #end if
239 #end if
240 </token>
241 <token name="@read_group_options@">
242 ## Expands to the -r read group header for bwa samse and bwa sampe; rg_selector offers the values "set" and "do_not_set"
243 #if str( $rg.rg_selector ) == "set":
244 ## The backslash-t sequences are passed to bwa literally as field separators of the RG line
245 -r "@RG\tID:$rg.ID\tSM:$rg.SM"
246
247 #end if
248 </token>
249
250 <xml name="advanced_pe_options"> <!-- Body of a paired-end advanced-options conditional; expanded inside adv_pe_options and adv_bam_pe_options in the inputs section -->
251 <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?" help="Provides additional controls">
252 <option value="set">Set</option>
253 <option value="do_not_set" selected="True">Do not set</option>
254 </param>
255 <when value="set">
256 <param name="a" type="integer" value="500" label="Maximum insert size for a read pair to be considered being mapped properly." help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/>
257 <param name="o" type="integer" value="100000" label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read." help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/>
258 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly." help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
259 <param name="N" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)." help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/>
260 <param name="c" type="float" value="0.00005" label="Prior of chimeric rate (lower bound)" help="sampe -c"/> <!-- NOTE(review): "c" is collected here, but the bwa sampe command lines in this file emit only -a, -o, -n and -N -->
261 </when>
262 <when value="do_not_set">
263 <!-- do nothing -->
264 </when>
265 </xml>
266 <xml name="advanced_se_options"> <!-- Body of a single-end advanced-options conditional; expanded inside adv_se_options and adv_bam_se_options in the inputs section -->
267 <param name="adv_se_options_selector" type="select" label="Set advanced single end options?" help="Provides additional controls">
268 <option value="set">Set</option>
269 <option value="do_not_set" selected="True">Do not set</option>
270 </param>
271 <when value="set"> <!-- "n" is the only single-end option; the command passes it to bwa samse as -n -->
272 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag." help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
273 </when>
274 <when value="do_not_set">
275 <!-- do nothing -->
276 </when>
277 </xml>
278 </macros>
279
280 <inputs>
281
282 <conditional name="reference_source"> <!-- Chooses where the reference comes from; the command branches on reference_source_selector -->
283 <param name="reference_source_selector" type="select" label="Load reference genome from">
284 <option value="cached">Local cache</option>
285 <option value="history">History</option>
286 </param>
287 <when value="cached"> <!-- Entry of the bwa_mem_indexes data table; the command uses its "path" field as the reference file name -->
288 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
289 <options from_data_table="bwa_mem_indexes">
290 <filter type="sort_by" column="2" />
291 <validator type="no_options" message="No indexes are available" />
292 </options>
293 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
294 </param>
295 </when>
296 <when value="history"> <!-- FASTA dataset from the history; the command symlinks it to localref.fa and runs bwa index on it -->
297 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
298 </when>
299 </conditional>
300 <conditional name="input_type"> <!-- Selects the read source; each value has a matching branch in the command template -->
301 <param name="input_type_selector" type="select" label="Select input type" help="Select between fastq and bam datasets and between paired and single end data">
302 <option value="paired">Paired fastq</option>
303 <option value="paired_collection">Paired fastq collection</option>
304 <option value="single">Single fastq</option>
305 <option value="paired_bam">Paired BAM</option>
306 <option value="single_bam">Single BAM</option>
307 </param>
308 <when value="paired">
309 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
310 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
311 <conditional name="adv_pe_options">
312
313 <expand macro="advanced_pe_options" />
314
315 </conditional>
316 </when>
317 <!-- paired_collection: one paired collection; the command reads its "forward" and "reverse" members -->
318 <when value="paired_collection">
319 <param name="fastq_input1" format="fastqsanger" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
320 <conditional name="adv_pe_options">
321
322 <expand macro="advanced_pe_options" />
323
324 </conditional>
325 </when>
326
327
328 <when value="single">
329 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/>
330 <conditional name="adv_se_options">
331
332 <expand macro="advanced_se_options" />
333
334 </conditional>
335 </when>
336
337 <!-- the difference between single and paired bams is in the <command> tag portion and related to -0, -1, and -2 options -->
338
339 <when value="paired_bam">
340 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with paired reads"/>
341 <conditional name="adv_bam_pe_options">
342 <!-- the expanded selector is still named adv_pe_options_selector; only the enclosing conditional carries the "bam" name -->
343 <expand macro="advanced_pe_options" />
344
345 </conditional>
346 </when>
347
348 <when value="single_bam">
349 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with single reads"/>
350 <conditional name="adv_bam_se_options">
351 <!-- the expanded selector is still named adv_se_options_selector; only the enclosing conditional carries the "bam" name -->
352 <expand macro="advanced_se_options" />
353
354 </conditional>
355 </when>
356
357 </conditional>
358
359 <conditional name="rg"> <!-- Optional read group; when set, ID and SM are written into the -r "@RG..." argument of bwa samse and bwa sampe -->
360 <param name="rg_selector" type="select" label="Set readgroups information?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
361 <option value="set">Set</option>
362 <option value="do_not_set" selected="True">Do not set</option>
363 </param>
364 <when value="set">
365 <param name="ID" type="text" value="readgroup1" size="20" label="Specify readgroup ID" help="This value must be unique among multiple samples in your experiment">
366 <sanitizer invalid_char=""> <!-- NOTE(review): string.printable presumably lets quotes and other shell metacharacters through, and the value is interpolated inside a double-quoted shell argument; confirm this is acceptable -->
367 <valid initial="string.printable"/>
368 </sanitizer>
369 </param>
370 <param name="SM" type="text" value="blood" size="20" label="Specify readgroup sample name (SM)" help="This value should be descriptive">
371 <sanitizer invalid_char=""> <!-- NOTE(review): same permissive sanitizer as for ID; confirm -->
372 <valid initial="string.printable"/>
373 </sanitizer>
374 </param>
375 </when>
376 <when value="do_not_set">
377 <!-- do nothing -->
378 </when>
379 </conditional>
380
381 <conditional name="analysis_type"> <!-- Controls what the @command_options@ token adds to every bwa aln call -->
382 <param name="analysis_type_selector" type="select" label="Select analysis mode">
383 <option value="illumina">1.Simple Illumina mode</option>
384 <option value="full">2.Full list of options</option>
385 </param>
386 <when value="illumina">
387 <!-- do nothing -->
388 </when>
389 <when value="full"> <!-- Each param name equals the bwa aln flag it is passed with; B and L are optional and emitted only when filled in -->
390 <param name="n" type="text" value="0.04" label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths." help="aln -n; default=0.04"/> <!-- presumably type="text" so that either an integer or a float can be entered; verify -->
391 <param name="o" type="integer" value="1" label="maximum number or gap openings" help="aln -o; default=1"/>
392 <param name="e" type="integer" value="-1" label="maximum number of gap extensions" help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/>
393 <param name="i" type="integer" value="5" label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/>
394 <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion" help="aln -d; default=10"/>
395 <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/>
396 <param name="k" type="integer" value="2" label="maximum differences in the seed" help="aln -k; default=2"/>
397 <param name="m" type="integer" value="2000000" label="maximum entries in the queue" help="aln -m; default=2000000"/>
398 <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/>
399 <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/>
400 <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/>
401 <param name="R" type="integer" value="30" label="stop searching when there are more than this value of equally best hits" help="aln -R; default=30"/>
402 <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp" help="aln -q; default=0"/>
403 <param name="B" type="integer" optional="True" label="length of barcode" help="aln -B; optional parameter"/>
404 <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions" help="aln -L; optional parameter"/>
405 </when>
406 </conditional>
407 </inputs>
408
409 <outputs> <!-- Single BAM dataset; the final samtools sort step of the command writes to it -->
410 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"/>
411 </outputs>
412
413 <tests>
414 <test> <!-- History reference, paired fastq input, default (illumina) aln parameters -->
415 <param name="reference_source_selector" value="history" />
416 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
417 <param name="input_type_selector" value="paired"/>
418 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
419 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
420 <param name="analysis_type_selector" value="illumina"/>
421 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" />
422 </test>
423 <test> <!-- History reference, paired BAM input, default (illumina) aln parameters -->
424 <param name="reference_source_selector" value="history" />
425 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
426 <param name="input_type_selector" value="paired_bam"/>
427 <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/>
428 <param name="analysis_type_selector" value="illumina"/>
429 <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2" />
430 </test>
431 </tests>
432 <stdio> <!-- NOTE(review): presumably flags any exit status of 1 or higher as a job error; no stderr text rule is declared here -->
433 <exit_code range="1:" />
434 </stdio>
435 <help>
436
437 **What it does**
438
439 BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as separate Galaxy tool.
440
441 This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool:
442
443 - bwa aln - actual mapper placing reads onto the reference sequence
444 - bwa samse - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads
445 - bwa sampe - post-processor for paired reads
446
447 Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM (not SAM; in reality SAM produced by the bwa is converted to BAM on the fly by samtools view command) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
448
449 -----
450
451 **Galaxy-specific option**
452
453 Galaxy allows two levels of control over bwa-aln options provided by **Select analysis mode** menu option. These are:
454
455 1. *Simple Illumina mode*: The simplest possible bwa aln application in which it aligns single or paired-end data to reference using default parameters. It is equivalent to running bwa aln &lt;reference index&gt; &lt;fastq dataset&gt; on each read dataset, followed by bwa samse (single reads) or bwa sampe (paired reads).
456 2. *Full list of options*: Allows access to all options through Galaxy interface.
457
458 ------
459
460 **bwa-aln options**
461
462 Each Galaxy parameter widget corresponds to command line flags listed below::
463
464 -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
465 -o INT maximum number or fraction of gap opens [1]
466 -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]
467 -i INT do not put an indel within INT bp towards the ends [5]
468 -d INT maximum occurrences for extending a long deletion [10]
469 -l INT seed length [32]
470 -k INT maximum differences in the seed [2]
471 -m INT maximum entries in the queue [2000000]
472 -M INT mismatch penalty [3]
473 -O INT gap open penalty [11]
474 -E INT gap extension penalty [4]
475 -R INT stop searching when there are >INT equally best hits [30]
476 -q INT quality threshold for read trimming down to 35bp [0]
477 -B INT length of barcode
478 -L log-scaled gap penalty for long deletions
479 -N non-iterative mode: search for all n-difference hits (slooow)
480 -I the input is in the Illumina 1.3+ FASTQ-like format
481 -b the input read file is in the BAM format
482 -0 use single-end reads only (effective with -b)
483 -1 use the 1st read in a pair (effective with -b)
484 -2 use the 2nd read in a pair (effective with -b)
485
486 **bwa-sampe options**::
487
488 -a INT maximum insert size [500]
489 -o INT maximum occurrences for one end [100000]
490 -n INT maximum hits to output for paired reads [3]
491 -N INT maximum hits to output for discordant pairs [10]
492 -c FLOAT prior of chimeric rate (lower bound) [1.0e-05]
493 -r STR read group header line [null]
494
495 **bwa-samse options**::
496
497 -n INT maximum hits to output for paired reads [3]
498 -r STR read group header line [null]
499
500
501 @dataset_collections@
502
503 @RG@
504
505 @info@
506
507 </help>
508 <citations>
509 <citation type="doi">10.1093/bioinformatics/btp324</citation>
510 <citation type="doi">10.1093/bioinformatics/btp698</citation>
511 </citations>
512 </tool>