comparison bwa-mem.xml @ 18:48f306c57611 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bwa commit c355891532cecaab6b3288a148a6b3bcb5973396
author iuc
date Fri, 24 Nov 2017 09:55:45 -0500
parents be4e38d127ae
children 4f774c1e6049
comparison
equal deleted inserted replaced
17:d1228ec6233f 18:48f306c57611
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="bwa_mem" name="Map with BWA-MEM" version="@VERSION@.0"> 2 <tool id="bwa_mem" name="Map with BWA-MEM" version="@VERSION@.1">
3 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description> 3 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
4 <macros> 4 <macros>
5 <import>read_group_macros.xml</import> 5 <import>read_group_macros.xml</import>
6 <import>bwa_macros.xml</import> 6 <import>bwa_macros.xml</import>
7 </macros> 7 </macros>
8 <expand macro="requirements" /> 8 <expand macro="requirements"/>
9 <expand macro="stdio" /> 9 <expand macro="stdio"/>
10 <command> 10 <command>
11 <![CDATA[ 11 <![CDATA[
12 @set_reference_fasta_filename@ 12 @set_reference_fasta_filename@
13 13
14 ## Begin BWA-MEM command line 14 ## Begin BWA-MEM command line
15 15
16 bwa mem 16 bwa mem
17 -t "\${GALAXY_SLOTS:-1}" 17 -t "\${GALAXY_SLOTS:-1}"
18 -v 1 ## Verbosity is set to 1 (errors only) 18 ## Verbosity is set to 1 (errors only)
19 19 -v 1
20 #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option 20
21 -p 21 #if str( $fastq_input.fastq_input_selector ) == "paired_iv":
22 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used 22 ## For interleaved fastq files set -p option
23 -I "${fastq_input.iset_stats}" 23 -p
24 #end if 24 ## check that insert statistics is used
25 #end if 25 #if str( $fastq_input.iset_stats ):
26 26 -I '${fastq_input.iset_stats}'
27 #if str( $analysis_type.analysis_type_selector ) == "pacbio": 27 #end if
28 -x pacbio 28 #end if
29 #elif str( $analysis_type.analysis_type_selector ) == "ont2d": 29
30 -x ont2d 30 #if str( $analysis_type.analysis_type_selector ) not in ["illumina", "full"]:
31 #elif str( $analysis_type.analysis_type_selector ) == "intractg": 31 -x '$analysis_type.analysis_type_selector'
32 -x intractg 32 #elif str( $analysis_type.analysis_type_selector ) == "full":
33 #elif str( $analysis_type.analysis_type_selector ) == "full": 33 ## Algorithmic options
34 #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set": ## Algorithmic options 34 #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set":
35 -k "${analysis_type.algorithmic_options.k}" 35 -k '${analysis_type.algorithmic_options.k}'
36 -w "${analysis_type.algorithmic_options.w}" 36 -w '${analysis_type.algorithmic_options.w}'
37 -d "${analysis_type.algorithmic_options.d}" 37 -d '${analysis_type.algorithmic_options.d}'
38 -r "${analysis_type.algorithmic_options.r}" 38 -r '${analysis_type.algorithmic_options.r}'
39 -y "${analysis_type.algorithmic_options.y}" 39 -y '${analysis_type.algorithmic_options.y}'
40 -c "${analysis_type.algorithmic_options.c}" 40 -c '${analysis_type.algorithmic_options.c}'
41 -D "${analysis_type.algorithmic_options.D}" 41 -D '${analysis_type.algorithmic_options.D}'
42 -W "${analysis_type.algorithmic_options.W}" 42 -W '${analysis_type.algorithmic_options.W}'
43 -m "${analysis_type.algorithmic_options.m}" 43 -m '${analysis_type.algorithmic_options.m}'
44 ${analysis_type.algorithmic_options.S} 44 ${analysis_type.algorithmic_options.S}
45 ${analysis_type.algorithmic_options.P} 45 ${analysis_type.algorithmic_options.P}
46 ${analysis_type.algorithmic_options.e} 46 ${analysis_type.algorithmic_options.e}
47 #end if 47 #end if
48 48
49 #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set": ## Scoring options 49 ## Scoring options
50 -A "${analysis_type.scoring_options.A}" 50 #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set":
51 -B "${analysis_type.scoring_options.B}" 51 -A '${analysis_type.scoring_options.A}'
52 -O "${analysis_type.scoring_options.O}" 52 -B '${analysis_type.scoring_options.B}'
53 -E "${analysis_type.scoring_options.E}" 53 -O '${analysis_type.scoring_options.O}'
54 -L "${analysis_type.scoring_options.L}" 54 -E '${analysis_type.scoring_options.E}'
55 -U "${analysis_type.scoring_options.U}" 55 -L '${analysis_type.scoring_options.L}'
56 #end if 56 -U '${analysis_type.scoring_options.U}'
57 57 #end if
58 #if str( $analysis_type.io_options.io_options_selector ) == "set": ## IO options 58
59 -T "${analysis_type.io_options.T}" 59 ## IO options
60 -h "${analysis_type.io_options.h}" 60 #if str( $analysis_type.io_options.io_options_selector ) == "set":
61 -T '${analysis_type.io_options.T}'
62 -h '${analysis_type.io_options.h}'
61 ${analysis_type.io_options.a} 63 ${analysis_type.io_options.a}
62 ${analysis_type.io_options.C} 64 ${analysis_type.io_options.C}
63 ${analysis_type.io_options.V} 65 ${analysis_type.io_options.V}
64 ${analysis_type.io_options.Y} 66 ${analysis_type.io_options.Y}
65 ${analysis_type.io_options.M} 67 ${analysis_type.io_options.M}
66 ${analysis_type.io_options.five} 68 ${analysis_type.io_options.five}
67 ${analysis_type.io_options.q} 69 ${analysis_type.io_options.q}
68 #end if 70 #end if
69 71
70 #end if 72 #end if
71 73
72 ## Handle read group options... 74 ## Handle read group options...
73 @define_read_group_helpers@ 75 @define_read_group_helpers@
74 #if str( $fastq_input.fastq_input_selector ) == "paired": 76 #if str( $fastq_input.fastq_input_selector ) == "paired":
75 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2) 77 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2)
76 #else: 78 #else:
77 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1) 79 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1)
78 #end if 80 #end if
79 @set_use_rg_var@ 81 @set_use_rg_var@
80 @set_read_group_vars@ 82 @set_read_group_vars@
81 #if $use_rg 83 #if $use_rg
82 @set_rg_string@ 84 @set_rg_string@
83 -R '$rg_string' 85 -R '$rg_string'
84 #end if 86 #end if
85 87
86 #if str( $fastq_input.fastq_input_selector ) == "paired": 88 #if str( $fastq_input.fastq_input_selector ) == "paired":
87 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used 89 ## check that insert statistics is used
88 -I "${fastq_input.iset_stats}" 90 #if str( $fastq_input.iset_stats ):
89 #end if 91 -I '${fastq_input.iset_stats}'
90 92 #end if
91 "${reference_fasta_filename}" 93
92 "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}" 94 '${reference_fasta_filename}'
93 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": 95 '${fastq_input.fastq_input1}' '${fastq_input.fastq_input2}'
94 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used 96 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection":
95 -I "${fastq_input.iset_stats}" 97 ## check that insert statistics is used
96 #end if 98 #if str( $fastq_input.iset_stats ):
97 99 -I '${fastq_input.iset_stats}'
98 "${reference_fasta_filename}" 100 #end if
99 "${fastq_input.fastq_input1.forward}" "${fastq_input.fastq_input1.reverse}" 101
100 #else: 102 '${reference_fasta_filename}'
101 "${reference_fasta_filename}" 103 '${fastq_input.fastq_input1.forward}' '${fastq_input.fastq_input1.reverse}'
102 "${fastq_input.fastq_input1}" 104 #else:
103 #end if 105 '${reference_fasta_filename}'
104 106 '${fastq_input.fastq_input1}'
105 | samtools sort -O bam -o '$bam_output' 107 #end if
108
109 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '$bam_output'
106 ]]> 110 ]]>
107 </command> 111 </command>
108 112
109 <inputs> 113 <inputs>
110 <expand macro="reference_source_conditional" /> 114 <expand macro="reference_source_conditional" />
111 <conditional name="fastq_input"> 115 <conditional name="fastq_input">
112 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> 116 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
113 <option value="paired">Paired</option> 117 <option value="paired">Paired</option>
114 <option value="single">Single</option> 118 <option value="single">Single</option>
115 <option value="paired_collection">Paired Collection</option> 119 <option value="paired_collection">Paired Collection</option>
116 <option value="paired_iv">Paired Interleaved</option> 120 <option value="paired_iv">Paired Interleaved</option>
117 </param> 121 </param>
118 <when value="paired"> 122 <when value="paired">
119 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/> 123 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/>
120 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/> 124 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/>
121 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details."> 125 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
122 <sanitizer invalid_char=""> 126 <sanitizer invalid_char="">
123 <valid initial="string.digits"><add value=","/> </valid> 127 <valid initial="string.digits"><add value=","/> </valid>
124 </sanitizer> 128 </sanitizer>
125 </param> 129 </param>
126 </when> 130 </when>
127 <when value="single"> 131 <when value="single">
128 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/> 132 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/>
129 </when> 133 </when>
130 <when value="paired_collection"> 134 <when value="paired_collection">
131 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz,fasta" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> 135 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz,fasta" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
132 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details."> 136 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
133 <sanitizer invalid_char=""> 137 <sanitizer invalid_char="">
134 <valid initial="string.digits"><add value=","/> </valid> 138 <valid initial="string.digits"><add value=","/> </valid>
135 </sanitizer> 139 </sanitizer>
136 </param> 140 </param>
137 </when> 141 </when>
138 <when value="paired_iv"> 142 <when value="paired_iv">
139 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> 143 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
140 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details."> 144 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
141 <sanitizer invalid_char=""> 145 <sanitizer invalid_char="">
142 <valid initial="string.digits"><add value=","/> </valid> 146 <valid initial="string.digits"><add value=","/> </valid>
143 </sanitizer> 147 </sanitizer>
144 </param> 148 </param>
145 </when> 149 </when>
146 </conditional>
147
148 <expand macro="read_group_conditional" />
149
150 <conditional name="analysis_type">
151 <param name="analysis_type_selector" type="select" label="Select analysis mode">
152 <option value="illumina">1.Simple Illumina mode</option>
153 <option value="pacbio">2.PacBio mode (-x pacbio)</option>
154 <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option>
155 <option value="intractg">4.Intra-species contigs mode (-x intractg)</option>
156 <option value="full">5.Full list of options</option>
157 </param>
158 <when value="illumina">
159 <!-- do nothing -->
160 </when>
161 <when value="pacbio">
162 <!-- do nothing. all magic happens within <command> tag -->
163 </when>
164 <when value="ont2d">
165 <!-- do nothing. all magic happens within <command> tag -->
166 </when>
167 <when value="intractg">
168 <!-- do nothing. all magic happens within <command> tag -->
169 </when>
170 <when value="full">
171 <conditional name="algorithmic_options">
172 <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options.">
173 <option value="set">Set</option>
174 <option value="do_not_set" selected="True">Do not set</option>
175 </param>
176 <when value="set">
177 <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/>
178 <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/>
179 <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/>
180 <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" />
181 <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" />
182 <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/>
183 <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/>
184 <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/>
185 <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/>
186 <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/>
187 <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/>
188 <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/>
189 </when>
190 <when value="do_not_set">
191 <!-- do nothing -->
192 </when>
193 </conditional> 150 </conditional>
194 151
195 <conditional name="scoring_options"> 152 <expand macro="read_group_conditional" />
196 <param name="scoring_options_selector" type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options."> 153
197 <option value="set">Set</option> 154 <conditional name="analysis_type">
198 <option value="do_not_set" selected="True">Do not set</option> 155 <param name="analysis_type_selector" type="select" label="Select analysis mode">
199 </param> 156 <option value="illumina">1.Simple Illumina mode</option>
200 <when value="set"> 157 <option value="pacbio">2.PacBio mode (-x pacbio)</option>
201 <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/> 158 <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option>
202 <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/> 159 <option value="intractg">4.Intra-species contigs mode (-x intractg)</option>
203 <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6"> 160 <option value="full">5.Full list of options</option>
204 <sanitizer invalid_char="">
205 <valid initial="string.digits"><add value=","/> </valid>
206 </sanitizer>
207 </param> 161 </param>
208 <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost &#39;-O + -E*k&#39;. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1"> 162 <when value="illumina">
209 <sanitizer invalid_char=""> 163 <!-- do nothing -->
210 <valid initial="string.digits"><add value=","/> </valid> 164 </when>
211 </sanitizer> 165 <when value="pacbio">
212 </param> 166 <!-- do nothing. all magic happens within <command> tag -->
213 <param name="L" type="text" value="5,5" label="Penalties for 5&#39;-end and 3&#39;-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced"> 167 </when>
214 <sanitizer invalid_char=""> 168 <when value="ont2d">
215 <valid initial="string.digits"><add value=","/> </valid> 169 <!-- do nothing. all magic happens within <command> tag -->
216 </sanitizer> 170 </when>
217 </param> 171 <when value="intractg">
218 <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/> 172 <!-- do nothing. all magic happens within <command> tag -->
219 </when> 173 </when>
220 <when value="do_not_set"> 174 <when value="full">
221 <!-- do nothing --> 175 <conditional name="algorithmic_options">
222 </when> 176 <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options.">
177 <option value="set">Set</option>
178 <option value="do_not_set" selected="True">Do not set</option>
179 </param>
180 <when value="set">
181 <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/>
182 <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/>
183 <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/>
184 <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" />
185 <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" />
186 <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/>
187 <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/>
188 <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/>
189 <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/>
190 <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/>
191 <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/>
192 <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/>
193 </when>
194 <when value="do_not_set">
195 <!-- do nothing -->
196 </when>
197 </conditional>
198
199 <conditional name="scoring_options">
200 <param name="scoring_options_selector" type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options.">
201 <option value="set">Set</option>
202 <option value="do_not_set" selected="True">Do not set</option>
203 </param>
204 <when value="set">
205 <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/>
206 <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/>
207 <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6">
208 <sanitizer invalid_char="">
209 <valid initial="string.digits"><add value=","/> </valid>
210 </sanitizer>
211 </param>
212 <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost &#39;-O + -E*k&#39;. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1">
213 <sanitizer invalid_char="">
214 <valid initial="string.digits"><add value=","/> </valid>
215 </sanitizer>
216 </param>
217 <param name="L" type="text" value="5,5" label="Penalties for 5&#39;-end and 3&#39;-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced">
218 <sanitizer invalid_char="">
219 <valid initial="string.digits"><add value=","/> </valid>
220 </sanitizer>
221 </param>
222 <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/>
223 </when>
224 <when value="do_not_set">
225 <!-- do nothing -->
226 </when>
227 </conditional>
228
229 <conditional name="io_options">
230 <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options.">
231 <option value="set">Set</option>
232 <option value="do_not_set" selected="True">Do not set</option>
233 </param>
234 <when value="set">
235 <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" label="For split alignment, take alignment with smallest coordinate as primary" help="Useful for HiC data"/>
236 <param argument="-q" type="boolean" truevalue="-q" falsevalue="" label="Don't lower MAPQ for split alignment" help="By default the MAPQ score of a supplementary alignment will be lowered to the primary alignment score."/>
237 <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/>
238 <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score &gt;80% of the max score, output them all in the XA tag" help="-h; default=5" />
239 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/>
240 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/>
241 <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-C"/>
242 <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" />
243 <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard&lt;1.96 compatibility" />
244 </when>
245 <when value="do_not_set">
246 <!-- do nothing -->
247 </when>
248 </conditional>
249 </when>
223 </conditional> 250 </conditional>
224 251 </inputs>
225 <conditional name="io_options"> 252
226 <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options."> 253 <outputs>
227 <option value="set">Set</option> 254 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)">
228 <option value="do_not_set" selected="True">Do not set</option> 255 <expand macro="dbKeyActionsBwaMem" />
229 </param> 256 </data>
230 <when value="set"> 257 </outputs>
231 <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" label="For split alignment, take alignment with smallest coordinate as primary" help="Useful for HiC data"/> 258
232 <param argument="-q" type="boolean" truevalue="-q" falsevalue="" label="Don't lower MAPQ for split alignment" help="By default the MAPQ score of a supplementary alignment will be lowered to the primary alignment score."/> 259 <tests>
233 <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/> 260 <test>
234 <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score &gt;80% of the max score, output them all in the XA tag" help="-h; default=5" /> 261 <param name="reference_source_selector" value="history" />
235 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/> 262 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
236 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/> 263 <param name="fastq_input_selector" value="paired"/>
237 <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-C"/> 264 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
238 <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" /> 265 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
239 <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard&lt;1.96 compatibility" /> 266 <param name="analysis_type_selector" value="illumina"/>
240 </when> 267 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" />
241 <when value="do_not_set"> 268 </test>
242 <!-- do nothing --> 269 <test>
243 </when> 270 <param name="reference_source_selector" value="history" />
244 </conditional> 271 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
245 </when> 272 <param name="fastq_input_selector" value="single"/>
246 </conditional> 273 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/>
247 </inputs> 274 <param name="analysis_type_selector" value="illumina"/>
248 275 <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="2" />
249 <outputs> 276 </test>
250 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> 277 <test>
251 <expand macro="dbKeyActionsBwaMem" /> 278 <param name="reference_source_selector" value="history" />
252 </data> 279 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
253 </outputs> 280 <param name="fastq_input_selector" value="paired"/>
254 281 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
255 <tests> 282 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
256 <test> 283 <param name="analysis_type_selector" value="illumina"/>
257 <param name="reference_source_selector" value="history" /> 284 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" />
258 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 285 </test>
259 <param name="fastq_input_selector" value="paired"/> 286 <test>
260 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> 287 <param name="reference_source_selector" value="history" />
261 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> 288 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
262 <param name="analysis_type_selector" value="illumina"/> 289 <param name="index_a" value="is"/>
263 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> 290 <param name="fastq_input_selector" value="paired"/>
264 </test> 291 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
265 <test> 292 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
266 <param name="reference_source_selector" value="history" /> 293 <param name="rg_selector" value="set"/>
267 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> 294 <param name="ID" value="rg1"/>
268 <param name="fastq_input_selector" value="single"/> 295 <param name="PL" value="CAPILLARY"/>
269 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/> 296 <param name="LB" value="AARDVARK-1" />
270 <param name="analysis_type_selector" value="illumina"/> 297 <param name="analysis_type_selector" value="illumina"/>
271 <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="2" /> 298 <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" />
272 </test> 299 </test>
273 <test> 300 </tests>
274 <param name="reference_source_selector" value="history" /> 301 <help><![CDATA[
275 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
276 <param name="fastq_input_selector" value="paired"/>
277 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
278 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
279 <param name="analysis_type_selector" value="illumina"/>
280 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" />
281 </test>
282 <test>
283 <param name="reference_source_selector" value="history" />
284 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
285 <param name="index_a" value="is"/>
286 <param name="fastq_input_selector" value="paired"/>
287 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
288 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
289 <param name="rg_selector" value="set"/>
290 <param name="ID" value="rg1"/>
291 <param name="PL" value="CAPILLARY"/>
292 <param name="LB" value="AARDVARK-1" />
293 <param name="analysis_type_selector" value="illumina"/>
294 <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" />
295 </test>
296 </tests>
297 <help>
298 **What it does** 302 **What it does**
299 303
300 From http://arxiv.org/abs/1303.3997: 304 From http://arxiv.org/abs/1303.3997:
301 305
302 BWA-MEM is a new alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human. 306 BWA-MEM is an alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human.
303 It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment. 307 It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment.
304 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases. 308 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases.
305 For mapping 100bp sequences, BWA-MEM shows better performance than several state-of-art read aligners to date. 309
306 310 This Galaxy tool wraps the bwa-mem module of the bwa read mapping tool. The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
307 It is best suited for mapping long (>70 nt) reads against large reference genomes.
308
309 This Galaxy tool wraps the bwa-mem module of the bwa read mapping tool. The Galaxy implementation takes fastq files as input and produces output in BAM (not SAM) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
310 311
311 ----- 312 -----
312 313
313 **Indices: Selecting reference genomes for BWA** 314 **Indices: Selecting reference genomes for BWA**
314 315
315 The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using the **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: 316 The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using the **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
316 317
317 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. 318 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against.
318 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`. 319 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`.
319 320
320 If your genome of interest is not listed here you have two choices: 321 If your genome of interest is not listed here you have two choices:
321 322
322 1. Contact the Galaxy team using the **Help->Support** link at the top of the interface and let us know that an index needs to be added 323 1. Contact the Galaxy team using the **Help->Support** link at the top of the interface and let us know that an index needs to be added
323 2. Upload your genome of interest as a FASTA file to Galaxy history and select the **Use a genome from the history and build index** option. 324 2. Upload your genome of interest as a FASTA file to Galaxy history and select the **Use a genome from the history and build index** option.
324 325
326 327
327 **Galaxy-specific option** 328 **Galaxy-specific option**
328 329
329 Galaxy allows several levels of control over bwa-mem options, provided by the **Select analysis mode** menu option. These are: 330 Galaxy allows several levels of control over bwa-mem options, provided by the **Select analysis mode** menu option. These are:
330 331
331 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to the reference using default parameters. It is equivalent to the following command: bwa mem &lt;reference index&gt; &lt;fastq dataset1&gt; [fastq dataset2] 332 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to the reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2]
332 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 &lt;reference index&gt; &lt;PacBio dataset in fastq format&gt; 333 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format>
333 3. *Full list of options*: Allows access to all options through Galaxy interface. 334 3. *Full list of options*: Allows access to all options through Galaxy interface.
334 335
335 ------
336
337 **BWA MEM options**
338
339 Each Galaxy parameter widget corresponds to command line flags listed below:
340
341 Algorithm options::
342
343 -k INT minimum seed length [19]
344 -w INT band width for banded alignment [100]
345 -d INT off-diagonal X-dropoff [100]
346 -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]
347 -y INT find MEMs longer than {-k} * {-r} with size less than INT [0]
348 -c INT skip seeds with more than INT occurrences [500]
349 -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50]
350 -W INT discard a chain if seeded bases shorter than INT [0]
351 -m INT perform at most INT rounds of mate rescues for each read [50]
352 -S skip mate rescue
353 -P skip pairing; mate rescue performed unless -S also in use
354 -e discard full-length exact matches
355
356 Scoring options::
357
358 -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]
359 -B INT penalty for a mismatch [4]
360 -O INT[,INT] gap open penalties for deletions and insertions [6,6]
361 -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1]
362 -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5]
363 -U INT penalty for an unpaired read pair [17]
364
365 Input/output options::
366
367 -p first query file consists of interleaved paired-end sequences
368 -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null]
369
370 -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3]
371 -T INT minimum score to output [30]
372 -h INT if there are &lt;INT hits with score &gt;80% of the max score, output all in XA [5]
373 -a output all alignments for SE or unpaired PE
374 -C append FASTA/FASTQ comment to SAM output
375 -V output the reference FASTA header in the XR tag
376 -Y use soft clipping for supplementary alignments
377 -M mark shorter split hits as secondary
378
379 -I FLOAT[,FLOAT[,INT[,INT]]]
380 specify the mean, standard deviation (10% of the mean if absent), max
381 (4 sigma from the mean if absent) and min of the insert size distribution.
382 FR orientation only. [inferred]
383
384 @dataset_collections@
385
386 @RG@ 336 @RG@
387 337
388 @info@ 338 @info@
389 </help> 339 ]]></help>
390 <citations> 340 <citations>
391 <citation type="doi">10.1093/bioinformatics/btp324</citation> 341 <citation type="doi">10.1093/bioinformatics/btp324</citation>
392 <citation type="doi">10.1093/bioinformatics/btp698</citation> 342 <citation type="doi">10.1093/bioinformatics/btp698</citation>
393 <citation type="bibtex">@misc{1303.3997, 343 <citation type="bibtex">@misc{1303.3997,
394 Author = {Heng Li}, 344 Author = {Heng Li},
395 Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, 345 Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM},
396 Year = {2013}, 346 Year = {2013},
397 Eprint = {arXiv:1303.3997}, 347 Eprint = {arXiv:1303.3997},
398 url = {http://arxiv.org/abs/1303.3997}, 348 url = {http://arxiv.org/abs/1303.3997},
399 }</citation> 349 }</citation>
400 </citations> 350 </citations>
401 </tool> 351 </tool>