comparison bwa-mem.xml @ 5:86c73f0eb389 draft default tip

Uploaded
author devteam
date Thu, 06 Nov 2014 14:52:29 -0500
parents 5e72d136a39e
children
comparison
equal deleted inserted replaced
4:d04dfa7de2dc 5:86c73f0eb389
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="bwa_mem_0_7_10" name="BWA-MEM" version="bwa-0.7.10-r837-dirty_galaxy_0.1"> 2 <tool id="bwa_mem_0_7_10" name="BWA-MEM" version="bwa-0.7.10-r837-dirty_galaxy_0.2">
3
4 <macros>
5 <import>bwa_macros.xml</import>
6 </macros>
7
3 <requirements> 8 <requirements>
4 <requirement type="package" version="0.7.10.039ea20639">bwa</requirement> 9 <requirement type="package" version="0.7.10.039ea20639">bwa</requirement>
5 <requirement type="package" version="1.1">samtools</requirement> 10 <requirement type="package" version="1.1">samtools</requirement>
6 </requirements> 11 </requirements>
7 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description> 12 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
54 -t "\${GALAXY_SLOTS:-1}" 59 -t "\${GALAXY_SLOTS:-1}"
55 -v 1 ## Verbosity is set to 1 (errors only) 60 -v 1 ## Verbosity is set to 1 (errors only)
56 61
57 #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option 62 #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option
58 -p 63 -p
59 #if str( $fastq_input.iv_stats.iv_stats_selector ) == "True": ## check that insert statistics is used 64 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used
60 -I "${fastq_input.iv_stats.iset_stats}" 65 -I "${fastq_input.iset_stats}"
61 #end if 66 #end if
62 #end if 67 #end if
63 68
64 #if str( $analysis_type.analysis_type_selector ) == "pacbio": 69 #if str( $analysis_type.analysis_type_selector ) == "pacbio":
65 -x 70 -x
116 -R "@RG\tID:$rg.ID\tSM:$rg.SM" 121 -R "@RG\tID:$rg.ID\tSM:$rg.SM"
117 #end if 122 #end if
118 123
119 #if str( $fastq_input.fastq_input_selector ) == "paired": 124 #if str( $fastq_input.fastq_input_selector ) == "paired":
120 125
121 #if str( $fastq_input.paired_stats.paired_stats_selector ) == "True": ## check that insert statistics is used 126 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used
122 -I "${fastq_input.paired_stats.iset_stats}" 127 -I "${fastq_input.iset_stats}"
123 #end if 128 #end if
124 129
125 "${reference_fasta_filename}" 130 "${reference_fasta_filename}"
126 131
127 "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}" 132 "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}"
128 133
134 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection":
135
136 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used
137 -I "${fastq_input.iset_stats}"
138 #end if
139
140
141 "${reference_fasta_filename}"
142
143 "${fastq_input.fastq_input1.forward}" "${fastq_input.fastq_input1.reverse}"
144
129 #else: 145 #else:
130 146
147
131 "${reference_fasta_filename}" 148 "${reference_fasta_filename}"
132 149
133 "${fastq_input.fastq_input1}" 150 "${fastq_input.fastq_input1}"
134 151
135 #end if 152 #end if
136 153
137 | samtools view -Sb - > $bam_output 154 | samtools view -Sb - > temporary_bam_file.bam &amp;&amp;
155
156 samtools sort -f temporary_bam_file.bam ${bam_output}
138 157
139 </command> 158 </command>
140 159
141 <inputs> 160 <inputs>
142 161
153 </options> 172 </options>
154 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> 173 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
155 </param> 174 </param>
156 </when> 175 </when>
157 <when value="history"> 176 <when value="history">
158 <param name="ref_file" type="data" format="fasta" label="Use the folloing dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> 177 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
159 </when> 178 </when>
160 </conditional> 179 </conditional>
161 <conditional name="fastq_input"> 180 <conditional name="fastq_input">
162 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> 181 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
163 <option value="paired">Paired</option> 182 <option value="paired">Paired</option>
164 <option value="single">Single</option> 183 <option value="single">Single</option>
184 <option value="paired_collection">Paired Collection</option>
165 <option value="paired_iv">Paired Interleaved</option> 185 <option value="paired_iv">Paired Interleaved</option>
166 </param> 186 </param>
167 <when value="paired"> 187 <when value="paired">
168 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/> 188 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
169 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/> 189 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
170 190 <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
171 <!-- PE stat selection block 1: If you make any changes in this conditional block, copy them to PE stat selection block 2 below as well --> 191 <sanitizer invalid_char="">
172 192 <valid initial="string.digits"><add value=","/> </valid>
173 <conditional name="paired_stats"> 193 </sanitizer>
174 <param name="paired_stats_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Specify insert size statistics?" help="-I; if you choose to not specify, it will be inferred from the data"/> 194 </param>
175 <when value="set">
176
177 <param name="iset_stats" type="text" value="250" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths in the form mean,sd,min,max" help="-I; only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
178 <sanitizer invalid_char="">
179 <valid initial="string.digits"><add value=","/> </valid>
180 </sanitizer>
181 </param>
182
183 </when>
184 <when value="do_not_set">
185 <!-- do nothing -->
186 </when>
187 </conditional>
188
189 <!-- end of PE stat selection block 1 -->
190
191 </when> 195 </when>
192 <when value="single"> 196 <when value="single">
193 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/> 197 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/>
194 </when> 198 </when>
199 <when value="paired_collection">
200 <param name="fastq_input1" format="fastqsanger" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
201 <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
202 <sanitizer invalid_char="">
203 <valid initial="string.digits"><add value=","/> </valid>
204 </sanitizer>
205 </param>
206 </when>
195 <when value="paired_iv"> 207 <when value="paired_iv">
196 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> 208 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
197 209 <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
198 <!-- PE stat selection block 2: If you make any changes in this conditional block, copy them to PE stat selection block 1 above as well --> 210 <sanitizer invalid_char="">
199 211 <valid initial="string.digits"><add value=","/> </valid>
200 <conditional name="iv_stats"> 212 </sanitizer>
201 <param name="iv_stats_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Specify insert size statistics?" help="-I; if you choose to not specify, it will be inferred from the data"/> 213 </param>
202 <when value="set">
203
204 <param name="iset_stats" type="text" value="250" size="10" label="Enter mean, standerd deviation, max, and min for insert lengths in the form mean,sd,min,max" help="-I; only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
205 <sanitizer invalid_char="">
206 <valid initial="string.digits"><add value=","/> </valid>
207 </sanitizer>
208 </param>
209
210 </when>
211 <when value="do_not_set">
212 <!-- do nothing -->
213 </when>
214 </conditional>
215
216 <!-- end of PE stat selection block 2 -->
217
218 </when> 214 </when>
219 </conditional> 215 </conditional>
216
220 217
221 <conditional name="rg"> 218 <conditional name="rg">
222 <param name="rg_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Specify readgroup information?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details"/> 219 <param name="rg_selector" type="select" label="Set read groups information?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
220 <option value="set">Set</option>
221 <option value="do_not_set" selected="True">Do not set</option>
222 </param>
223 <when value="set"> 223 <when value="set">
224 <param name="ID" type="text" value="readgroup1" size="20" label="Specify readgroup ID" help="This value must be unique among multiple samples in your experiment"> 224 <param name="ID" type="text" value="readgroup1" size="20" label="Specify readgroup ID" help="This value must be unique among multiple samples in your experiment">
225 <sanitizer invalid_char=""> 225 <sanitizer invalid_char="">
226 <valid initial="string.printable"/> 226 <valid initial="string.printable"/>
227 </sanitizer> 227 </sanitizer>
250 <when value="pacbio"> 250 <when value="pacbio">
251 <!-- do nothing. all magic happens within <command> tag --> 251 <!-- do nothing. all magic happens within <command> tag -->
252 </when> 252 </when>
253 <when value="full"> 253 <when value="full">
254 <conditional name="algorithmic_options"> 254 <conditional name="algorithmic_options">
255 <param name="algorithmic_options_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options." /> 255 <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options.">
256 <option value="set">Set</option>
257 <option value="do_not_set" selected="True">Do not set</option>
258 </param>
256 <when value="set"> 259 <when value="set">
257 <param name="k" type="integer" value="19" label="minimum seed length" help="-k; default=19"/> 260 <param name="k" type="integer" value="19" label="minimum seed length" help="-k; default=19"/>
258 <param name="w" type="integer" value="100" label="band width for banded alignment" help="-w; default=100"/> 261 <param name="w" type="integer" value="100" label="band width for banded alignment" help="-w; default=100"/>
259 <param name="d" type="integer" value="100" label="off-diagonal X-dropoff" help="-d; default=100"/> 262 <param name="d" type="integer" value="100" label="off-diagonal X-dropoff" help="-d; default=100"/>
260 <param name="r" type="float" value="1.5" label="look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5"/> 263 <param name="r" type="float" value="1.5" label="look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5"/>
269 </when> 272 </when>
270 <when value="do_not_set"> 273 <when value="do_not_set">
271 <!-- do nothing --> 274 <!-- do nothing -->
272 </when> 275 </when>
273 </conditional> 276 </conditional>
277
274 <conditional name="scoring_options"> 278 <conditional name="scoring_options">
275 <param name="scoring_options_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options." /> 279 <param name="scoring_options_selector" type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options.">
280 <option value="set">Set</option>
281 <option value="do_not_set" selected="True">Do not set</option>
282 </param>
276 <when value="set"> 283 <when value="set">
277 <param name="A" type="integer" value="1" label="score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U; default=1"/> 284 <param name="A" type="integer" value="1" label="score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U; default=1"/>
278 <param name="B" type="integer" value="4" label="penalty for mismatch" help="-B; default=4"/> 285 <param name="B" type="integer" value="4" label="penalty for mismatch" help="-B; default=4"/>
279 <param name="O" type="text" value="6,6" label="gap open penalty for deletions and insertions" help="-O; default=6,6"> 286 <param name="O" type="text" value="6,6" label="gap open penalty for deletions and insertions" help="-O; default=6,6">
280 <sanitizer invalid_char=""> 287 <sanitizer invalid_char="">
295 </when> 302 </when>
296 <when value="do_not_set"> 303 <when value="do_not_set">
297 <!-- do nothing --> 304 <!-- do nothing -->
298 </when> 305 </when>
299 </conditional> 306 </conditional>
307
300 <conditional name="io_options"> 308 <conditional name="io_options">
301 <param name="io_options_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options." /> 309 <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options.">
310 <option value="set">Set</option>
311 <option value="do_not_set" selected="True">Do not set</option>
312 </param>
302 <when value="set"> 313 <when value="set">
303 <param name="T" type="integer" value="30" label="minimum score to output" help="-T; default=30"/> 314 <param name="T" type="integer" value="30" label="minimum score to output" help="-T; default=30"/>
304 <param name="h" type="integer" value="5" label="if there are this many hits with score >80% of the max score, output all in XA tag" help="-h; default=5"/> 315 <param name="h" type="integer" value="5" label="if there are this many hits with score >80% of the max score, output all in XA tag" help="-h; default=5"/>
305 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="output all alignments for single-ends or unpaired paired-ends" help="-a"/> 316 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="output all alignments for single-ends or unpaired paired-ends" help="-a"/>
306 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="append FASTA/FASTQ comment to BAM output" help="-C"/> 317 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="append FASTA/FASTQ comment to BAM output" help="-C"/>
416 -I FLOAT[,FLOAT[,INT[,INT]]] 427 -I FLOAT[,FLOAT[,INT[,INT]]]
417 specify the mean, standard deviation (10% of the mean if absent), max 428 specify the mean, standard deviation (10% of the mean if absent), max
418 (4 sigma from the mean if absent) and min of the insert size distribution. 429 (4 sigma from the mean if absent) and min of the insert size distribution.
419 FR orientation only. [inferred] 430 FR orientation only. [inferred]
420 431
421 ------ 432
422 433 @dataset_collections@
423 .. class:: warningmark 434
424 435 @RG@
425 **An important note on Read Groups** 436
426 437 @info@
427 One of the recommended best practices in NGS analysis is adding read group information to BAM files. You can do this directly in the BWA MEM interface using the 438
428 **Specify readgroup information?** widget. If you are not familiar with readgroups you should know that this is effectively a way to tag reads with an additional ID.
429 This allows you to combine BAM files from, for example, multiple BWA MEM runs into a single dataset. This significantly simplifies downstream processing as
430 instead of dealing with multiple datasets you have to handle only one. This is possible because the readgroup information allows you to identify
431 data from different experiments even if they are combined in one file. Many downstream analysis tools such as variant callers (e.g., FreeBayes or Naive Variant Caller
432 present in Galaxy) are aware of readgroups and will automatically generate calls for each individual sample even if they are combined within a single file.
433
434 -----
435
436 .. class:: infomark
437
438 **More info**
439
440 To obtain more information about BWA MEM and ask questions use these resources:
441
442 1. https://biostar.usegalaxy.org/
443 2. https://www.biostars.org/
444 3. https://github.com/lh3/bwa
445 4. http://bio-bwa.sourceforge.net/
446 439
447 440
448 </help> 441 </help>
449 <citations> 442 <citations>
450 <citation type="doi">10.1093/bioinformatics/btp324</citation> 443 <citation type="doi">10.1093/bioinformatics/btp324</citation>