Mercurial > repos > devteam > bwa
annotate bwa-mem.xml @ 9:7bee165c3a44 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit b3b7a9898eb2b5d12745ef96250b04955bd1e209
author | devteam |
---|---|
date | Thu, 03 Dec 2015 17:38:14 -0500 |
parents | d8c9597bfb09 |
children | 6069ffa8b240 |
rev | line source |
---|---|
0 | 1 <?xml version="1.0"?> |
9
7bee165c3a44
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit b3b7a9898eb2b5d12745ef96250b04955bd1e209
devteam
parents:
7
diff
changeset
|
2 <tool id="bwa_mem" name="Map with BWA-MEM" version="0.4.2"> |
2 | 3 <description>- map medium and long reads (> 100 bp) against reference genome</description> |
0 | 4 <macros> |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
5 <import>read_group_macros.xml</import> |
0 | 6 <import>bwa_macros.xml</import> |
7 </macros> | |
7
d8c9597bfb09
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit ddb8bdb9d62399f086b06b3469450d0aad2113bd
devteam
parents:
6
diff
changeset
|
8 <expand macro="requirements" /> |
6
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
9 <expand macro="stdio" /> |
0 | 10 <command> |
11 #set $reference_fasta_filename = "localref.fa" | |
2 | 12 |
0 | 13 #if str( $reference_source.reference_source_selector ) == "history": |
14 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && | |
2 | 15 |
0 | 16 ## The following shell commands decide which of the BWA indexing algorithms (IS or BWTSW) will be run |
17 ## depending on the size of the input FASTA dataset | |
18 ( | |
19 size=`stat -c %s "${reference_fasta_filename}" 2>/dev/null`; ## Linux | |
2 | 20 if [ $? -eq 0 ]; |
0 | 21 then |
2 | 22 if [ "\$size" -lt 2000000000 ]; |
0 | 23 then |
24 bwa index -a is "${reference_fasta_filename}"; | |
25 echo "Generating BWA index with is algorithm"; | |
26 else | |
27 bwa index -a bwtsw "${reference_fasta_filename}"; | |
28 echo "Generating BWA index with bwtsw algorithm"; | |
29 fi; | |
30 fi; | |
31 | |
2 | 32 eval \$(stat -s "${reference_fasta_filename}" 2>/dev/null); ## OSX |
33 if [ -n "\$st_size" ]; | |
0 | 34 then |
2 | 35 if [ "\$st_size" -lt 2000000000 ]; |
0 | 36 then |
37 bwa index -a is "${reference_fasta_filename}"; | |
38 echo "Generating BWA index with is algorithm"; | |
39 else | |
40 bwa index -a bwtsw "${reference_fasta_filename}"; | |
41 echo "Generating BWA index with bwtsw algorithm"; | |
42 fi; | |
43 fi; | |
44 ) && | |
2 | 45 |
0 | 46 #else: |
47 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
48 #end if | |
2 | 49 |
0 | 50 ## Begin BWA-MEM command line |
2 | 51 |
0 | 52 bwa mem |
53 -t "\${GALAXY_SLOTS:-1}" | |
2 | 54 -v 1 ## Verbosity is set to 1 (errors only) |
55 | |
0 | 56 #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option |
57 -p | |
58 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used | |
59 -I "${fastq_input.iset_stats}" | |
60 #end if | |
61 #end if | |
2 | 62 |
0 | 63 #if str( $analysis_type.analysis_type_selector ) == "pacbio": |
2 | 64 -x pacbio |
0 | 65 #elif str( $analysis_type.analysis_type_selector ) == "full": |
2 | 66 #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set": ## Algorithmic options |
0 | 67 -k "${analysis_type.algorithmic_options.k}" |
68 -w "${analysis_type.algorithmic_options.w}" | |
69 -d "${analysis_type.algorithmic_options.d}" | |
70 -r "${analysis_type.algorithmic_options.r}" | |
71 -y "${analysis_type.algorithmic_options.y}" | |
72 -c "${analysis_type.algorithmic_options.c}" | |
73 -D "${analysis_type.algorithmic_options.D}" | |
74 -W "${analysis_type.algorithmic_options.W}" | |
75 -m "${analysis_type.algorithmic_options.m}" | |
76 ${analysis_type.algorithmic_options.S} | |
77 ${analysis_type.algorithmic_options.P} | |
78 ${analysis_type.algorithmic_options.e} | |
79 #end if | |
2 | 80 |
81 #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set": ## Scoring options | |
0 | 82 -A "${analysis_type.scoring_options.A}" |
83 -B "${analysis_type.scoring_options.B}" | |
84 -O "${analysis_type.scoring_options.O}" | |
85 -E "${analysis_type.scoring_options.E}" | |
86 -L "${analysis_type.scoring_options.L}" | |
87 -U "${analysis_type.scoring_options.U}" | |
88 #end if | |
2 | 89 |
90 #if str( $analysis_type.io_options.io_options_selector ) == "set": ## IO options | |
0 | 91 -T "${analysis_type.io_options.T}" |
92 -h "${analysis_type.io_options.h}" | |
93 ${analysis_type.io_options.a} | |
94 ${analysis_type.io_options.C} | |
95 ${analysis_type.io_options.V} | |
96 ${analysis_type.io_options.Y} | |
97 ${analysis_type.io_options.M} | |
98 #end if | |
2 | 99 |
0 | 100 #end if |
2 | 101 |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
102 ## Handle read group options... |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
103 @define_read_group_helpers@ |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
104 #if str( $fastq_input.fastq_input_selector ) == "paired": |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
105 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2) |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
106 #else: |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
107 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1) |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
108 #end if |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
109 @set_use_rg_var@ |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
110 @set_read_group_vars@ |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
111 #if $use_rg |
2 | 112 @set_rg_string@ |
113 -R '$rg_string' | |
114 #end if | |
115 | |
0 | 116 #if str( $fastq_input.fastq_input_selector ) == "paired": |
117 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used | |
118 -I "${fastq_input.iset_stats}" | |
119 #end if | |
120 | |
121 "${reference_fasta_filename}" | |
2 | 122 "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}" |
123 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": | |
124 #if str( $fastq_input.iset_stats ): ## check that insert statistics is used | |
125 -I "${fastq_input.iset_stats}" | |
126 #end if | |
127 | |
0 | 128 "${reference_fasta_filename}" |
2 | 129 "${fastq_input.fastq_input1.forward}" "${fastq_input.fastq_input1.reverse}" |
130 #else: | |
131 "${reference_fasta_filename}" | |
0 | 132 "${fastq_input.fastq_input1}" |
133 #end if | |
2 | 134 |
0 | 135 | samtools view -Sb - > temporary_bam_file.bam && |
2 | 136 |
0 | 137 samtools sort -f temporary_bam_file.bam ${bam_output} |
138 </command> | |
2 | 139 |
0 | 140 <inputs> |
141 | |
142 <conditional name="reference_source"> | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
143 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below"> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
144 <option value="cached">Use a built-in genome index</option> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
145 <option value="history">Use a genome from history and build index</option> |
0 | 146 </param> |
147 <when value="cached"> | |
148 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> | |
149 <options from_data_table="bwa_mem_indexes"> | |
150 <filter type="sort_by" column="2" /> | |
151 <validator type="no_options" message="No indexes are available" /> | |
152 </options> | |
153 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
154 </param> | |
155 </when> | |
2 | 156 <when value="history"> |
0 | 157 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> |
158 </when> | |
159 </conditional> | |
160 <conditional name="fastq_input"> | |
161 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> | |
162 <option value="paired">Paired</option> | |
163 <option value="single">Single</option> | |
164 <option value="paired_collection">Paired Collection</option> | |
165 <option value="paired_iv">Paired Interleaved</option> | |
166 </param> | |
167 <when value="paired"> | |
168 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/> | |
169 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/> | |
7
d8c9597bfb09
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit ddb8bdb9d62399f086b06b3469450d0aad2113bd
devteam
parents:
6
diff
changeset
|
170 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details."> |
0 | 171 <sanitizer invalid_char=""> |
172 <valid initial="string.digits"><add value=","/> </valid> | |
173 </sanitizer> | |
174 </param> | |
2 | 175 </when> |
0 | 176 <when value="single"> |
177 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/> | |
178 </when> | |
179 <when value="paired_collection"> | |
180 <param name="fastq_input1" format="fastqsanger" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> | |
7
d8c9597bfb09
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit ddb8bdb9d62399f086b06b3469450d0aad2113bd
devteam
parents:
6
diff
changeset
|
181 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details."> |
0 | 182 <sanitizer invalid_char=""> |
183 <valid initial="string.digits"><add value=","/> </valid> | |
184 </sanitizer> | |
185 </param> | |
186 </when> | |
187 <when value="paired_iv"> | |
2 | 188 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> |
7
d8c9597bfb09
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit ddb8bdb9d62399f086b06b3469450d0aad2113bd
devteam
parents:
6
diff
changeset
|
189 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details."> |
0 | 190 <sanitizer invalid_char=""> |
191 <valid initial="string.digits"><add value=","/> </valid> | |
192 </sanitizer> | |
2 | 193 </param> |
0 | 194 </when> |
195 </conditional> | |
2 | 196 |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
197 <expand macro="read_group_conditional" /> |
2 | 198 |
0 | 199 <conditional name="analysis_type"> |
200 <param name="analysis_type_selector" type="select" label="Select analysis mode"> | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
201 <option value="illumina">1.Simple Illumina mode</option> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
202 <option value="pacbio">2.PacBio mode (-x pacbio)</option> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
203 <option value="full">3.Full list of options</option> |
0 | 204 </param> |
205 <when value="illumina"> | |
206 <!-- do nothing --> | |
207 </when> | |
208 <when value="pacbio"> | |
209 <!-- do nothing. all magic happens within <command> tag --> | |
210 </when> | |
211 <when value="full"> | |
212 <conditional name="algorithmic_options"> | |
213 <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options."> | |
214 <option value="set">Set</option> | |
215 <option value="do_not_set" selected="True">Do not set</option> | |
216 </param> | |
217 <when value="set"> | |
2 | 218 <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/> |
219 <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/> | |
220 <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/> | |
221 <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" /> | |
222 <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" /> | |
223 <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/> | |
224 <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/> | |
225 <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/> | |
226 <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/> | |
227 <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/> | |
228 <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/> | |
229 <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/> | |
0 | 230 </when> |
231 <when value="do_not_set"> | |
232 <!-- do nothing --> | |
233 </when> | |
234 </conditional> | |
2 | 235 |
0 | 236 <conditional name="scoring_options"> |
237 <param name="scoring_options_selector" type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options."> | |
238 <option value="set">Set</option> | |
239 <option value="do_not_set" selected="True">Do not set</option> | |
240 </param> | |
241 <when value="set"> | |
2 | 242 <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/> |
243 <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/> | |
244 <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6"> | |
0 | 245 <sanitizer invalid_char=""> |
246 <valid initial="string.digits"><add value=","/> </valid> | |
247 </sanitizer> | |
248 </param> | |
2 | 249 <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost '-O + -E*k'. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1"> |
0 | 250 <sanitizer invalid_char=""> |
251 <valid initial="string.digits"><add value=","/> </valid> | |
252 </sanitizer> | |
253 </param> | |
2 | 254 <param name="L" type="text" value="5,5" label="Penalties for 5'-end and 3'-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced"> |
0 | 255 <sanitizer invalid_char=""> |
256 <valid initial="string.digits"><add value=","/> </valid> | |
257 </sanitizer> | |
258 </param> | |
2 | 259 <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/> |
0 | 260 </when> |
261 <when value="do_not_set"> | |
262 <!-- do nothing --> | |
263 </when> | |
264 </conditional> | |
2 | 265 |
0 | 266 <conditional name="io_options"> |
267 <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options."> | |
268 <option value="set">Set</option> | |
269 <option value="do_not_set" selected="True">Do not set</option> | |
270 </param> | |
271 <when value="set"> | |
2 | 272 <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/> |
273 <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score >80% of the max score, output them all in the XA tag" help="-h; default=5" /> | |
274 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/> | |
275 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/> | |
276 <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-V"/> | |
277 <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" /> | |
278 <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard&lt;1.96 compatibility" /> | |
0 | 279 </when> |
280 <when value="do_not_set"> | |
281 <!-- do nothing --> | |
282 </when> | |
283 </conditional> | |
284 </when> | |
285 </conditional> | |
286 </inputs> | |
2 | 287 |
0 | 288 <outputs> |
6
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
289 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> |
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
290 <expand macro="dbKeyActionsBwaMem" /> |
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
291 </data> |
0 | 292 </outputs> |
2 | 293 |
0 | 294 <tests> |
295 <test> | |
296 <param name="reference_source_selector" value="history" /> | |
297 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> | |
298 <param name="fastq_input_selector" value="paired"/> | |
299 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> | |
300 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> | |
301 <param name="analysis_type_selector" value="illumina"/> | |
302 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> | |
303 </test> | |
2 | 304 <test> |
305 <param name="reference_source_selector" value="history" /> | |
306 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> | |
307 <param name="fastq_input_selector" value="paired"/> | |
308 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> | |
309 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> | |
310 <param name="rg_selector" value="set"/> | |
311 <param name="ID" value="rg1"/> | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
312 <param name="PL" value="CAPILLARY"/> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
313 <param name="LB" value="AARDVARK-1" /> |
2 | 314 <param name="analysis_type_selector" value="illumina"/> |
315 <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" /> | |
316 </test> | |
0 | 317 </tests> |
318 <help> | |
319 **What it does** | |
320 | |
321 From http://arxiv.org/abs/1303.3997: | |
322 | |
323 BWA-MEM is a new alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human. | |
324 It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment. | |
325 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases. | |
326 For mapping 100bp sequences, BWA-MEM shows better performance than several state-of-art read aligners to date. | |
327 | |
328 It is best suited for mapping long (>70 nt) reads against large reference genomes. | |
329 | |
330 This Galaxy tool wraps bwa-mem module of bwa read mapping tool. Galaxy implementation takes fastq files as input and produces output in BAM (not SAM) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). | |
331 | |
332 ----- | |
333 | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
334 **Indices: Selecting reference genomes for BWA** |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
335 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
336 Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
337 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
338 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
339 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`. |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
340 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
341 If your genome of interest is not listed here you have two choices: |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
342 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
343 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
344 2. Upload your genome of interest as a FASTA file to Galaxy history and select **Use a genome from the history and build index** option. |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
345 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
346 ----- |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
2
diff
changeset
|
347 |
0 | 348 **Galaxy-specific option** |
349 | |
350 Galaxy allows three levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are: | |
351 | |
352 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem &lt;reference index&gt; &lt;fastq dataset1&gt; [fastq dataset2] | |
353 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 &lt;reference index&gt; &lt;PacBio dataset in fastq format&gt; | |
354 3. *Full list of options*: Allows access to all options through Galaxy interface. | |
2 | 355 |
0 | 356 ------ |
357 | |
358 **BWA MEM options** | |
359 | |
360 Each Galaxy parameter widget corresponds to command line flags listed below: | |
361 | |
362 Algorithm options:: | |
363 | |
364 -k INT minimum seed length [19] | |
365 -w INT band width for banded alignment [100] | |
366 -d INT off-diagonal X-dropoff [100] | |
367 -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] | |
368 -y INT seed occurrence for the 3rd round seeding [20] | |
369 -c INT skip seeds with more than INT occurrences [500] | |
370 -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] | |
371 -W INT discard a chain if seeded bases shorter than INT [0] | |
372 -m INT perform at most INT rounds of mate rescues for each read [50] | |
373 -S skip mate rescue | |
374 -P skip pairing; mate rescue performed unless -S also in use | |
375 -e discard full-length exact matches | |
376 | |
377 Scoring options:: | |
378 | |
379 -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] | |
380 -B INT penalty for a mismatch [4] | |
381 -O INT[,INT] gap open penalties for deletions and insertions [6,6] | |
382 -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] | |
383 -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5] | |
384 -U INT penalty for an unpaired read pair [17] | |
385 | |
386 Input/output options:: | |
387 | |
388 -p first query file consists of interleaved paired-end sequences | |
389 -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] | |
390 | |
391 -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3] | |
392 -T INT minimum score to output [30] | |
393 -h INT if there are <INT hits with score >80% of the max score, output all in XA [5] | |
394 -a output all alignments for SE or unpaired PE | |
395 -C append FASTA/FASTQ comment to SAM output | |
396 -V output the reference FASTA header in the XR tag | |
397 -Y use soft clipping for supplementary alignments | |
398 -M mark shorter split hits as secondary | |
399 | |
400 -I FLOAT[,FLOAT[,INT[,INT]]] | |
401 specify the mean, standard deviation (10% of the mean if absent), max | |
402 (4 sigma from the mean if absent) and min of the insert size distribution. | |
403 FR orientation only. [inferred] | |
404 | |
405 @dataset_collections@ | |
406 | |
407 @RG@ | |
408 | |
409 @info@ | |
410 </help> | |
411 <citations> | |
412 <citation type="doi">10.1093/bioinformatics/btp324</citation> | |
413 <citation type="doi">10.1093/bioinformatics/btp698</citation> | |
414 <citation type="bibtex">@misc{1303.3997, | |
415 Author = {Heng Li}, | |
416 Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, | |
417 Year = {2013}, | |
418 Eprint = {arXiv:1303.3997}, | |
419 url = {http://arxiv.org/abs/1303.3997}, | |
420 }</citation> | |
421 </citations> | |
422 </tool> |