comparison bwa_long/bwa_wrapper.xml @ 0:fb4844b6a98e default tip

Migrated tool version 1.0.3 from old tool shed archive to new tool shed repository
author juanperin
date Tue, 07 Jun 2011 17:28:32 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:fb4844b6a98e
1 <tool id="bwa_wrapper" name="Map with BWA" version="1.0.3">
2 <description></description>
3 <command interpreter="python">bwa_wrapper.py
4 --threads="4"
5 #if $genomeSource.refGenomeSource == "history":
6 --ref="$genomeSource.ownFile"
7 #else:
8 --ref="$genomeSource.indices.value"
9 #end if
10 --fastq="$paired.input1"
11 #if $paired.sPaired == "paired":
12 --rfastq="$paired.input2"
13 #else:
14 --rfastq="None"
15 #end if
16 --output="$output" --genAlignType=$paired.sPaired --params=$params.source_select --fileSource=$genomeSource.refGenomeSource
17 #if $params.source_select == "pre_set":
18 --maxEditDist="None" --fracMissingAligns="None" --maxGapOpens="None" --maxGapExtens="None" --disallowLongDel="None" --disallowIndel="None" --seed="None" --maxEditDistSeed="None" --mismatchPenalty="None" --gapOpenPenalty="None" --gapExtensPenalty="None" --suboptAlign="None" --noIterSearch="None" --outputTopN="None" --maxInsertSize="None" --maxOccurPairing="None"
19 #else:
20 --maxEditDist=$params.maxEditDist --fracMissingAligns=$params.fracMissingAligns --maxGapOpens=$params.maxGapOpens --maxGapExtens=$params.maxGapExtens --disallowLongDel=$params.disallowLongDel --disallowIndel=$params.disallowIndel --seed=$params.seed --maxEditDistSeed=$params.maxEditDistSeed --mismatchPenalty=$params.mismatchPenalty --gapOpenPenalty=$params.gapOpenPenalty --gapExtensPenalty=$params.gapExtensPenalty --suboptAlign=$params.suboptAlign --noIterSearch=$params.noIterSearch --outputTopN=$params.outputTopN --maxInsertSize=$params.maxInsertSize --maxOccurPairing=$params.maxOccurPairing
21 #end if
22 #if $genomeSource.refGenomeSource == "history":
23 --dbkey="$dbkey"
24 #else:
25 --dbkey="None"
26 #end if
27 --suppressHeader=$suppressHeader
28 </command>
29 <inputs>
30 <conditional name="genomeSource"> <!-- reference source: a built-in index or a FASTA dataset from the history -->
31 <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
32 <option value="indexed">Use a built-in index</option>
33 <option value="history">Use one from the history</option>
34 </param>
35 <when value="indexed">
36 <param name="indices" type="select" label="Select a reference genome">
37 <options from_file="bwa_index.loc"> <!-- column 0 of the .loc file is the display name, column 1 the value handed to the wrapper as the reference -->
38 <column name="value" index="1" />
39 <column name="name" index="0" />
40 </options>
41 </param>
42 </when>
43 <when value="history">
44 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
45 </when>
46 </conditional>
47 <conditional name="paired"> <!-- library layout; the selected value is also passed to the wrapper as genAlignType -->
48 <param name="sPaired" type="select" label="Is this library single-end, paired-end, or long-read?">
49 <option value="single">Single-end</option>
50 <option value="paired">Paired-end</option>
51 <option value="longread">Long-reads</option>
52 </param>
53 <when value="single"> <!-- every branch defines input1, which the command reads unconditionally -->
54 <param name="input1" type="data" format="fastqsanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
55 </when>
56 <when value="paired">
57 <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
58 <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
59 </when>
60 <when value="longread">
61 <param name="input1" type="data" format="fastqsanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
62 </when>
63 </conditional>
64 <conditional name="params"> <!-- pre_set sends no tuning values; full exposes each BWA option below -->
65 <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
66 <option value="pre_set">Commonly Used</option>
67 <option value="full">Full Parameter List</option>
68 </param>
69 <when value="pre_set" /> <!-- no inputs here: the command sends the literal string None for every tuning option -->
70 <when value="full">
71 <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (-n)" help="Enter this value OR a fraction of missing alignments, not both" />
72 <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (-n)" help="Enter this value OR maximum edit distance, not both" />
73 <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (-o)" />
74 <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (-e)" help="-1 for k-difference mode (disallowing long gaps)" />
75 <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (-d)" />
76 <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (-i)" />
77 <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (-l)" help="Enter -1 for infinity" />
78 <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (-k)" />
79 <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (-M)" help="BWA will not search for suboptimal hits with a score lower than (bestScore - [value])" />
80 <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (-O)" />
81 <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (-E)" />
82 <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Proceed with suboptimal alignments even if the top hit is a repeat" help="By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp) (-R)" />
83 <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Disable iterative search" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default (-N)" />
84 <param name="outputTopN" type="integer" value="-1" label="Output top [value] hits" help="For single-end reads only. Enter -1 to disable outputting multiple hits. NOTE: If you put in a positive value here, your output will NOT be in SAM format (-n)" />
85 <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes (-a)" />
86 <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing (-o)" />
87 </when>
88 </conditional>
89 <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
90 </inputs>
91 <outputs>
92 <data format="sam" name="output" /> <!-- alignment file; its path is passed to the wrapper through the output option -->
93 </outputs>
94 <tests>
95 <test>
96 <!--
97 Single-end reads, built-in index, pre-set parameters. BWA commands used to produce the expected output:
98 bwa aln -t 4 phiX test-data/bwa_wrapper_in1.fastq > bwa_wrapper_out1.sai
99 bwa samse phiX bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastq >> bwa_wrapper_out1.sam
100 phiX is the prefix for the reference index in the commands above
101 remove the header lines (beginning with '@') from the resulting sam file, because suppressHeader is true
102 note that 'phiX' should be 'PHIX174' to match what's in the indexed file
103 -->
104 <param name="refGenomeSource" value="indexed" />
105 <param name="indices" value="phiX" />
106 <param name="sPaired" value="single" />
107 <param name="input1" value="bwa_wrapper_in1.fastq" ftype="fastqsanger" />
108 <param name="source_select" value="pre_set" />
109 <param name="suppressHeader" value="true" />
110 <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="true" />
111 </test>
112 <test>
113 <!--
114 Single-end reads, reference FASTA from the history, full parameter list. BWA commands used to produce the expected output:
115 cp test-data/phiX.fasta phiX.fasta
116 bwa index -a is phiX.fasta
117 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastq > bwa_wrapper_out2.sai
118 bwa samse phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastq >> bwa_wrapper_out2.sam
119 phiX.fasta is the prefix for the reference
120 remove the header lines (beginning with '@') from the resulting sam file, because suppressHeader is true
121 -->
122 <param name="refGenomeSource" value="history" />
123 <param name="ownFile" value="phiX.fasta" />
124 <param name="sPaired" value="single" />
125 <param name="input1" value="bwa_wrapper_in1.fastq" ftype="fastqsanger" />
126 <param name="source_select" value="full" />
127 <param name="maxEditDist" value="0" />
128 <param name="fracMissingAligns" value="0.04" />
129 <param name="maxGapOpens" value="1" />
130 <param name="maxGapExtens" value="-1" />
131 <param name="disallowLongDel" value="16" />
132 <param name="disallowIndel" value="5" />
133 <param name="seed" value="-1" />
134 <param name="maxEditDistSeed" value="2" />
135 <param name="mismatchPenalty" value="3" />
136 <param name="gapOpenPenalty" value="11" />
137 <param name="gapExtensPenalty" value="4" />
138 <param name="suboptAlign" value="true" />
139 <param name="noIterSearch" value="true" />
140 <param name="outputTopN" value="-1" />
141 <param name="maxInsertSize" value="500" />
142 <param name="maxOccurPairing" value="100000" />
143 <param name="suppressHeader" value="true" />
144 <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="true" />
145 </test>
146 <test>
147 <!--
148 Paired-end reads, built-in index, full parameter list. BWA commands used to produce the expected output:
149 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in2.fastq > bwa_wrapper_out3a.sai
150 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in3.fastq > bwa_wrapper_out3b.sai
151 bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastq test-data/bwa_wrapper_in3.fastq >> bwa_wrapper_out3.sam
152 phiX.fasta is the prefix for the reference
153 remove the header lines (beginning with '@') from the resulting sam file, because suppressHeader is true
154 note that 'phiX' should be 'PHIX174' to match what's in the indexed file
155 -->
156 <param name="refGenomeSource" value="indexed" />
157 <param name="indices" value="phiX" />
158 <param name="sPaired" value="paired" />
159 <param name="input1" value="bwa_wrapper_in2.fastq" ftype="fastqsanger" />
160 <param name="input2" value="bwa_wrapper_in3.fastq" ftype="fastqsanger" />
161 <param name="source_select" value="full" />
162 <param name="maxEditDist" value="0" />
163 <param name="fracMissingAligns" value="0.04" />
164 <param name="maxGapOpens" value="1" />
165 <param name="maxGapExtens" value="-1" />
166 <param name="disallowLongDel" value="16" />
167 <param name="disallowIndel" value="5" />
168 <param name="seed" value="-1" />
169 <param name="maxEditDistSeed" value="2" />
170 <param name="mismatchPenalty" value="3" />
171 <param name="gapOpenPenalty" value="11" />
172 <param name="gapExtensPenalty" value="4" />
173 <param name="suboptAlign" value="true" />
174 <param name="noIterSearch" value="true" />
175 <param name="outputTopN" value="-1" />
176 <param name="maxInsertSize" value="500" />
177 <param name="maxOccurPairing" value="100000" />
178 <param name="suppressHeader" value="true" />
179 <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="true" />
180 </test>
181 </tests>
182 <help>
183
184 **What it does**
185
186 BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It was developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
187
188 ------
189
190 **Know what you are doing**
191
192 .. class:: warningmark
193
194 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
195
196 .. __: http://bio-bwa.sourceforge.net/
197
198 ------
199
200 **Input formats**
201
202 BWA accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
203
204 ------
205
206 **Outputs**
207
208 The output is in SAM format, and has the following columns::
209
210 Column Description
211 -------- --------------------------------------------------------
212 1 QNAME Query (pair) NAME
213 2 FLAG bitwise FLAG
214 3 RNAME Reference sequence NAME
215 4 POS 1-based leftmost POSition/coordinate of clipped sequence
216 5 MAPQ MAPping Quality (Phred-scaled)
217 6 CIGAR extended CIGAR string
218 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
219 8 MPOS 1-based Mate POSition
220 9 ISIZE Inferred insert SIZE
221 10 SEQ query SEQuence on the same strand as the reference
222 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
223 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
224
225 The flags are as follows::
226
227 Flag Description
228 ------ -------------------------------------
229 0x0001 the read is paired in sequencing
230 0x0002 the read is mapped in a proper pair
231 0x0004 the query sequence itself is unmapped
232 0x0008 the mate is unmapped
233 0x0010 strand of the query (1 for reverse)
234 0x0020 strand of the mate
235 0x0040 the read is the first read in a pair
236 0x0080 the read is the second read in a pair
237 0x0100 the alignment is not primary
238
239 It looks like this (scroll sideways to see the entire example)::
240
241 QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
242 HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
243 HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
244
245 -------
246
247 **BWA settings**
248
249 All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
250
251 ------
252
253 **BWA parameter list**
254
255 This is an exhaustive list of BWA options:
256
257 For **aln**::
258
259 -n NUM Maximum edit distance if the value is INT, or the fraction of missing
260 alignments given 2% uniform base error rate if FLOAT. In the latter
261 case, the maximum edit distance is automatically chosen for different
262 read lengths. [0.04]
263 -o INT Maximum number of gap opens [1]
264 -e INT Maximum number of gap extensions, -1 for k-difference mode
265 (disallowing long gaps) [-1]
266 -d INT Disallow a long deletion within INT bp towards the 3'-end [16]
267 -i INT Disallow an indel within INT bp towards the ends [5]
268 -l INT Take the first INT subsequence as seed. If INT is larger than the
269 query sequence, seeding will be disabled. For long reads, this option
270 is typically ranged from 25 to 35 for '-k 2'. [inf]
271 -k INT Maximum edit distance in the seed [2]
272 -t INT Number of threads (multi-threading mode) [1]
273 -M INT Mismatch penalty. BWA will not search for suboptimal hits with a score
274 lower than (bestScore-misMsc). [3]
275 -O INT Gap open penalty [11]
276 -E INT Gap extension penalty [4]
277 -c Reverse query but not complement it, which is required for alignment
278 in the color space.
279 -R Proceed with suboptimal alignments even if the top hit is a repeat. By
280 default, BWA only searches for suboptimal alignments if the top hit is
281 unique. Using this option has no effect on accuracy for single-end
282 reads. It is mainly designed for improving the alignment accuracy of
283 paired-end reads. However, the pairing procedure will be slowed down,
284 especially for very short reads (~32bp).
285 -N Disable iterative search. All hits with no more than maxDiff
286 differences will be found. This mode is much slower than the default.
287
288 For **samse**::
289
290 -n INT Output up to INT top hits. Value -1 to disable outputting multiple
291 hits. NOTE: Entering a value other than -1 will result in output that
292 is not in SAM format, and therefore not usable further down the
293 pipeline. Check the BWA documentation for details on the format of
294 the output. [-1]
295
296 For **sampe**::
297
298 -a INT Maximum insert size for a read pair to be considered as being mapped
299 properly. Since version 0.4.5, this option is only used when there
300 are not enough good alignments to infer the distribution of insert
301 sizes. [500]
302 -o INT Maximum occurrences of a read for pairing. A read with more
303 occurrences will be treated as a single-end read. Reducing this
304 parameter helps faster pairing. [100000]
305
306 </help>
307 <code file="bwa_wrapper_code.py" /> <!-- references companion Python code in bwa_wrapper_code.py; NOTE(review): that file is not visible here, confirm what hooks it defines -->
308 </tool>
309
310