annotate bwa_wrapper.xml @ 1:ccfa8e539bdf

add archive toolbox to manage zip outputs
author cmonjeau
date Mon, 24 Aug 2015 10:09:14 +0000
parents d6ba40f6c824
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
1 <tool id="bwa_wrapper_stacks" name="Map with BWA for STACKS" version="1.2.3">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
2 <description>from zip file with fastqsanger files</description>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
3 <requirements>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
4 <requirement type="package" version="0.6.2">bwa</requirement>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
5 </requirements>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
6 <description></description>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
7 <parallelism method="basic"></parallelism>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
8 <command interpreter="python">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
9 bwa_wrapper.py
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
10 --threads="4"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
11
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
12 #if $input1.ext == "fastqillumina":
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
13 --illumina1.3
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
14 #end if
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
15
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
16 ## reference source
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
17 --fileSource="${genomeSource.refGenomeSource}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
18 #if $genomeSource.refGenomeSource == "history":
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
19 ##build index on the fly
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
20 --ref="${genomeSource.ownFile}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
21 --dbkey="${dbkey}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
22 #else:
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
23 ##use precomputed indexes
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
24 --ref="${genomeSource.indices.fields.path}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
25 --do_not_build_index
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
26 #end if
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
27
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
28 ## input file(s)
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
29 --input1="${paired.input1}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
30
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
31 ## output file
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
32 --output="${output}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
33
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
34 ## run parameters
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
35 --params="${params.source_select}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
36 #if $params.source_select != "pre_set":
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
37 --maxEditDist="${params.maxEditDist}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
38 --fracMissingAligns="${params.fracMissingAligns}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
39 --maxGapOpens="${params.maxGapOpens}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
40 --maxGapExtens="${params.maxGapExtens}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
41 --disallowLongDel="${params.disallowLongDel}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
42 --disallowIndel="${params.disallowIndel}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
43 --seed="${params.seed}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
44 --maxEditDistSeed="${params.maxEditDistSeed}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
45 --mismatchPenalty="${params.mismatchPenalty}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
46 --gapOpenPenalty="${params.gapOpenPenalty}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
47 --gapExtensPenalty="${params.gapExtensPenalty}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
48 --suboptAlign="${params.suboptAlign}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
49 --noIterSearch="${params.noIterSearch}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
50 --outputTopN="${params.outputTopN}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
51 --outputTopNDisc="${params.outputTopNDisc}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
52 --maxInsertSize="${params.maxInsertSize}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
53 --maxOccurPairing="${params.maxOccurPairing}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
54 #if $params.readGroup.specReadGroup == "yes"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
55 --rgid="${params.readGroup.rgid}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
56 --rgcn="${params.readGroup.rgcn}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
57 --rgds="${params.readGroup.rgds}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
58 --rgdt="${params.readGroup.rgdt}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
59 --rgfo="${params.readGroup.rgfo}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
60 --rgks="${params.readGroup.rgks}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
61 --rglb="${params.readGroup.rglb}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
62 --rgpg="${params.readGroup.rgpg}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
63 --rgpi="${params.readGroup.rgpi}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
64 --rgpl="${params.readGroup.rgpl}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
65 --rgpu="${params.readGroup.rgpu}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
66 --rgsm="${params.readGroup.rgsm}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
67 #end if
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
68 #end if
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
69
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
70 ## suppress output SAM header
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
71 --suppressHeader="${suppressHeader}"
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
72 </command>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
73 <inputs>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
74 <conditional name="genomeSource">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
75 <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
76 <option value="indexed">Use a built-in index</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
77 <option value="history">Use one from the history</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
78 </param>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
79 <when value="indexed">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
80 <param name="indices" type="select" label="Select a reference genome">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
81 <options from_data_table="bwa_indexes">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
82 <filter type="sort_by" column="2" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
83 <validator type="no_options" message="No indexes are available" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
84 </options>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
85 </param>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
86 </when>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
87 <when value="history">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
88 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
89 </when>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
90 </conditional>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
91 <conditional name="paired">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
92 <param name="sPaired" type="select" label="Is this library mate-paired?">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
93 <option value="single">Single-end</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
94 </param>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
95 <when value="single">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
96 <param name="input1" type="data" format="zip" label="Zip file" help="Zip file with several FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
97 </when>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
98 </conditional>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
99 <conditional name="params">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
100 <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
101 <option value="pre_set">Commonly Used</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
102 <option value="full">Full Parameter List</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
103 </param>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
104 <when value="pre_set" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
105 <when value="full">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
106 <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
107 <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
108 <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
109 <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
110 <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
111 <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
112 <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
113 <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
114 <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
115 <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
116 <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
117 <param name="suboptAlign" type="integer" optional="True" label="Proceed with suboptimal alignments if there are no more than INT equally best hits. (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
118 <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
119 <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
120 <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
121 <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
122 <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
123 <conditional name="readGroup">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
124 <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
125 <option value="yes">Yes</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
126 <option value="no" selected="True">No</option>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
127 </param>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
128 <when value="yes">
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
129 <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
130 tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
131 IDs may be modified when merging SAM files in order to handle collisions." />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
132 <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
133 <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
134 <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
135 <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
136 flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
137 various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
138 <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
139 <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
140 <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
141 <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
142 <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA,
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
143 SOLID, HELICOS, IONTORRENT and PACBIO" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
144 <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
145 <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
146 </when>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
147 <when value="no" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
148 </conditional>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
149 </when>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
150 </conditional>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
151 <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
152 </inputs>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
153 <outputs>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
154 <data format="zip" name="output" label="${tool.name} on ${on_string}: mapped reads"/>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
155 </outputs>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
156 <help>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
157
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
158 **What it does**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
159
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
160 BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a sequence database (large), such as the human reference genome. It is developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
161
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
162 ------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
163
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
164 **Know what you are doing**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
165
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
166 .. class:: warningmark
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
167
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
168 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
169
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
170 .. __: http://bio-bwa.sourceforge.net/
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
171
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
172
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
173 Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki &lt;https://www.e-biogenouest.org/wiki/ManArchiveGalaxy&gt;`_ .
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
174
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
175 ------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
176
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
177 **Input formats**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
178
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
179 BWA accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*) or Illumina FASTQ format (galaxy type *fastqillumina*). Use the FASTQ Groomer to prepare your files.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
180
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
181 ------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
182
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
183 **A Note on Built-in Reference Genomes**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
184
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
185 The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
186
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
187 ------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
188
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
189 **Outputs**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
190
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
191 The output is in SAM format, and has the following columns::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
192
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
193 Column Description
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
194 -------- --------------------------------------------------------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
195 1 QNAME Query (pair) NAME
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
196 2 FLAG bitwise FLAG
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
197 3 RNAME Reference sequence NAME
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
198 4 POS 1-based leftmost POSition/coordinate of clipped sequence
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
199 5 MAPQ MAPping Quality (Phred-scaled)
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
200 6 CIGAR extended CIGAR string
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
201 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
202 8 MPOS 1-based Mate POSition
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
203 9 ISIZE Inferred insert SIZE
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
204 10 SEQ query SEQuence on the same strand as the reference
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
205 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
206 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
207
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
208 The flags are as follows::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
209
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
210 Flag Description
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
211 ------ -------------------------------------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
212 0x0001 the read is paired in sequencing
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
213 0x0002 the read is mapped in a proper pair
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
214 0x0004 the query sequence itself is unmapped
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
215 0x0008 the mate is unmapped
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
216 0x0010 strand of the query (1 for reverse)
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
217 0x0020 strand of the mate
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
218 0x0040 the read is the first read in a pair
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
219 0x0080 the read is the second read in a pair
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
220 0x0100 the alignment is not primary
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
221
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
222 It looks like this (scroll sideways to see the entire example)::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
223
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
224 QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
225 HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
226 HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
227
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
228 -------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
229
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
230 **BWA settings**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
231
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
232 All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
233
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
234 ------
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
235
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
236 **BWA parameter list**
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
237
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
238 This is an exhaustive list of BWA options:
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
239
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
240 For **aln**::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
241
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
242 -n NUM Maximum edit distance if the value is INT, or the fraction of missing
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
243 alignments given 2% uniform base error rate if FLOAT. In the latter
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
244 case, the maximum edit distance is automatically chosen for different
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
245 read lengths. [0.04]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
246 -o INT Maximum number of gap opens [1]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
247 -e INT Maximum number of gap extensions, -1 for k-difference mode
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
248 (disallowing long gaps) [-1]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
249 -d INT Disallow a long deletion within INT bp towards the 3'-end [16]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
250 -i INT Disallow an indel within INT bp towards the ends [5]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
251 -l INT Take the first INT subsequence as seed. If INT is larger than the
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
252 query sequence, seeding will be disabled. For long reads, this option
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
253 is typically ranged from 25 to 35 for '-k 2'. [inf]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
254 -k INT Maximum edit distance in the seed [2]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
255 -t INT Number of threads (multi-threading mode) [1]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
256 -M INT Mismatch penalty. BWA will not search for suboptimal hits with a score
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
257 lower than (bestScore-misMsc). [3]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
258 -O INT Gap open penalty [11]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
259 -E INT Gap extension penalty [4]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
260 -c Reverse query but not complement it, which is required for alignment
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
261 in the color space.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
262 -R Proceed with suboptimal alignments even if the top hit is a repeat. By
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
263 default, BWA only searches for suboptimal alignments if the top hit is
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
264 unique. Using this option has no effect on accuracy for single-end
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
265 reads. It is mainly designed for improving the alignment accuracy of
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
266 paired-end reads. However, the pairing procedure will be slowed down,
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
267 especially for very short reads (~32bp).
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
268 -N Disable iterative search. All hits with no more than maxDiff
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
269 differences will be found. This mode is much slower than the default.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
270
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
271 For **samse**::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
272
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
273 -n INT Maximum number of alignments to output in the XA tag for reads paired
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
274 properly. If a read has more than INT hits, the XA tag will not be
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
275 written. [3]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
276 -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
277
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
278 For **sampe**::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
279
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
280 -a INT Maximum insert size for a read pair to be considered as being mapped
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
281 properly. Since version 0.4.5, this option is only used when there
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
282 are not enough good alignments to infer the distribution of insert
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
283 sizes. [500]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
284 -n INT Maximum number of alignments to output in the XA tag for reads paired
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
285 properly. If a read has more than INT hits, the XA tag will not be
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
286 written. [3]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
287 -N INT Maximum number of alignments to output in the XA tag for discordant
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
288 read pairs (excluding singletons). If a read has more than INT hits,
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
289 the XA tag will not be written. [10]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
290 -o INT Maximum occurrences of a read for pairing. A read with more
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
291 occurrences will be treated as a single-end read. Reducing this
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
292 parameter speeds up pairing. [100000]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
293 -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
294
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
295 For specifying the read group in **samse** or **sampe**, use the following::
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
296
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
297 @RG Read group. Unordered multiple @RG lines are allowed.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
298 ID Read group identifier. Each @RG line must have a unique ID. The value of
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
299 ID is used in the RG tags of alignment records. Must be unique among all
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
300 read groups in header section. Read group IDs may be modified when
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
301 merging SAM files in order to handle collisions.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
302 CN Name of sequencing center producing the read.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
303 DS Description.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
304 DT Date the run was produced (ISO8601 date or date/time).
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
305 FO Flow order. The array of nucleotide bases that correspond to the
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
306 nucleotides used for each flow of each read. Multi-base flows are encoded
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
307 in IUPAC format, and non-nucleotide flows by various other characters.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
308 Format : /\*|[ACMGRSVTWYHKDBN]+/
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
309 KS The array of nucleotide bases that correspond to the key sequence of each read.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
310 LB Library.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
311 PG Programs used for processing the read group.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
312 PI Predicted median insert size.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
313 PL Platform/technology used to produce the reads. Valid values : CAPILLARY,
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
314 LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
315 PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
316 SOLiD). Unique identifier.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
317 SM Sample. Use pool name where a pool is being sequenced.
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
318
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
319 </help>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
320 <citations>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
321 <citation type="doi">10.1111/mec.12354</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
322 <citation type="doi">10.1111/mec.12330</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
323 <citation type="doi">10.1534/g3.111.000240</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
324 <citation type="doi">10.1534/genetics.111.127324</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
325 <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
326 <citation type="doi">10.1073/pnas.1006538107</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
327
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
328 <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
329 author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
330 title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
331 booktitle = {JOBIM 2013 Proceedings},
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
332 year = {2013},
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
333 url = {https://www.e-biogenouest.org/resources/128},
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
334 pages = {97-106}
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
335 }</citation>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
336 </citations>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
337 </tool>
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
338
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
339