comparison bowtie2_wrapper.xml @ 2:c1ec08cb34f9 draft

Uploaded
author devteam
date Fri, 12 Dec 2014 11:12:27 -0500
parents 96d2e31a3938
children ceb6467e276c
comparison
equal deleted inserted replaced
1:a54de7e658f7 2:c1ec08cb34f9
1 <tool id="bowtie2" name="Bowtie2" version="0.2"> 1 <tool id="bowtie2" name="Bowtie2" version="0.3">
2 <!-- Wrapper compatible with Bowtie version 2.0.0 --> 2 <!-- Wrapper compatible with Bowtie version 2.2.4 -->
3 <description>is a short-read aligner</description> 3 <description>- map reads against reference genome</description>
4 <version_command>bowtie2 --version</version_command> 4 <version_command>bowtie2 --version</version_command>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="2.1.0">bowtie2</requirement> 6 <requirement type="package" version="2.2.4">bowtie2</requirement>
7 <requirement type="package" version="0.1.18">samtools</requirement> 7 <requirement type="package" version="0.1.18">samtools</requirement>
8 </requirements> 8 </requirements>
9
10 <command> 9 <command>
10
11 ## prepare bowtie2 index 11 ## prepare bowtie2 index
12 #set index_path = '' 12 #set index_path = ''
13 #if str($reference_genome.source) == "history": 13 #if str($reference_genome.source) == "history":
14 bowtie2-build "$reference_genome.own_file" genome; ln -s "$reference_genome.own_file" genome.fa; 14 bowtie2-build "$reference_genome.own_file" genome &amp;&amp;
15 ln -s "$reference_genome.own_file" genome.fa &amp;&amp;
15 #set index_path = 'genome' 16 #set index_path = 'genome'
16 #else: 17 #else:
17 #set index_path = $reference_genome.index.fields.path 18 #set index_path = $reference_genome.index.fields.path
18 #end if 19 #end if
19 20
20 ## execute bowtie2 21 ## execute bowtie2
22
21 bowtie2 23 bowtie2
22 24
23 ## number of threads 25 ## number of threads
24 -p \${GALAXY_SLOTS:-4} 26 -p \${GALAXY_SLOTS:-4}
25 27
26 ## index file path 28 ## index file path
27 -x $index_path 29 -x $index_path
28 30
29 ## check for single/pair-end 31
30 #if str( $library.type ) == "single" 32 ## Fastq inputs
31 ## prepare inputs 33 #if str( $library.type ) == "single":
32 -U $library.input_1 34 -U "${input_1}"
33 35 #if str( $library.unaligned_file ) == "true":
34 #if $output_unaligned_reads_l
35 --un $output_unaligned_reads_l 36 --un $output_unaligned_reads_l
36 #end if 37 #end if
38 #elif str( $library.type ) == "paired":
39 -1 "${input_1}"
40 -2 "${input_2}"
41 #if str( $library.paired_options.paired_options_selector ) == "yes":
42 -I "${library.paired_options.I}"
43 -X "${library.paired_options.X}"
44 ${library.paired_options.fr_rf_ff}
45 ${library.paired_options.no_mixed}
46 ${library.paired_options.no_discordant}
47 ${library.paired_options.dovetail}
48 ${library.paired_options.no_contain}
49 ${library.paired_options.no_overlap}
50 #end if
51 #if str( $library.unaligned_file ) == "true":
52 --un-conc $output_unaligned_reads_l
53 #end if
37 #else 54 #else
38 ## prepare inputs 55 ## prepare collection
39 -1 $library.input_1 56 -1 $library.input_1.forward
40 -2 $library.input_2 57 -2 $library.input_1.reverse
41 -I $library.min_insert 58 #if str( $library.paired_options.paired_options_selector ) == "yes":
42 -X $library.max_insert 59 -I "${library.paired_options.I}"
43 60 -X "${library.paired_options.X}"
44 #if $output_unaligned_reads_l 61 ${library.paired_options.fr_rf_ff}
62 ${library.paired_options.no_mixed}
63 ${library.paired_options.no_discordant}
64 ${library.paired_options.dovetail}
65 ${library.paired_options.no_contain}
66 ${library.paired_options.no_overlap}
67 #end if
68 #if str( $library.unaligned_file ) == "true":
45 --un-conc $output_unaligned_reads_l 69 --un-conc $output_unaligned_reads_l
46 #end if 70 #end if
47 #end if 71 #end if
48 72
49 ## configure settings 73 ## Readgroups
50 #if str($params.full) == "yes": 74 #if str( $read_group.read_group_selector ) == "yes":
51 ## add alignment type 75 --rg-id "${read_group.rgid}"
52 $params.align_type 76 --rg "SM:${read_group.rgsm}"
53 77 --rg "LB:${read_group.rglb}"
54 ## add performance 78 --rg "PL:${read_group.rgpl}"
55 $params.performance
56
57 ## add time flag
58 $params.time
59
60 ## add nofw/norc
61 $params.nofw_norc
62
63 ## set gbar
64 --gbar $params.gbar
65
66 ## check skip
67 #if str($params.skip) != "0":
68 -s $params.skip
69 #end if
70
71 ## check upto
72 #if str($params.upto) != "0":
73 -u $params.upto
74 #end if
75
76 ## check trim5
77 #if str($params.trim5) != "0":
78 -5 $params.trim5
79 #end if
80
81 ## check trim3
82 #if str($params.trim3) != "0":
83 -3 $params.trim3
84 #end if
85 #end if 79 #end if
86 80
87 ## read group information 81 ## Analysis type
88 #if str($read_group.selection) == "yes": 82 #if ( str( $analysis_type.analysis_type_selector ) == "simple" and str( $analysis_type.presets ) != "no_presets" ):
89 #if $read_group.rgid and $read_group.rglb and $read_group.rgpl and $read_group.rgsm: 83 $analysis_type.presets
90 --rg-id "$read_group.rgid" 84 #elif str( $analysis_type.analysis_type_selector ) == "full":
91 --rg "LB:$read_group.rglb" 85 #if str( $analysis_type.input_options.input_options_selector ) == "yes":
92 --rg "PL:$read_group.rgpl" 86 --skip "${analysis_type.input_options.skip}"
93 --rg "SM:$read_group.rgsm" 87 --qupto "${analysis_type.input_options.qupto}"
94 #end if 88 --trim5 "${analysis_type.input_options.trim5}"
95 #end if 89 --trim3 "${analysis_type.input_options.trim3}"
96 90 ${analysis_type.input_options.qv_encoding}
97 ## view/sort and output file 91 ${analysis_type.input_options.solexa-quals}
92 ${analysis_type.input_options.int-quals}
93 #end if
94
95 #if str( $analysis_type.alignment_options.alignment_options_selector ) == "yes":
96 -N "${analysis_type.alignment_options.N}"
97 -L "${analysis_type.alignment_options.L}"
98 -i "${analysis_type.alignment_options.i}"
99 --n-ceil "${analysis_type.alignment_options.n_ceil}"
100 --dpad "${analysis_type.alignment_options.dpad}"
101 --gbar "${analysis_type.alignment_options.gbar}"
102 ${analysis_type.alignment_options.ignore-quals}
103 ${analysis_type.alignment_options.nofw}
104 ${analysis_type.alignment_options.norc}
105 ${analysis_type.alignment_options.no_1mm_upfront}
106 #if str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "end-to-end":
107 --end-to-end
108 --score-min "${$analysis_type.alignment_options.align_mode.core-min}"
109 #elif str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local":
110 --local
111 --score-min "${$analysis_type.alignment_options.align_mode.core-min}"
112 #end if
113 #end if
114
115 #if str( $analysis_type.scoring_options.scoring_options_selector ) == "yes":
116 --ma "${analysis_type.scoring_options.ma}"
117 --mp "${analysis_type.scoring_options.mp}"
118 --np "${analysis_type.scoring_options.np}"
119 --rdg "${analysis_type.scoring_options.rdg_read_open},${analysis_type.scoring_options.rdg_read_extend}"
120 --rfg "${analysis_type.scoring_options.rfg_ref_open},${analysis_type.scoring_options.rfg_ref_extend}"
121 #end if
122
123 #if str( $analysis_type.reporting_options.reporting_options_selector ) == "k":
124 -k "${analysis_type.reporting_options.k}"
125 #elif str( $analysis_type.reporting_options.reporting_options_selector ) == "a":
126 -a
127 #end if
128
129 #if str( $analysis_type.effort_options.effort_options_selector ) == "yes":
130 -D "${analysis_type.effort_options.D}"
131 -R "${analysis_type.effort_options.R}"
132 #end if
133
134 #if str( $analysis_type.sam_options.sam_options_selector ) == "yes":
135 ${analysis_type.sam_options.no-unal}
136 ${analysis_type.sam_options.omit-sec-seq}
137 #end if
138
139 #if str( $analysis_type.other_options.other_options_selector ) == "yes":
140 ${analysis_type.other_options.reorder}
141 ${analysis_type.other_options.non-deterministic}
142 --seed "${analysis_type.other_options.seed}"
143 #end if
144
145 #elif str( $analysis_type.analysis_type_selector ) == "cline":
146 ${analysis_type.cline}
147 #end if
148
149 ## view/sort and output BAM file
98 | samtools view -Su - | samtools sort -o - - > $output 150 | samtools view -Su - | samtools sort -o - - > $output
99 151
100 ## rename unaligned sequence files 152 ## rename unaligned sequence files
101 #if $library.type == "paired" and $output_unaligned_reads_l and $output_unaligned_reads_r: 153 #if $library.type == "paired" and $output_unaligned_reads_l and $output_unaligned_reads_r:
102 #set left = str($output_unaligned_reads_l).replace( '.dat', '.1.dat' ) 154 #set left = str($output_unaligned_reads_l).replace( '.dat', '.1.dat' )
103 #set right = str($output_unaligned_reads_l).replace( '.dat', '.2.dat' ) 155 #set right = str($output_unaligned_reads_l).replace( '.dat', '.2.dat' )
104 156
105 ; mv $left $output_unaligned_reads_l; 157 ; mv $left $output_unaligned_reads_l;
106 mv $right $output_unaligned_reads_r 158 mv $right $output_unaligned_reads_r
107 #end if 159 #end if
160
108 </command> 161 </command>
109 162
110 <!-- basic error handling --> 163 <!-- basic error handling -->
111 <stdio> 164 <stdio>
112 <exit_code range="1:" level="fatal" description="Tool exception" /> 165 <exit_code range="1:" level="fatal" description="Tool exception" />
113 </stdio> 166 </stdio>
114 167
115 <inputs> 168 <inputs>
116 <!-- single/paired --> 169 <!-- single/paired -->
117 <conditional name="library"> 170 <conditional name="library">
118 <param name="type" type="select" label="Is this library mate-paired?"> 171 <param name="type" type="select" label="Is this single or paired library">
119 <option value="single">Single-end</option> 172 <option value="single">Single-end</option>
120 <option value="paired">Paired-end</option> 173 <option value="paired">Paired-end</option>
174 <option value="paired_collection">Paired-end Dataset Collection</option>
121 </param> 175 </param>
176
122 <when value="single"> 177 <when value="single">
123 <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> 178 <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
179 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" />
124 </when> 180 </when>
125 <when value="paired"> 181 <when value="paired">
126 <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> 182 <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
127 <param name="input_2" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> 183 <param name="input_2" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
128 <param name="min_insert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments" /> 184 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" />
129 <param name="max_insert" type="integer" value="250" label="Maximum insert size for valid paired-end alignments" /> 185 <conditional name="paired_options">
186 <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See &quot;Alignment Options&quot; section of Help below for information">
187 <option value="no" selected="True">No</option>
188 <option value="yes">Yes</option>
189 </param>
190 <when value="yes">
191 <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins; E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied). A 19-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run. This is because larger differences between `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/>
192 <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/>
193 <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
194 <option value="--fr" selected="True">--fr</option>
195 <option value="--rf">--rf</option>
196 <option value="--ff">--ff</option>
197 </param>
198 <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable mixed mode (do not try to align the individual mates of a pair)" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/>
199 <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable the search for discordant alignments" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/>
200 <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. See also: `Mates can overlap, contain or dovetail each other` in help section below; default=False"/>
201 <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Do not allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
202 <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Do not allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
203 </when>
204 <when value="no">
205 <!-- do nothing -->
206 </when>
207 </conditional>
208 </when>
209 <when value="paired_collection">
210 <param name="input_1" format="fastqsanger" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
211 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" />
212 <conditional name="paired_options">
213 <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See &quot;Alignment Options&quot; section of Help below for information">
214 <option value="no" selected="True">No</option>
215 <option value="yes">Yes</option>
216 </param>
217 <when value="yes">
218 <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins; E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied). A 19-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run. This is because larger differences between `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/>
219 <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/>
220 <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
221 <option value="--fr" selected="True">--fr</option>
222 <option value="--rf">--rf</option>
223 <option value="--ff">--ff</option>
224 </param>
225 <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable mixed mode (do not try to align the individual mates of a pair)" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/>
226 <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable the search for discordant alignments" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/>
227 <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. See also: `Mates can overlap, contain or dovetail each other` in help section below; default=False"/>
228 <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Do not allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
229 <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Do not allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
230 </when>
231 <when value="no">
232 <!-- do nothing -->
233 </when>
234 </conditional>
130 </when> 235 </when>
131 </conditional> 236 </conditional>
132 237
133 <!-- unaligned file -->
134 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads to separate file(s)" />
135
136 <!-- reference genome --> 238 <!-- reference genome -->
137 <conditional name="reference_genome"> 239 <conditional name="reference_genome">
138 <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 240 <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
139 <option value="indexed">Use a built-in index</option> 241 <option value="indexed">Use a built-in genome index</option>
140 <option value="history">Use one from the history</option> 242 <option value="history">Use a genome from the history and build index</option>
141 </param> 243 </param>
142 <when value="indexed"> 244 <when value="indexed">
143 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> 245 <param name="index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
144 <options from_data_table="bowtie2_indexes"> 246 <options from_data_table="bowtie2_indexes">
145 <filter type="sort_by" column="2"/> 247 <filter type="sort_by" column="2"/>
146 <validator type="no_options" message="No indexes are available for the selected input dataset"/> 248 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
147 </options> 249 </options>
148 </param> 250 </param>
149 </when> 251 </when>
150 <when value="history"> 252 <when value="history">
151 <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> 253 <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select reference genome" />
152 </when> 254 </when>
153 </conditional> 255 </conditional>
154 256
155 <!-- group settings --> 257 <!-- read group settings -->
156 <conditional name="read_group"> 258 <conditional name="read_group">
157 <param name="selection" type="select" label="Specify the read group for this file?"> 259 <param name="read_group_selector" type="select" label="Specify the read group for this file?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
158 <option value="yes">Yes</option> 260 <option value="yes">Yes</option>
159 <option value="no" selected="True">No</option> 261 <option value="no" selected="True">No</option>
160 </param> 262 </param>
161 <when value="yes"> 263 <when value="yes">
162 <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group IDs may be modified when merging SAM files in order to handle collisions." /> 264 <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section." help="--rg-id; Required if RG specified. Read group IDs may be modified when merging SAM files in order to handle collisions." />
163 <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" /> 265 <param name="rglb" type="text" size="25" label="Library name (LB)" help="--rg; Required if RG specified" />
164 <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO" /> 266 <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="--rg; Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO" />
165 <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" /> 267 <param name="rgsm" type="text" size="25" label="Sample (SM)" help="--rg; Required if RG specified. Use pool name where a pool is being sequenced" />
166 </when> 268 </when>
167 <when value="no" /> 269 <when value="no" />
168 </conditional> 270 </conditional>
169 271
170 <!-- full/advanced params. --> 272 <conditional name="analysis_type">
171 <conditional name="params"> 273 <param name="analysis_type_selector" type="select" label="Select analysis mode">
172 <param name="full" type="select" label="Parameter Settings" help="You can use the default settings or set custom values for any of Bowtie's parameters."> 274 <option value="simple">1: Default setting only</option>
173 <option value="no">Use defaults</option> 275 <option value="full">2: Full parameter list</option>
174 <option value="yes">Full parameter list</option>
175 </param> 276 </param>
176 <when value="yes"> 277 <when value="simple">
177 <param name="align_type" type="select" label="Type of alignment"> 278 <param name="presets" type="select" display="radio" label="Do you want to use presets?" help="Allow selecting among several preset parameter settings. Choosing between these will result in dramatic changes in runtime. See help below to understand effects of these presets.">
178 <option value="" selected="true">End to end</option> 279 <option value="no_presets" selected="True">No, just use defaults</option>
179 <option value="--local">Local</option> 280 <option value="--very-fast">Very fast end-to-end (--very-fast)</option>
281 <option value="--fast">Fast end-to-end (--fast)</option>
282 <option value="--sensitive">Sensitive end-to-end (--sensitive)</option>
283 <option value="--very-sensitive">Very sensitive end-to-end (--very-sensitive)</option>
284 <option value="--very-fast-local">Very fast local (--very-fast-local)</option>
285 <option value="--fast-local">Fast local (--fast-local)</option>
286 <option value="--sensitive-local">Sensitive local (--sensitive-local)</option>
287 <option value="--very-sensitive-local">Very sensitive local (--very-sensitive-local)</option>
180 </param> 288 </param>
289 </when>
290 <when value="full">
291 <conditional name="input_options">
292 <param name="input_options_selector" type="select" label="Do you want to tweak input options?" help="See &quot;Input Options&quot; section of Help below for information">
293 <option value="yes">Yes</option>
294 <option value="no" selected="true">No</option>
295 </param>
296 <when value="yes">
297 <param name="skip" type="integer" min="0" value="0" label="Skip (i.e. do not align) the first that many reads or pairs in the input" help="-s/--skip; default=0"/>
298 <param name="qupto" type="integer" min="-1" value="-1" label="Align the first that many reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop" help="-u/--qupto; default=-1 (no limit)"/>
299 <param name="trim5" type="integer" min="0" value="0" label="Trim that many bases from 5' (left) end of each read before alignment" help="-5/--trim5; default=0"/>
300 <param name="trim3" type="integer" min="0" value="0" label="Trim that many bases from 3' (right) end of each read before alignment" help="-3/--trim3; default=0"/>
301 <param name="qv_encoding" type="select" display="radio" label="Select quality score encoding" help="See help below for more details">
302 <option value="--phred33" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option>
303 <option value="--phred64">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option>
304 </param>
305 <param name="solexa-quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default=False"/>
306 <param name="int-quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified" help="--int-quals; default=False"/>
307 </when>
308 <when value="no">
309 <!-- do nothing -->
310 </when>
311 </conditional>
312 <conditional name="alignment_options">
313 <param name="alignment_options_selector" type="select" label="Do you want to tweak alignment options?" help="See &quot;Alignment Options&quot; section of Help below for information">
314 <option value="yes">Yes</option>
315 <option value="no" selected="true">No</option>
316 </param>
317 <when value="yes">
318 <param name="N" type="integer" min="0" max="1" value="0" label="Set the number of mismatches to be allowed in a seed alignment during multiseed alignment (see `Multiseed alignment` section of help below)" help="-N; Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) but increases sensitivity; default=0"/>
319 <param name="L" type="integer" min="0" value="20" label="Sets the length of the seed substrings to align during multiseed alignment (see `Multiseed alignment` section of help below)" help="-L; Smaller values make alignment slower but more sensitive. Default: the `--sensitive` preset is used by default, which sets `-L` to 20 both in `--end-to-end` mode and in `--local` mode"/>
320 <param name="i" type="text" value="S,1,1.15" size="10" label="Set a function governing the interval between seed substrings to use during multiseed alignment (see `Multiseed alignment` section of help below). Also see description of this option below in the help section" help="-i; Since it's best to use longer intervals for longer reads, this parameter sets the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. See also `Setting function options` below in help section. If the function returns a result less than 1, it is rounded up to 1. Default: the `--sensitive` preset is used by default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75` in `--local` mode"/>
321 <param name="n_ceil" type="text" value="L,0,0.15" label="Set a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length" help="--n-ceil; For instance, specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, where x is the read length. See also: [setting function options]. Reads exceeding this ceiling are [filtered out]. Default=`L,0,0.15`"/>
322 <param name="dpad" type="integer" min="0" value="15" label="Pad dynamic programming problems by that many columns on either side to allow gaps" help="--dpad; default=15"/>
323 <param name="gbar" type="integer" min="0" value="4" label="Disallow gaps within that many positions of the beginning or end of the read" help="--gbar; default=4"/>
324 <param name="ignore-quals" type="boolean" truevalue="--ignore-quals" falsevalue="" selected="False" label="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value" help="--ignore-quals; input is treated as though all quality values are high; default=False"/>
325 <param name="nofw" type="boolean" truevalue="--nofw" falsevalue="" selected="False" label="Do not attempt to align unpaired reads to the forward (Watson) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/>
326 <param name="norc" type="boolean" truevalue="--norc" falsevalue="" selected="False" label="Do not attempt to align unpaired reads to the reverse (Crick) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/>
327 <param name="no_1mm_upfront" type="boolean" truevalue="--no-1mm-upfront" falsevalue="" selected="False" label="Prevent searching for 1-mismatch end-to-end alignments before using the multiseed heuristic (see `Multiseed alignment` section of help below)" help="--no-1mm-upfront; By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch end-to-end alignment for the read *before* trying the [multiseed heuristic]. Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments. However, this can lead to unexpected alignments when the user also sets options governing the [multiseed heuristic], like `-L` and `-N`. For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported. This option prevents Bowtie 2 from searching for 1-mismatch end-to-end alignments before using the [multiseed heuristic], which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed; Default=False"/>
328 <conditional name="align_mode">
329 <param name="align_mode_selector" type="select" display="radio" label="Select between `--local` and `--end-to-end` alignment modes" help="--local and --end-to-end; see help below for detailed explanation; default=--end-to-end">
330 <option value="end-to-end" selected="True">End to End (--end-to-end)</option>
331 <option value="local">Local (--local)</option>
332 </param>
333 <when value="end-to-end">
334 <param name="score-min" type="text" value="L,-0.6,-0.6" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
335 </when>
336 <when value="local">
337 <param name="score-min" type="text" value="G,20,8" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
338 </when>
339 </conditional>
340 </when>
341 <when value="no">
342 <!-- do nothing -->
343 </when>
344 </conditional>
345 <conditional name="scoring_options">
346 <param name="scoring_options_selector" type="select" label="Do you want to tweak scoring options?" help="See &quot;Scoring Options&quot; section of Help below for information">
347 <option value="yes">Yes</option>
348 <option value="no" selected="true">No</option>
349 </param>
350 <when value="yes">
351 <param name="ma" type="integer" value="2" label="Set the match bonus" help="--ma; In `--local` mode match bonus is added to the alignment score for each position where a read character aligns to a reference character and the characters match. Not used in `--end-to-end` mode; Default=2"/>
352 <param name="mp" type="text" size="10" value="6,2" label="Set the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers" help="--mp; A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an `N`. If `--ignore-quals` is specified, the number subtracted equals `MX`. Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value; Default=6,2"/>
353 <param name="np" type="integer" value="1" label="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as `N`" help="--np; Default=1"/>
354 <param name="rdg_read_open" type="integer" value="5" label="Set the read gap opening penalty" help="--rdg; this is the first component of --rdg flag - opening penalty; Default=5"/>
355 <param name="rdg_read_extend" type="integer" value="3" label="Set the read gap extension penalty" help="--rdg; this is the second component of --rdg flag - extension penalty; Default=3"/>
356 <param name="rfg_ref_open" type="integer" value="5" label="Set the reference gap opening penalty" help="--rfg; this is the first component of --rfg flag - opening penalty; Default=5"/>
357 <param name="rfg_ref_extend" type="integer" value="3" label="Set the reference gap extension penalty" help="--rfg; this is the second component of --rfg flag - extension penalty; Default=3"/>
358 </when>
359 <when value="no">
360 <!-- do nothing -->
361 </when>
362 </conditional>
363 <conditional name="reporting_options">
364 <param name="reporting_options_selector" type="select" label="Do you want to use -a or -k options" help="Make sure you understand implications of setting -k and -a. See &quot;Reporting Options&quot; section of Help below for information on -k and -a options">
365 <option value="no" selected="true">No, do not set</option>
366 <option value="k">Set -k option and enter -k value</option>
367 <option value="a">Set -a option</option>
368 </param>
369 <when value="no">
370 <!-- do nothing -->
371 </when>
372 <when value="k">
373 <param name="k" type="integer" min="0" value="1" label="Searches for at most that many distinct, valid alignments for each read" help="-k; see detailed description of this option in the help section below. Note: Bowtie 2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow"/>
374 </when>
375 <when value="a">
376 <!-- do nothing here; set -a flag on the command line-->
377 </when>
378 </conditional>
379 <conditional name="effort_options">
380 <param name="effort_options_selector" type="select" label="Do you want to tweak effort options?" help="See &quot;Effort Options&quot; section of Help below for information">
381 <option value="yes">Yes</option>
382 <option value="no" selected="true">No</option>
383 </param>
384 <when value="yes">
385 <param name="D" type="integer" value="15" min="0" label="Allow that many consecutive seed extension attempts to `fail` before Bowtie 2 moves on, using the alignments found so far" help="-D; A seed extension `fails` if it does not yield a new best or a new second-best alignment. This limit is automatically adjusted up when -k or -a are specified. Default=15"/>
386 <param name="R" type="integer" value="2" min="0" label="Set the maximum number of times Bowtie 2 will `re-seed` reads with repetitive seeds" help="When `re-seeding`, Bowtie 2 simply chooses a new set of reads (same length, same number of mismatches allowed) at different offsets and searches for more alignments. A read is considered to have repetitive seeds if the total number of seed hits divided by the number of seeds that aligned at least once is greater than 300. Default=2"/>
387 </when>
388 <when value="no">
389 <!-- do nothing -->
390 </when>
391 </conditional>
181 392
182 <param name="performance" type="select" label="Preset option"> 393 <conditional name="sam_options">
183 <option value="">Default</option> 394 <param name="sam_options_selector" type="select" label="Do you want to tweak SAM/BAM Options?" help="See &quot;Output Options&quot; section of Help below for information">
184 <option value="--very-fast">Very fast</option> 395 <option value="yes">Yes</option>
185 <option value="--fast">Fast</option> 396 <option value="no" selected="true">No</option>
186 <option value="--sensitive" selected="true">Sensitive</option> 397 </param>
187 <option value="--very-sensitive">Very sensitive</option> 398 <when value="yes">
188 </param> 399 <param name="no-unal" type="boolean" truevalue="--no-unal" falsevalue="" label="Suppress SAM records for reads that failed to align" help="--no-unal; Default=False"/>
189 400 <param name="omit-sec-seq" type="boolean" truevalue="--omit-sec-seq" falsevalue="" label="Suppress SEQ and QUAL strings for secondary alignments" help="--omit-sec-seq; Default=False"/>
190 <param name="gbar" type="integer" value="4" label="Disallow gaps within n-positions of read" /> 401 </when>
191 402 <when value="no">
192 <param name="trim5" type="integer" value="0" label="Trim n-bases from 5' of each read" /> 403 <!-- do nothing -->
193 404 </when>
194 <param name="trim3" type="integer" value="0" label="Trim n-bases from 3' of each read" /> 405 </conditional>
195 406 <conditional name="other_options">
196 <param name="skip" type="integer" value="0" label="Skip the first n-reads" /> 407 <param name="other_options_selector" type="select" label="Do you want to tweak Other Options?" help="See &quot;Other Options&quot; section of Help below for information">
197 408 <option value="yes">Yes</option>
198 <param name="upto" type="integer" value="0" label="Number of reads to be aligned (0 = unlimited)" /> 409 <option value="no" selected="true">No</option>
199 410 </param>
200 <param name="nofw_norc" type="select" label="Strand directions"> 411 <when value="yes">
201 <option value="">Both</option> 412 <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" label="Guarantee that output SAM records are printed in an order corresponding to the order of the reads in the original input file" help="--reorder; Default=False"/>
202 <option value="--nofw">Disable forward</option> 413 <param name="seed" type="integer" value="0" min="0" label="Use this number as the seed for pseudo-random number generator" help="--seed; Default=0"/>
203 <option value="--norc">Disable reverse</option> 414 <param name="non-deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" label="Re-initialize the pseudo-random generator for each read using the current time" help="--non-deterministic; see Help below for explanation of this option; default=False"/>
204 </param> 415 </when>
205 416 <when value="no">
206 <param name="time" type="select" label="Log mapping time"> 417 <!-- do nothing -->
207 <option value="">No</option> 418 </when>
208 <option value="--time">Yes</option> 419 </conditional>
209 </param>
210
211 </when> 420 </when>
212 <when value="no" />
213 </conditional> 421 </conditional>
214
215 </inputs> 422 </inputs>
216 423
217 <!-- define outputs --> 424 <!-- define outputs -->
425
218 <outputs> 426 <outputs>
427
219 <data format="fastqsanger" name="output_unaligned_reads_l" label="${tool.name} on ${on_string}: unaligned reads (L)" > 428 <data format="fastqsanger" name="output_unaligned_reads_l" label="${tool.name} on ${on_string}: unaligned reads (L)" >
220 <filter>unaligned_file is True</filter> 429 <filter>library['unaligned_file'] is True</filter>
221 <actions> 430 <actions>
222 <action type="format"> 431 <action type="format">
223 <option type="from_param" name="library.input_1" param_attribute="ext" /> 432 <option type="from_param" name="library.input_1" param_attribute="ext" />
224 </action> 433 </action>
225 </actions> 434 </actions>
226 </data> 435 </data>
227 <data format="fastqsanger" name="output_unaligned_reads_r" label="${tool.name} on ${on_string}: unaligned reads (R)"> 436 <data format="fastqsanger" name="output_unaligned_reads_r" label="${tool.name} on ${on_string}: unaligned reads (R)">
228 <filter>library['type'] == "paired" and unaligned_file is True</filter> 437 <filter>( library['type'] == "paired" or library['type'] == "paired_collection" ) and library['unaligned_file'] is True</filter>
229 <actions> 438 <actions>
230 <action type="format"> 439 <action type="format">
231 <option type="from_param" name="library.input_1" param_attribute="ext" /> 440 <option type="from_param" name="library.input_1" param_attribute="ext" />
232 </action> 441 </action>
233 </actions> 442 </actions>
234 </data> 443 </data>
235 <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads"> 444
445 <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads in BAM format">
236 <actions> 446 <actions>
237 <conditional name="reference_genome.source"> 447 <conditional name="reference_genome.source">
238 <when value="indexed"> 448 <when value="indexed">
239 <action type="metadata" name="dbkey"> 449 <action type="metadata" name="dbkey">
240 <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0"> 450 <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0">
254 </outputs> 464 </outputs>
255 465
256 <tests> 466 <tests>
257 <test> 467 <test>
258 <!-- basic test on single paired default run --> 468 <!-- basic test on single paired default run -->
259 <param name="type" value="single"/> 469 <param name="type" value="paired"/>
260 <param name="selection" value="no"/> 470 <param name="selection" value="no"/>
261 <param name="full" value="no"/> 471 <param name="paired_options_selector" value="no"/>
262 <param name="unaligned_file" value="false"/> 472 <param name="unaligned_file" value="false"/>
473 <param name="analysis_type_selector" value="simple"/>
263 <param name="source" value="history" /> 474 <param name="source" value="history" />
264 <param name="input_1" value="bowtie2/phix_reads.fastq" ftype="fastqsanger"/> 475 <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
265 <param name="own_file" value="bowtie2/phix_genome.fasta" /> 476 <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
266 <output name="output" file="bowtie2/phix_mapped.bam" /> 477 <param name="own_file" value="bowtie2-ref.fasta" />
478 <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
267 </test> 479 </test>
268 </tests> 480 </tests>
269 481
270 <help> 482 <help>
483
271 **Bowtie2 Overview** 484 **Bowtie2 Overview**
272 485
273 Bowtie_ is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 supports gapped, local, and paired-end alignment modes. Bowtie 2 outputs alignments in SAM format, enabling interoperation with a large number of other tools. 486 Bowtie2_ is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 supports gapped, local, and paired-end alignment modes. Galaxy wrapper for Bowtie 2 outputs alignments in `BAM format`_, enabling interoperation with a large number of other tools available at this site.
274 487 The majority of the information on this page is derived from the excellent `Bowtie2 manual`_ written by Ben Langmead.
275 *Please cite:* Langmead, B., Trapnell, C., Pop, M. and Salzberg, S.L. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25 (2009) 488
276 489 .. _Bowtie2: http://bowtie-bio.sourceforge.net/bowtie2/
277 .. _Bowtie: http://bowtie-bio.sourceforge.net/bowtie2/ 490 .. _`Bowtie2 manual`: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
491 .. _`BAM format`: http://samtools.github.io/hts-specs/SAMv1.pdf
492
493 -----
494
495 **Selecting reference genomes for Bowtie2**
496
497 Galaxy wrapper for Bowtie2 allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
498
499 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bowtie2-build utility and are ready to be mapped against.
500 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using bowtie2-build command, and then run mapping with bowtie2.
501
502 If your genome of interest is not listed here you have two choices:
503
504 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added
505 2. Upload your genome of interest as a FASTA file to Galaxy history and select the **Use a genome from the history and build index** option.
278 506
279 ------ 507 ------
280 508
509 .. class:: infomark
510
511 **Bowtie2 options**
512
513 Galaxy wrapper for Bowtie2 implements most but not all options available through the command line. Supported options are described below.
514
515 -----
516
281 **Inputs** 517 **Inputs**
282 518
283 Bowtie 2 accepts files in Sanger FASTQ format (single or pair-end). Use the FASTQ Groomer to prepare your files. 519 Bowtie 2 accepts files in Sanger FASTQ format (single or pair-end). Use the FASTQ Groomer to prepare your files.
284 520
285 ------ 521 ------
286 522
287 **Outputs** 523 **Input options**::
288 524
289 The mapped sequence reads are provided as BAM file, while unmapped reads are optionally available as SAM records. When Bowtie 2 finishes running, it prints messages summarizing what happened. These messages are printed to the "standard error" ("stderr") filehandle. For datasets consisting of unpaired reads, the summary might look like this:: 525 -s/--skip &lt;int&gt;
290 526 Skip (i.e. do not align) the first `&lt;int&gt;` reads or pairs in the input.
291 20000 reads; of these: 527
292 20000 (100.00%) were unpaired; of these: 528 -u/--qupto &lt;int&gt;
293 1247 (6.24%) aligned 0 times 529 Align the first `&lt;int&gt;` reads or read pairs from the input (after the
294 18739 (93.69%) aligned exactly 1 time 530 `-s`/`--skip` reads or pairs have been skipped), then stop. Default: no limit.
295 14 (0.07%) aligned >1 times 531
296 93.77% overall alignment rate 532 -5/--trim5 &lt;int&gt;
297 533 Trim `&lt;int&gt;` bases from 5' (left) end of each read before alignment (default: 0).
534
535 -3/--trim3 &lt;int&gt;
536 Trim `&lt;int&gt;` bases from 3' (right) end of each read before alignment (default: 0).
537
538 --phred33
539 Input qualities are ASCII chars equal to the Phred quality plus 33. This is
540 also called the "Phred+33" encoding, which is used by the very latest Illumina
541 pipelines.
542
543 --phred64
544 Input qualities are ASCII chars equal to the [Phred quality] plus 64. This is
545 also called the "Phred+64" encoding.
546
547 --solexa-quals
548 Convert input qualities from the Solexa quality scale (which can be negative) to
549 the Phred quality scale (which can't). This scheme was used in older Illumina GA
550 Pipeline versions (prior to 1.3). Default: off.
551
552 --int-quals
553 Quality values are represented in the read input file as space-separated ASCII integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`....
554 Integers are treated as being on the [Phred quality] scale unless
555 `--solexa-quals` is also specified. Default: off.
556
298 ------ 557 ------
299 558
300 **Alignment options** 559 **Presets in `--end-to-end` mode**::
301 560
302 *--end-to-end/--local* 561 --very-fast
303 562 Same as: `-D 5 -R 1 -N 0 -L 22 -i S,0,2.50`
304 By default, Bowtie 2 performs end-to-end read alignment. That is, it searches for alignments involving all of the read characters. This is also called an "untrimmed" or "unclipped" alignment. When the --local option is specified, Bowtie 2 performs local read alignment. In this mode, Bowtie 2 might "trim" or "clip" some read characters from one or both ends of the alignment if doing so maximizes the alignment score. 563
305 564 --fast
306 End-to-end alignment example:: 565 Same as: `-D 10 -R 2 -N 0 -L 22 -i S,0,2.50`
307 566
308 Read GACTGGGCGATCTCGACTTCG 567 --sensitive
309 ||||| |||||||||||||| 568 Same as: `-D 15 -R 2 -N 0 -L 22 -i S,1,1.15` (default in `--end-to-end` mode)
310 Reference GACTG--CGATCTCGACATCG 569
311 570 --very-sensitive
312 Local alignment example:: 571 Same as: `-D 20 -R 3 -N 0 -L 20 -i S,1,0.50`
313
314 Read ACGGTTGCGTTAA-TCCGCCACG
315 ||||||||| ||||||
316 Reference TAACTTGCGTTAAATCCGCCTGG
317
318 *-s/--skip (default: 0)*
319
320 Skip (i.e. do not align) the first n-reads or pairs in the input.
321
322 *-u/--qupto (default: no limit)*
323
324 Align the first n-reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop.
325
326 *-5/--trim5 (default: 0)*
327
328 Trim n-bases from 5' (left) end of each read before alignment.
329
330 *-3/--trim3 (default: 0)*
331
332 Trim n-bases from 3' (right) end of each read before alignment.
333
334 *--nofw/--norc (default: both strands enabled)*
335
336 If --nofw is specified, Bowtie 2 will not attempt to align unpaired reads to the forward (Watson) reference strand. If --norc is specified, bowtie2 will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes Bowtie 2 to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default: both strands enabled.
337
338 *--gbar (default: 4)*
339
340 Disallow gaps within n-positions of the beginning or end of the read.
341 572
342 ------ 573 ------
343 574
344 **Paired-end options** 575 **Presets options in `--local` mode**::
345 576
346 *-I/--minins (default: 0)* 577 --very-fast-local
347 578 Same as: `-D 5 -R 1 -N 0 -L 25 -i S,1,2.00`
348 The minimum fragment length for valid paired-end alignments. E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates. 579
349 580 --fast-local
350 The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences bewteen -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. 581 Same as: `-D 10 -R 2 -N 0 -L 22 -i S,1,1.75`
351 582
352 *-X/--maxins (default: 0)* 583 --sensitive-local
353 584 Same as: `-D 15 -R 2 -N 0 -L 20 -i S,1,0.75` (default in `--local` mode)
354 The maximum fragment length for valid paired-end alignments. E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates. 585
355 586 --very-sensitive-local
356 The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences bewteen -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. 587 Same as: `-D 20 -R 3 -N 0 -L 20 -i S,1,0.50`
357 588
358 ------ 589 ------
359 590
360 **SAM options** 591 **Alignment options**::
361 592
362 *--rg-id [text]* 593 -N &lt;int&gt;
363 594 Sets the number of mismatches to be allowed in a seed alignment during [multiseed
364 Set the read group ID to [text]. This causes the SAM @RG header line to be printed, with [text] as the value associated with the ID: tag. It also causes the RG:Z: extra field to be attached to each SAM output record, with value set to [text]. 595 alignment]. Can be set to 0 or 1. Setting this higher makes alignment slower
365 596 (often much slower) but increases sensitivity. Default: 0.
366 *--rg [text]* 597
367 598 -L &lt;int&gt;
368 Add [text] as a field on the @RG header line. Note: in order for the @RG line to appear, --rg-id must also be specified. This is because the ID tag is required by the SAM Spec. Specify --rg multiple times to set multiple fields. See the SAM Spec for details about what fields are legal. 599 Sets the length of the seed substrings to align during [multiseed alignment].
369 600 Smaller values make alignment slower but more sensitive. Default: the
601 `--sensitive` preset is used by default, which sets `-L` to 20 both in
602 `--end-to-end` mode and in `--local` mode.
603
604 -i &lt;func&gt;
605 Sets a function governing the interval between seed substrings to use during
606 [multiseed alignment]. For instance, if the read has 30 characters, and seed
607 length is 10, and the seed interval is 6, the seeds extracted will be:
608
609 Read: TAGCTACGCTCTACGCTATCATGCATAAAC
610 Seed 1 fw: TAGCTACGCT
611 Seed 1 rc: AGCGTAGCTA
612 Seed 2 fw: CGCTCTACGC
613 Seed 2 rc: GCGTAGAGCG
614 Seed 3 fw: ACGCTATCAT
615 Seed 3 rc: ATGATAGCGT
616 Seed 4 fw: TCATGCATAA
617 Seed 4 rc: TTATGCATGA
618
619 Since it's best to use longer intervals for longer reads, this parameter sets
620 the interval as a function of the read length, rather than a single
621 one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the
622 interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length.
623 See also: [setting function options]. If the function returns a result less than
624 1, it is rounded up to 1. Default: the `--sensitive` preset is used by
625 default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75`
626 in `--local` mode.
627
628 --n-ceil &lt;func&gt;
629 Sets a function governing the maximum number of ambiguous characters (usually
630 `N`s and/or `.`s) allowed in a read as a function of read length. For instance,
631 specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`,
632 where x is the read length. See also: [setting function options]. Reads
633 exceeding this ceiling are [filtered out]. Default: `L,0,0.15`.
634
635 --dpad &lt;int&gt;
636 "Pads" dynamic programming problems by `&lt;int&gt;` columns on either side to allow
637 gaps. Default: 15.
638
639 --gbar &lt;int&gt;
640 Disallow gaps within `&lt;int&gt;` positions of the beginning or end of the read.
641 Default: 4.
642
643 --ignore-quals
644 When calculating a mismatch penalty, always consider the quality value at the
645 mismatched position to be the highest possible, regardless of the actual value.
646 I.e. input is treated as though all quality values are high. This is also the
647 default behavior when the input doesn't specify quality values (e.g. in `-f`,
648 `-r`, or `-c` modes).
649
650 --nofw/--norc
651 If `--nofw` is specified, `bowtie2` will not attempt to align unpaired reads to
652 the forward (Watson) reference strand. If `--norc` is specified, `bowtie2` will
653 not attempt to align unpaired reads against the reverse-complement (Crick)
654 reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the
655 fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those
656 paired-end configurations corresponding to fragments from the reverse-complement
657 (Crick) strand. Default: both strands enabled.
658
659 --no-1mm-upfront
660 By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch
661 end-to-end alignment for the read *before* trying the [multiseed heuristic]. Such
662 alignments can be found very quickly, and many short read alignments have exact or
663 near-exact end-to-end alignments. However, this can lead to unexpected
664 alignments when the user also sets options governing the [multiseed heuristic],
665 like `-L` and `-N`. For instance, if the user specifies `-N 0` and `-L` equal
666 to the length of the read, the user will be surprised to find 1-mismatch alignments
667 reported. This option prevents Bowtie 2 from searching for 1-mismatch end-to-end
668 alignments before using the [multiseed heuristic], which leads to the expected
669 behavior when combined with options such as `-L` and `-N`. This comes at the
670 expense of speed.
671
672 --end-to-end
673 In this mode, Bowtie 2 requires that the entire read align from one end to the
674 other, without any trimming (or "soft clipping") of characters from either end.
675 The match bonus `--ma` always equals 0 in this mode, so all alignment scores
676 are less than or equal to 0, and the greatest possible alignment score is 0.
677 This is mutually exclusive with `--local`. `--end-to-end` is the default mode.
678
679 --local
680 In this mode, Bowtie 2 does not require that the entire read align from one end
681 to the other. Rather, some characters may be omitted ("soft clipped") from the
682 ends in order to achieve the greatest possible alignment score. The match bonus
683 `--ma` is used in this mode, and the best possible alignment score is equal to
684 the match bonus (`--ma`) times the length of the read. Specifying `--local`
685 and one of the presets (e.g. `--local --very-fast`) is equivalent to specifying
686 the local version of the preset (`--very-fast-local`). This is mutually
687 exclusive with `--end-to-end`. `--end-to-end` is the default mode.
688
689 -----
690
691 **Scoring options**::
692
693 --ma &lt;int&gt;
694 Sets the match bonus. In `--local` mode `&lt;int&gt;` is added to the alignment
695 score for each position where a read character aligns to a reference character
696 and the characters match. Not used in `--end-to-end` mode. Default: 2.
697
698 --mp MX,MN
699 Sets the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers. A
700 number less than or equal to `MX` and greater than or equal to `MN` is
701 subtracted from the alignment score for each position where a read character
702 aligns to a reference character, the characters do not match, and neither is an
703 `N`. If `--ignore-quals` is specified, the number subtracted equals `MX`.
704 Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )`
705 where Q is the Phred quality value. Default: `MX` = 6, `MN` = 2.
706
707 --np &lt;int&gt;
708 Sets penalty for positions where the read, reference, or both, contain an
709 ambiguous character such as `N`. Default: 1.
710
711 --rdg &lt;int1&gt;,&lt;int2&gt;
712 Sets the read gap open (`&lt;int1&gt;`) and extend (`&lt;int2&gt;`) penalties. A read gap of
713 length N gets a penalty of `&lt;int1&gt;` + N * `&lt;int2&gt;`. Default: 5, 3.
714
715 --rfg &lt;int1&gt;,&lt;int2&gt;
716 Sets the reference gap open (`&lt;int1&gt;`) and extend (`&lt;int2&gt;`) penalties. A
717 reference gap of length N gets a penalty of `&lt;int1&gt;` + N * `&lt;int2&gt;`. Default:
718 5, 3.
719
720 --score-min &lt;func&gt;
721 Sets a function governing the minimum alignment score needed for an alignment to
722 be considered "valid" (i.e. good enough to report). This is a function of read
723 length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f`
724 to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting
725 function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and
726 the default in `--local` mode is `G,20,8`.
727
728 -----
729
730 **Reporting options**::
731
732 -k &lt;int&gt;
733 By default, `bowtie2` searches for distinct, valid alignments for each read.
734 When it finds a valid alignment, it continues looking for alignments that are
735 nearly as good or better. The best alignment found is reported (randomly
736 selected from among best if tied). Information about the best alignments is
737 used to estimate mapping quality and to set SAM optional fields, such as
738 `AS:i` and `XS:i`.
739
740 When `-k` is specified, however, `bowtie2` behaves differently. Instead, it
741 searches for at most `&lt;int&gt;` distinct, valid alignments for each read. The
742 search terminates when it can't find more distinct valid alignments, or when it
743 finds `&lt;int&gt;`, whichever happens first. All alignments found are reported in
744 descending order by alignment score. The alignment score for a paired-end
745 alignment equals the sum of the alignment scores of the individual mates. Each
746 reported read or pair alignment beyond the first has the SAM 'secondary' bit
747 (which equals 256) set in its FLAGS field. For reads that have more than
748 `&lt;int&gt;` distinct, valid alignments, `bowtie2` does not guarantee that the
749 `&lt;int&gt;` alignments reported are the best possible in terms of alignment score.
750 `-k` is mutually exclusive with `-a`.
751
752 Note: Bowtie 2 is not designed with large values for `-k` in mind, and when
753 aligning reads to long, repetitive genomes large `-k` can be very, very slow.
754
755 -a
756 Like `-k` but with no upper limit on number of alignments to search for. `-a`
757 is mutually exclusive with `-k`.
758
759 Note: Bowtie 2 is not designed with `-a` mode in mind, and when
760 aligning reads to long, repetitive genomes this mode can be very, very slow.
761
762 -----
763
764 **Effort options**::
765
766 -D &lt;int&gt;
767 Up to `&lt;int&gt;` consecutive seed extension attempts can "fail" before Bowtie 2
768 moves on, using the alignments found so far. A seed extension "fails" if it
769 does not yield a new best or a new second-best alignment. This limit is
770 automatically adjusted up when -k or -a are specified. Default: 15.
771
772 -R &lt;int&gt;
773 `&lt;int&gt;` is the maximum number of times Bowtie 2 will "re-seed" reads with
774 repetitive seeds. When "re-seeding," Bowtie 2 simply chooses a new set of seeds
775 (same length, same number of mismatches allowed) at different offsets and
776 searches for more alignments. A read is considered to have repetitive seeds if
777 the total number of seed hits divided by the number of seeds that aligned at
778 least once is greater than 300. Default: 2.
779
780 -----
781
782 **Paired-end options**::
783
784 -I/--minins &lt;int&gt;
785 The minimum fragment length for valid paired-end alignments. E.g. if `-I 60` is
786 specified and a paired-end alignment consists of two 20-bp alignments in the
787 appropriate orientation with a 20-bp gap between them, that alignment is
788 considered valid (as long as `-X` is also satisfied). A 19-bp gap would not
789 be valid in that case. If trimming options `-3` or `-5` are also used, the
790 `-I` constraint is applied with respect to the untrimmed mates.
791
792 The larger the difference between `-I` and `-X`, the slower Bowtie 2 will
793 run. This is because larger differences between `-I` and `-X` require that
794 Bowtie 2 scan a larger window to determine if a concordant alignment exists.
795 For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very
796 efficient.
797
798 Default: 0 (essentially imposing no minimum)
799
800 -X/--maxins &lt;int&gt;
801 The maximum fragment length for valid paired-end alignments. E.g. if `-X 100`
802 is specified and a paired-end alignment consists of two 20-bp alignments in the
803 proper orientation with a 60-bp gap between them, that alignment is considered
804 valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in
805 that case. If trimming options `-3` or `-5` are also used, the `-X`
806 constraint is applied with respect to the untrimmed mates, not the trimmed
807 mates.
808
809 The larger the difference between `-I` and `-X`, the slower Bowtie 2 will
810 run. This is because larger differences between `-I` and `-X` require that
811 Bowtie 2 scan a larger window to determine if a concordant alignment exists.
812 For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very
813 efficient.
814
815 Default: 500.
816
817 --fr/--rf/--ff
818 The upstream/downstream mate orientations for a valid paired-end alignment
819 against the forward reference strand. E.g., if `--fr` is specified and there is
820 a candidate paired-end alignment where mate 1 appears upstream of the reverse
821 complement of mate 2 and the fragment length constraints (`-I` and `-X`) are
822 met, that alignment is valid. Also, if mate 2 appears upstream of the reverse
823 complement of mate 1 and all other constraints are met, that too is valid.
824 `--rf` likewise requires that an upstream mate1 be reverse-complemented and a
825 downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1
826 and a downstream mate 2 to be forward-oriented. Default: `--fr` (appropriate
827 for Illumina's Paired-end Sequencing Assay).
828
829 --no-mixed
830 By default, when `bowtie2` cannot find a concordant or discordant alignment for
831 a pair, it then tries to find alignments for the individual mates. This option
832 disables that behavior.
833
834 --no-discordant
835 By default, `bowtie2` looks for discordant alignments if it cannot find any
836 concordant alignments. A discordant alignment is an alignment where both mates
837 align uniquely, but that does not satisfy the paired-end constraints
838 (`--fr`/`--rf`/`--ff`, `-I`, `-X`). This option disables that behavior.
839
840 --dovetail
841 If the mates "dovetail", that is if one mate alignment extends past the
842 beginning of the other such that the wrong mate begins upstream, consider that
843 to be concordant. See also: [Mates can overlap, contain or dovetail each
844 other]. Default: mates cannot dovetail in a concordant alignment.
845
846 --no-contain
847 If one mate alignment contains the other, consider that to be non-concordant.
848 See also: [Mates can overlap, contain or dovetail each other]. Default: a mate
849 can contain the other in a concordant alignment.
850
851 --no-overlap
852 If one mate alignment overlaps the other at all, consider that to be
853 non-concordant. See also: [Mates can overlap, contain or dovetail each other].
854 Default: mates can overlap in a concordant alignment.
855
370 ------ 856 ------
371 857
372 **Output options** 858 **SAM options**::
373 859
374 *--un/--un-conc* 860 --rg-id &lt;text&gt;
375 861 Set the read group ID to `&lt;text&gt;`. This causes the SAM `@RG` header line to be
376 Write reads that fail to align concordantly to file(s). These reads correspond to the SAM records. 862 printed, with `&lt;text&gt;` as the value associated with the `ID:` tag. It also
377 863 causes the `RG:Z:` extra field to be attached to each SAM output record, with
378 *-t/--time (default: off)* 864 value set to `&lt;text&gt;`.
379 865
380 Print the wall-clock time required to load the index files and align the reads. This is printed to the "standard error" ("stderr") filehandle. 866 --rg &lt;text&gt;
867 Add `&lt;text&gt;` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the
868 `@RG` header line. Note: in order for the `@RG` line to appear, `--rg-id`
869 must also be specified. This is because the `ID` tag is required by the [SAM
870 Spec][SAM]. Specify `--rg` multiple times to set multiple fields. See the
871 [SAM Spec][SAM] for details about what fields are legal.
872
873 --omit-sec-seq
874 When printing secondary alignments, Bowtie 2 by default will write out the `SEQ`
875 and `QUAL` strings. Specifying this option causes Bowtie 2 to print an asterisk
876 in those fields instead.
877
878 -----
879
880 **Other options**::
881
882 --reorder
883 Guarantees that output SAM records are printed in an order corresponding to the
884 order of the reads in the original input file, even when `-p` is set greater
885 than 1. Specifying `--reorder` and setting `-p` greater than 1 causes Bowtie
886 2 to run somewhat slower and use somewhat more memory than if `--reorder` were
887 not specified. Has no effect if `-p` is set to 1, since output order will
888 naturally correspond to input order in that case.
889
890 --seed &lt;int&gt;
891 Use `&lt;int&gt;` as the seed for pseudo-random number generator. Default: 0.
892
893 --non-deterministic
894 Normally, Bowtie 2 re-initializes its pseudo-random generator for each read. It
895 seeds the generator with a number derived from (a) the read name, (b) the
896 nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed`
897 option. This means that if two reads are identical (same name, same
898 nucleotides, same qualities) Bowtie 2 will find and report the same alignment(s)
899 for both, even if there was ambiguity. When `--non-deterministic` is specified,
900 Bowtie 2 re-initializes its pseudo-random generator for each read using the
901 current time. This means that Bowtie 2 will not necessarily report the same
902 alignment for two identical reads. This is counter-intuitive for some users,
903 but might be more appropriate in situations where the input consists of many
904 identical reads.
381 905
382 </help> 906 </help>
907 <citations>
908 <citation type="doi">10.1186/gb-2009-10-3-r25</citation>
909 <citation type="doi">10.1038/nmeth.1923</citation>
910 </citations>
383 </tool> 911 </tool>