annotate bwa-mem.xml @ 0:5e72d136a39e draft

Uploaded
author devteam
date Mon, 29 Sep 2014 16:22:24 -0400
parents
children 86c73f0eb389
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5e72d136a39e Uploaded
devteam
parents:
diff changeset
1 <?xml version="1.0"?>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
2 <tool id="bwa_mem_0_7_10" name="BWA-MEM" version="bwa-0.7.10-r837-dirty_galaxy_0.1">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
3 <requirements>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
4 <requirement type="package" version="0.7.10.039ea20639">bwa</requirement>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
5 <requirement type="package" version="1.1">samtools</requirement>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
6 </requirements>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
7 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
8 <command>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
9
5e72d136a39e Uploaded
devteam
parents:
diff changeset
10 #set $reference_fasta_filename = "localref.fa"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
11
5e72d136a39e Uploaded
devteam
parents:
diff changeset
12 #if str( $reference_source.reference_source_selector ) == "history":
5e72d136a39e Uploaded
devteam
parents:
diff changeset
13
5e72d136a39e Uploaded
devteam
parents:
diff changeset
14 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
15
5e72d136a39e Uploaded
devteam
parents:
diff changeset
16 ## The following shell commands decide which of the BWA indexing algorithms (IS or BWTSW) will be run
5e72d136a39e Uploaded
devteam
parents:
diff changeset
17 ## depending on the size of the input FASTA dataset
5e72d136a39e Uploaded
devteam
parents:
diff changeset
18
5e72d136a39e Uploaded
devteam
parents:
diff changeset
19 (
5e72d136a39e Uploaded
devteam
parents:
diff changeset
20 size=`stat -c %s "${reference_fasta_filename}" 2&gt;/dev/null`; ## Linux
5e72d136a39e Uploaded
devteam
parents:
diff changeset
21 if [ $? -eq 0 ];
5e72d136a39e Uploaded
devteam
parents:
diff changeset
22 then
5e72d136a39e Uploaded
devteam
parents:
diff changeset
23 if [ \$size -lt 2000000000 ];
5e72d136a39e Uploaded
devteam
parents:
diff changeset
24 then
5e72d136a39e Uploaded
devteam
parents:
diff changeset
25 bwa index -a is "${reference_fasta_filename}";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
26 echo "Generating BWA index with is algorithm";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
27 else
5e72d136a39e Uploaded
devteam
parents:
diff changeset
28 bwa index -a bwtsw "${reference_fasta_filename}";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
29 echo "Generating BWA index with bwtsw algorithm";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
30 fi;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
31 fi;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
32
5e72d136a39e Uploaded
devteam
parents:
diff changeset
33 eval \$(stat -s "${reference_fasta_filename}"); ## OSX
5e72d136a39e Uploaded
devteam
parents:
diff changeset
34 if [ -n "\$st_size" ]; ## run the OSX branch only when BSD `stat -s` actually set st_size; `eval` of an empty string returns 0 on Linux, so testing the exit status would index the reference a second time
5e72d136a39e Uploaded
devteam
parents:
diff changeset
35 then
5e72d136a39e Uploaded
devteam
parents:
diff changeset
36 if [ \$st_size -lt 2000000000 ];
5e72d136a39e Uploaded
devteam
parents:
diff changeset
37 then
5e72d136a39e Uploaded
devteam
parents:
diff changeset
38 bwa index -a is "${reference_fasta_filename}";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
39 echo "Generating BWA index with is algorithm";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
40 else
5e72d136a39e Uploaded
devteam
parents:
diff changeset
41 bwa index -a bwtsw "${reference_fasta_filename}";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
42 echo "Generating BWA index with bwtsw algorithm";
5e72d136a39e Uploaded
devteam
parents:
diff changeset
43 fi;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
44 fi;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
45 ) &amp;&amp;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
46
5e72d136a39e Uploaded
devteam
parents:
diff changeset
47 #else:
5e72d136a39e Uploaded
devteam
parents:
diff changeset
48 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
5e72d136a39e Uploaded
devteam
parents:
diff changeset
49 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
50
5e72d136a39e Uploaded
devteam
parents:
diff changeset
51 ## Begin BWA-MEM command line
5e72d136a39e Uploaded
devteam
parents:
diff changeset
52
5e72d136a39e Uploaded
devteam
parents:
diff changeset
53 bwa mem
5e72d136a39e Uploaded
devteam
parents:
diff changeset
54 -t "\${GALAXY_SLOTS:-1}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
55 -v 1 ## Verbosity is set to 1 (errors only)
5e72d136a39e Uploaded
devteam
parents:
diff changeset
56
5e72d136a39e Uploaded
devteam
parents:
diff changeset
57 #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option
5e72d136a39e Uploaded
devteam
parents:
diff changeset
58 -p
5e72d136a39e Uploaded
devteam
parents:
diff changeset
59 #if str( $fastq_input.iv_stats.iv_stats_selector ) == "set": ## emit -I only when insert statistics were requested; the boolean renders as its truevalue "set", never "True"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
60 -I "${fastq_input.iv_stats.iset_stats}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
61 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
62 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
63
5e72d136a39e Uploaded
devteam
parents:
diff changeset
64 #if str( $analysis_type.analysis_type_selector ) == "pacbio":
5e72d136a39e Uploaded
devteam
parents:
diff changeset
65 -x pacbio ## -x takes a read-type argument; without it the next token on the command line is consumed as the read type
5e72d136a39e Uploaded
devteam
parents:
diff changeset
66
5e72d136a39e Uploaded
devteam
parents:
diff changeset
67 #elif str( $analysis_type.analysis_type_selector ) == "full":
5e72d136a39e Uploaded
devteam
parents:
diff changeset
68
5e72d136a39e Uploaded
devteam
parents:
diff changeset
69 #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set": ## Algorithmic options; the boolean renders as its truevalue "set", never "True"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
70
5e72d136a39e Uploaded
devteam
parents:
diff changeset
71 -k "${analysis_type.algorithmic_options.k}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
72 -w "${analysis_type.algorithmic_options.w}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
73 -d "${analysis_type.algorithmic_options.d}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
74 -r "${analysis_type.algorithmic_options.r}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
75 -y "${analysis_type.algorithmic_options.y}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
76 -c "${analysis_type.algorithmic_options.c}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
77 -D "${analysis_type.algorithmic_options.D}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
78 -W "${analysis_type.algorithmic_options.W}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
79 -m "${analysis_type.algorithmic_options.m}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
80 ${analysis_type.algorithmic_options.S}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
81 ${analysis_type.algorithmic_options.P}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
82 ${analysis_type.algorithmic_options.e}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
83
5e72d136a39e Uploaded
devteam
parents:
diff changeset
84 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
85
5e72d136a39e Uploaded
devteam
parents:
diff changeset
86 #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set": ## Scoring options; the boolean renders as its truevalue "set", never "True"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
87
5e72d136a39e Uploaded
devteam
parents:
diff changeset
88 -A "${analysis_type.scoring_options.A}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
89 -B "${analysis_type.scoring_options.B}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
90 -O "${analysis_type.scoring_options.O}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
91 -E "${analysis_type.scoring_options.E}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
92 -L "${analysis_type.scoring_options.L}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
93 -U "${analysis_type.scoring_options.U}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
94
5e72d136a39e Uploaded
devteam
parents:
diff changeset
95 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
96
5e72d136a39e Uploaded
devteam
parents:
diff changeset
97 #if str( $analysis_type.io_options.io_options_selector ) == "set": ## IO options; the boolean renders as its truevalue "set", never "True"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
98
5e72d136a39e Uploaded
devteam
parents:
diff changeset
99 -T "${analysis_type.io_options.T}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
100 -h "${analysis_type.io_options.h}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
101 ${analysis_type.io_options.a}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
102 ${analysis_type.io_options.C}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
103 ${analysis_type.io_options.V}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
104 ${analysis_type.io_options.Y}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
105 ${analysis_type.io_options.M}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
106
5e72d136a39e Uploaded
devteam
parents:
diff changeset
107 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
108
5e72d136a39e Uploaded
devteam
parents:
diff changeset
109 #elif str( $analysis_type.analysis_type_selector ) == "cline":
5e72d136a39e Uploaded
devteam
parents:
diff changeset
110
5e72d136a39e Uploaded
devteam
parents:
diff changeset
111 ${analysis_type.cline}
5e72d136a39e Uploaded
devteam
parents:
diff changeset
112
5e72d136a39e Uploaded
devteam
parents:
diff changeset
113 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
114
5e72d136a39e Uploaded
devteam
parents:
diff changeset
115 #if str( $rg.rg_selector ) == "set": ## add the read group header only when requested; the boolean renders as its truevalue "set", never "True"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
116 -R "@RG\tID:$rg.ID\tSM:$rg.SM"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
117 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
118
5e72d136a39e Uploaded
devteam
parents:
diff changeset
119 #if str( $fastq_input.fastq_input_selector ) == "paired":
5e72d136a39e Uploaded
devteam
parents:
diff changeset
120
5e72d136a39e Uploaded
devteam
parents:
diff changeset
121 #if str( $fastq_input.paired_stats.paired_stats_selector ) == "set": ## emit -I only when insert statistics were requested; the boolean renders as its truevalue "set", never "True"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
122 -I "${fastq_input.paired_stats.iset_stats}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
123 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
124
5e72d136a39e Uploaded
devteam
parents:
diff changeset
125 "${reference_fasta_filename}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
126
5e72d136a39e Uploaded
devteam
parents:
diff changeset
127 "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
128
5e72d136a39e Uploaded
devteam
parents:
diff changeset
129 #else:
5e72d136a39e Uploaded
devteam
parents:
diff changeset
130
5e72d136a39e Uploaded
devteam
parents:
diff changeset
131 "${reference_fasta_filename}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
132
5e72d136a39e Uploaded
devteam
parents:
diff changeset
133 "${fastq_input.fastq_input1}"
5e72d136a39e Uploaded
devteam
parents:
diff changeset
134
5e72d136a39e Uploaded
devteam
parents:
diff changeset
135 #end if
5e72d136a39e Uploaded
devteam
parents:
diff changeset
136
5e72d136a39e Uploaded
devteam
parents:
diff changeset
137 | samtools view -Sb - > "${bam_output}" ## convert the SAM stream to BAM; the output path is quoted like every other path in this command
5e72d136a39e Uploaded
devteam
parents:
diff changeset
138
5e72d136a39e Uploaded
devteam
parents:
diff changeset
139 </command>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
140
5e72d136a39e Uploaded
devteam
parents:
diff changeset
141 <inputs>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
142
5e72d136a39e Uploaded
devteam
parents:
diff changeset
143 <conditional name="reference_source">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
144 <param name="reference_source_selector" type="select" label="Load reference genome from">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
145 <option value="cached">Local cache</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
146 <option value="history">History</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
147 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
148 <when value="cached">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
149 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
150 <options from_data_table="bwa_mem_indexes">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
151 <filter type="sort_by" column="2" />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
152 <validator type="no_options" message="No indexes are available" />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
153 </options>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
154 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
155 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
156 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
157 <when value="history">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
158 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
159 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
160 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
161 <conditional name="fastq_input">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
162 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
163 <option value="paired">Paired</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
164 <option value="single">Single</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
165 <option value="paired_iv">Paired Interleaved</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
166 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
167 <when value="paired">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
168 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
169 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
170
5e72d136a39e Uploaded
devteam
parents:
diff changeset
171 <!-- PE stat selection block 1: If you make any changes in this conditional block, copy them to PE stat selection block 2 below as well -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
172
5e72d136a39e Uploaded
devteam
parents:
diff changeset
173 <conditional name="paired_stats">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
174 <param name="paired_stats_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Specify insert size statistics?" help="-I; if you choose to not specify, it will be inferred from the data"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
175 <when value="set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
176
5e72d136a39e Uploaded
devteam
parents:
diff changeset
177 <param name="iset_stats" type="text" value="250" size="10" label="Enter mean, standard deviation, max, and min for insert lengths in the form mean,sd,max,min" help="-I; only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
178 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
179 <valid initial="string.digits"><add value=","/> </valid>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
180 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
181 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
182
5e72d136a39e Uploaded
devteam
parents:
diff changeset
183 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
184 <when value="do_not_set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
185 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
186 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
187 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
188
5e72d136a39e Uploaded
devteam
parents:
diff changeset
189 <!-- end of PE stat selection block 1 -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
190
5e72d136a39e Uploaded
devteam
parents:
diff changeset
191 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
192 <when value="single">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
193 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
194 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
195 <when value="paired_iv">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
196 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
197
5e72d136a39e Uploaded
devteam
parents:
diff changeset
198 <!-- PE stat selection block 2: If you make any changes in this conditional block, copy them to PE stat selection block 1 above as well -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
199
5e72d136a39e Uploaded
devteam
parents:
diff changeset
200 <conditional name="iv_stats">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
201 <param name="iv_stats_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Specify insert size statistics?" help="-I; if you choose to not specify, it will be inferred from the data"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
202 <when value="set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
203
5e72d136a39e Uploaded
devteam
parents:
diff changeset
204 <param name="iset_stats" type="text" value="250" size="10" label="Enter mean, standard deviation, max, and min for insert lengths in the form mean,sd,max,min" help="-I; only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
205 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
206 <valid initial="string.digits"><add value=","/> </valid>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
207 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
208 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
209
5e72d136a39e Uploaded
devteam
parents:
diff changeset
210 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
211 <when value="do_not_set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
212 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
213 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
214 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
215
5e72d136a39e Uploaded
devteam
parents:
diff changeset
216 <!-- end of PE stat selection block 2 -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
217
5e72d136a39e Uploaded
devteam
parents:
diff changeset
218 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
219 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
220
5e72d136a39e Uploaded
devteam
parents:
diff changeset
221 <conditional name="rg">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
222 <param name="rg_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Specify readgroup information?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
223 <when value="set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
224 <param name="ID" type="text" value="readgroup1" size="20" label="Specify readgroup ID" help="This value must be unique among multiple samples in your experiment">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
225 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
226 <valid initial="string.printable"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
227 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
228 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
229 <param name="SM" type="text" value="blood" size="20" label="Specify readgroup sample name (SM)" help="This value should be descriptive">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
230 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
231 <valid initial="string.printable"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
232 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
233 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
234 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
235 <when value="do_not_set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
236 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
237 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
238 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
239
5e72d136a39e Uploaded
devteam
parents:
diff changeset
240 <conditional name="analysis_type">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
241 <param name="analysis_type_selector" type="select" label="Select analysis mode">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
242 <option value="illumina">1.Simple Illumina mode</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
243 <option value="pacbio">2.PacBio mode</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
244 <option value="full">3.Full list of options</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
245 <option value="cline">4.Input parameters on the command line</option>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
246 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
247 <when value="illumina">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
248 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
249 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
250 <when value="pacbio">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
251 <!-- do nothing. all magic happens within <command> tag -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
252 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
253 <when value="full">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
254 <conditional name="algorithmic_options">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
255 <param name="algorithmic_options_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options." />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
256 <when value="set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
257 <param name="k" type="integer" value="19" label="minimum seed length" help="-k; default=19"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
258 <param name="w" type="integer" value="100" label="band width for banded alignment" help="-w; default=100"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
259 <param name="d" type="integer" value="100" label="off-diagonal X-dropoff" help="-d; default=100"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
260 <param name="r" type="float" value="1.5" label="look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
261 <param name="y" type="integer" value="0" label="find maximum exact matches (MEMs) longer than -k * -r with size less than THIS VALUE" help="-y; default=0"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
262 <param name="c" type="integer" value="500" label="skip seeds with more than that many occurrences" help="-c; default=500"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
263 <param name="D" type="float" value="0.5" label="drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
264 <param name="W" type="integer" value="0" label="discard a chain if seeded bases shorter than" help="-W; default=0"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
265 <param name="m" type="integer" value="50" label="perform at most this many rounds of mate rescues for each read" help="-m; default=50"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
266 <param name="S" type="boolean" truevalue="-S" falsevalue="" label="skip mate rescue" help="-S"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
267 <param name="P" type="boolean" truevalue="-P" falsevalue="" label="skip pairing; mate rescue performed unless -S also in use" help="-P"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
268 <param name="e" type="boolean" truevalue="-e" falsevalue="" label="discard full-length exact matches" help="-e"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
269 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
270 <when value="do_not_set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
271 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
272 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
273 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
274 <conditional name="scoring_options">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
275 <param name="scoring_options_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options." />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
276 <when value="set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
277 <param name="A" type="integer" value="1" label="score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U; default=1"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
278 <param name="B" type="integer" value="4" label="penalty for mismatch" help="-B; default=4"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
279 <param name="O" type="text" value="6,6" label="gap open penalty for deletions and insertions" help="-O; default=6,6">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
280 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
281 <valid initial="string.digits"><add value=","/> </valid>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
282 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
283 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
284 <param name="E" type="text" value="1,1" label="gap extension penalty; a gap of size k cost &#39;-O + -E*k&#39; " help="-E; default=1,1">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
285 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
286 <valid initial="string.digits"><add value=","/> </valid>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
287 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
288 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
289 <param name="L" type="text" value="5,5" label="penalty for 5&#39;-end and 3&#39;-end clipping" help="-L; default=5,5">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
290 <sanitizer invalid_char="">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
291 <valid initial="string.digits"><add value=","/> </valid>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
292 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
293 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
294 <param name="U" type="integer" value="17" label="penalty for an unpaired read pair" help="-U; default=17"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
295 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
296 <when value="do_not_set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
297 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
298 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
299 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
300 <conditional name="io_options">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
301 <param name="io_options_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options." />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
302 <when value="set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
303 <param name="T" type="integer" value="30" label="minimum score to output" help="-T; default=30"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
304 <param name="h" type="integer" value="5" label="if there are this many hits with score >80% of the max score, output all in XA tag" help="-h; default=5"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
305 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="output all alignments for single-ends or unpaired paired-ends" help="-a"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
306 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="append FASTA/FASTQ comment to BAM output" help="-C"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
307 <param name="V" type="boolean" truevalue="-V" falsevalue="" label="output the reference FASTA header in the XR tag" help="-V"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
308 <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="use soft clipping for supplementary alignments" help="-Y"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
309 <param name="M" type="boolean" truevalue="-M" falsevalue="" label="mark shorter split hits as secondary" help="-M"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
310 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
311 <when value="do_not_set">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
312 <!-- do nothing -->
5e72d136a39e Uploaded
devteam
parents:
diff changeset
313 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
314 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
315 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
316 <when value="cline">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
317 <param name="cline" size="60" type="text" value="-T 30 -c 250" label="Type command line options here" help="All parameters that DO NOT involve filenames can be typed here.">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
318 <sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
319 <valid initial="string.printable">
5e72d136a39e Uploaded
devteam
parents:
diff changeset
320 <remove value="&apos;"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
321 </valid>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
322 </sanitizer>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
323 </param>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
324 </when>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
325 </conditional>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
326 </inputs>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
327
5e72d136a39e Uploaded
devteam
parents:
diff changeset
328 <outputs>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
329 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
330 </outputs>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
331
5e72d136a39e Uploaded
devteam
parents:
diff changeset
332 <tests>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
333 <test>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
334 <param name="reference_source_selector" value="history" />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
335 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
336 <param name="fastq_input_selector" value="paired"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
337 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
338 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
339 <param name="analysis_type_selector" value="illumina"/>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
340 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
341 </test>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
342 </tests>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
343 <stdio>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
344 <exit_code range="1:" />
5e72d136a39e Uploaded
devteam
parents:
diff changeset
345 </stdio>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
346 <help>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
347
5e72d136a39e Uploaded
devteam
parents:
diff changeset
348 **What it does**
5e72d136a39e Uploaded
devteam
parents:
diff changeset
349
5e72d136a39e Uploaded
devteam
parents:
diff changeset
350 From http://arxiv.org/abs/1303.3997:
5e72d136a39e Uploaded
devteam
parents:
diff changeset
351
5e72d136a39e Uploaded
devteam
parents:
diff changeset
352 BWA-MEM is a new alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
353 It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
354 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
355 For mapping 100bp sequences, BWA-MEM shows better performance than several state-of-art read aligners to date.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
356
5e72d136a39e Uploaded
devteam
parents:
diff changeset
357 It is best suited for mapping long (>70 nt) reads against large reference genomes.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
358
5e72d136a39e Uploaded
devteam
parents:
diff changeset
359 This Galaxy tool wraps the bwa-mem module of the bwa read mapping tool. The Galaxy implementation takes fastq files as input and produces output in BAM (not SAM) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
5e72d136a39e Uploaded
devteam
parents:
diff changeset
360
5e72d136a39e Uploaded
devteam
parents:
diff changeset
361 -----
5e72d136a39e Uploaded
devteam
parents:
diff changeset
362
5e72d136a39e Uploaded
devteam
parents:
diff changeset
363 **Galaxy-specific option**
5e72d136a39e Uploaded
devteam
parents:
diff changeset
364
5e72d136a39e Uploaded
devteam
parents:
diff changeset
365 Galaxy allows four levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are:
5e72d136a39e Uploaded
devteam
parents:
diff changeset
366
5e72d136a39e Uploaded
devteam
parents:
diff changeset
367 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to a reference using default parameters. It is equivalent to the following command: bwa mem &lt;reference index&gt; &lt;fastq dataset1&gt; [fastq dataset2]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
368 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 &lt;reference index&gt; &lt;PacBio dataset in fastq format&gt;
5e72d136a39e Uploaded
devteam
parents:
diff changeset
369 3. *Full list of options*: Allows access to all options through Galaxy interface.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
370 4. *Input parameters on the command line*: Similar to the choice above but for those who do not like clicking. Here options can be directly typed into a text box.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
371
5e72d136a39e Uploaded
devteam
parents:
diff changeset
372 ------
5e72d136a39e Uploaded
devteam
parents:
diff changeset
373
5e72d136a39e Uploaded
devteam
parents:
diff changeset
374 **BWA MEM options**
5e72d136a39e Uploaded
devteam
parents:
diff changeset
375
5e72d136a39e Uploaded
devteam
parents:
diff changeset
376 Each Galaxy parameter widget corresponds to command line flags listed below:
5e72d136a39e Uploaded
devteam
parents:
diff changeset
377
5e72d136a39e Uploaded
devteam
parents:
diff changeset
378 Algorithm options::
5e72d136a39e Uploaded
devteam
parents:
diff changeset
379
5e72d136a39e Uploaded
devteam
parents:
diff changeset
380 -k INT minimum seed length [19]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
381 -w INT band width for banded alignment [100]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
382 -d INT off-diagonal X-dropoff [100]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
383 -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
384 -y INT find MEMs longer than {-k} * {-r} with size less than INT [0]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
385 -c INT skip seeds with more than INT occurrences [500]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
386 -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
387 -W INT discard a chain if seeded bases shorter than INT [0]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
388 -m INT perform at most INT rounds of mate rescues for each read [50]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
389 -S skip mate rescue
5e72d136a39e Uploaded
devteam
parents:
diff changeset
390 -P skip pairing; mate rescue performed unless -S also in use
5e72d136a39e Uploaded
devteam
parents:
diff changeset
391 -e discard full-length exact matches
5e72d136a39e Uploaded
devteam
parents:
diff changeset
392
5e72d136a39e Uploaded
devteam
parents:
diff changeset
393 Scoring options::
5e72d136a39e Uploaded
devteam
parents:
diff changeset
394
5e72d136a39e Uploaded
devteam
parents:
diff changeset
395 -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
396 -B INT penalty for a mismatch [4]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
397 -O INT[,INT] gap open penalties for deletions and insertions [6,6]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
398 -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
399 -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
400 -U INT penalty for an unpaired read pair [17]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
401
5e72d136a39e Uploaded
devteam
parents:
diff changeset
402 Input/output options::
5e72d136a39e Uploaded
devteam
parents:
diff changeset
403
5e72d136a39e Uploaded
devteam
parents:
diff changeset
404 -p first query file consists of interleaved paired-end sequences
5e72d136a39e Uploaded
devteam
parents:
diff changeset
405 -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
406
5e72d136a39e Uploaded
devteam
parents:
diff changeset
407 -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
408 -T INT minimum score to output [30]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
409 -h INT if there are &lt;INT hits with score &gt;80% of the max score, output all in XA [5]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
410 -a output all alignments for SE or unpaired PE
5e72d136a39e Uploaded
devteam
parents:
diff changeset
411 -C append FASTA/FASTQ comment to SAM output
5e72d136a39e Uploaded
devteam
parents:
diff changeset
412 -V output the reference FASTA header in the XR tag
5e72d136a39e Uploaded
devteam
parents:
diff changeset
413 -Y use soft clipping for supplementary alignments
5e72d136a39e Uploaded
devteam
parents:
diff changeset
414 -M mark shorter split hits as secondary
5e72d136a39e Uploaded
devteam
parents:
diff changeset
415
5e72d136a39e Uploaded
devteam
parents:
diff changeset
416 -I FLOAT[,FLOAT[,INT[,INT]]]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
417 specify the mean, standard deviation (10% of the mean if absent), max
5e72d136a39e Uploaded
devteam
parents:
diff changeset
418 (4 sigma from the mean if absent) and min of the insert size distribution.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
419 FR orientation only. [inferred]
5e72d136a39e Uploaded
devteam
parents:
diff changeset
420
5e72d136a39e Uploaded
devteam
parents:
diff changeset
421 ------
5e72d136a39e Uploaded
devteam
parents:
diff changeset
422
5e72d136a39e Uploaded
devteam
parents:
diff changeset
423 .. class:: warningmark
5e72d136a39e Uploaded
devteam
parents:
diff changeset
424
5e72d136a39e Uploaded
devteam
parents:
diff changeset
425 **An important note on Read Groups**
5e72d136a39e Uploaded
devteam
parents:
diff changeset
426
5e72d136a39e Uploaded
devteam
parents:
diff changeset
427 One of the recommended best practices in NGS analysis is adding read group information to BAM files. You can do this directly in the BWA MEM interface using the
5e72d136a39e Uploaded
devteam
parents:
diff changeset
428 **Specify readgroup information?** widget. If you are not familiar with readgroups, you should know that this is effectively a way to tag reads with an additional ID.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
429 This allows you to combine BAM files from, for example, multiple BWA MEM runs into a single dataset. This significantly simplifies downstream processing as
5e72d136a39e Uploaded
devteam
parents:
diff changeset
430 instead of dealing with multiple datasets you have to handle only one. This is possible because the readgroup information allows you to identify
5e72d136a39e Uploaded
devteam
parents:
diff changeset
431 data from different experiments even if they are combined in one file. Many downstream analysis tools such as variant callers (e.g., FreeBayes or Naive Variant Caller
5e72d136a39e Uploaded
devteam
parents:
diff changeset
432 present in Galaxy) are aware of readgroups and will automatically generate calls for each individual sample even if they are combined within a single file.
5e72d136a39e Uploaded
devteam
parents:
diff changeset
433
5e72d136a39e Uploaded
devteam
parents:
diff changeset
434 -----
5e72d136a39e Uploaded
devteam
parents:
diff changeset
435
5e72d136a39e Uploaded
devteam
parents:
diff changeset
436 .. class:: infomark
5e72d136a39e Uploaded
devteam
parents:
diff changeset
437
5e72d136a39e Uploaded
devteam
parents:
diff changeset
438 **More info**
5e72d136a39e Uploaded
devteam
parents:
diff changeset
439
5e72d136a39e Uploaded
devteam
parents:
diff changeset
440 To obtain more information about BWA MEM and ask questions use these resources:
5e72d136a39e Uploaded
devteam
parents:
diff changeset
441
5e72d136a39e Uploaded
devteam
parents:
diff changeset
442 1. https://biostar.usegalaxy.org/
5e72d136a39e Uploaded
devteam
parents:
diff changeset
443 2. https://www.biostars.org/
5e72d136a39e Uploaded
devteam
parents:
diff changeset
444 3. https://github.com/lh3/bwa
5e72d136a39e Uploaded
devteam
parents:
diff changeset
445 4. http://bio-bwa.sourceforge.net/
5e72d136a39e Uploaded
devteam
parents:
diff changeset
446
5e72d136a39e Uploaded
devteam
parents:
diff changeset
447
5e72d136a39e Uploaded
devteam
parents:
diff changeset
448 </help>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
449 <citations>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
450 <citation type="doi">10.1093/bioinformatics/btp324</citation>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
451 <citation type="doi">10.1093/bioinformatics/btp698</citation>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
452 <citation type="bibtex">@misc{1303.3997,
5e72d136a39e Uploaded
devteam
parents:
diff changeset
453 Author = {Heng Li},
5e72d136a39e Uploaded
devteam
parents:
diff changeset
454 Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM},
5e72d136a39e Uploaded
devteam
parents:
diff changeset
455 Year = {2013},
5e72d136a39e Uploaded
devteam
parents:
diff changeset
456 Eprint = {arXiv:1303.3997},
5e72d136a39e Uploaded
devteam
parents:
diff changeset
457 url = {http://arxiv.org/abs/1303.3997},
5e72d136a39e Uploaded
devteam
parents:
diff changeset
458 }</citation>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
459 </citations>
5e72d136a39e Uploaded
devteam
parents:
diff changeset
460 </tool>