0
|
1 <?xml version="1.0"?>
|
|
2 <tool id="freebayes" name="FreeBayes" version="0.0.2">
|
|
3 <requirements> <!-- Packages needed at run time: freebayes itself, and samtools, which the command uses to index a history-supplied reference (samtools faidx) -->
|
|
4 <requirement type="package" version="0.9.4_9696d0ce8a962f7bb61c4791be5ce44312b81cf8">freebayes</requirement>
|
|
5 <requirement type="package" version="0.1.18">samtools</requirement>
|
|
6 </requirements>
|
|
7 <description> - Bayesian genetic variant detector</description>
|
|
8 <command><![CDATA[
|
|
9 ##set up input files: a history reference is symlinked to a local name and indexed with samtools faidx (a failure is reported on stderr); a cached reference is used in place via the path field of the selected data table entry
|
|
10 #set $reference_fasta_filename = "localref.fa"
|
|
11 #if str( $reference_source.reference_source_selector ) == "history":
|
|
12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
|
|
13 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 &&
|
|
14 #else:
|
|
15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
|
|
16 #end if
|
|
17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
|
|
18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
|
|
19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
|
|
20 #end for
|
|
21 ##finished setting up inputs: every BAM and its index now sit side by side as localbam_N.bam and localbam_N.bam.bai
|
|
22
|
|
23 ##start FreeBayes commandline: one --bam argument per linked BAM, then the reference
|
|
24 freebayes
|
|
25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
|
|
26 --bam "localbam_${bam_count}.bam"
|
|
27 #end for
|
|
28 --fasta-reference "${reference_fasta_filename}"
|
|
29
|
|
30 ##outputs
|
|
31 --vcf "${output_vcf}"
|
|
32
|
|
33 ##advanced options: the remainder of the template is emitted only when the Advanced option set is selected
|
|
34 #if str( $options_type.options_type_selector ) == "advanced":
|
|
35 ##additional outputs
|
|
36 #if $options_type.output_trace_option:
|
|
37 --trace "${output_trace}"
|
|
38 #end if
|
|
39 #if $options_type.output_failed_alleles_option:
|
|
40 --failed-alleles "${output_failed_alleles_bed}"
|
|
41 #end if
|
|
42
|
|
43 ##additional inputs
|
|
44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
|
|
45 --targets "${options_type.target_limit_type.input_target_bed}"
|
|
46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
|
|
47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
|
|
48 #end if
|
|
49 #if $options_type.input_sample_file:
|
|
50 --samples "${options_type.input_sample_file}"
|
|
51 #end if
|
|
52 #if $options_type.input_populations_file:
|
|
53 --populations "${options_type.input_populations_file}"
|
|
54 #end if
|
|
55 #if $options_type.input_cnv_map_bed:
|
|
56 --cnv-map "${options_type.input_cnv_map_bed}"
|
|
57 #end if
|
|
58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
|
|
59 --variant-input "${options_type.input_variant_type.input_variant_vcf}"
|
|
60 ${options_type.input_variant_type.only_use_input_alleles}
|
|
61 #end if
|
|
62
|
|
63 ##reporting
|
|
64 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
|
|
65 --pvar "${options_type.section_reporting_type.pvar}"
|
|
66 ${options_type.section_reporting_type.show_reference_repeats}
|
|
67 #end if
|
|
68
|
|
69 ##population model
|
|
70 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
|
|
71 --theta "${options_type.section_population_model_type.theta}"
|
|
72 --ploidy "${options_type.section_population_model_type.ploidy}"
|
|
73 ${options_type.section_population_model_type.pooled}
|
|
74 #end if
|
|
75
|
|
76 ##reference allele
|
|
77 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
|
|
78 --use-reference-allele
|
|
79 ${options_type.use_reference_allele_type.diploid_reference}
|
|
80 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
|
|
81 #end if
|
|
82
|
|
83 ##allele scope; max_complex_gap is optional and is tested via str() so that an explicit 0 is still passed to FreeBayes
|
|
84 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
|
|
85 ${options_type.section_allele_scope_type.no_snps}
|
|
86 ${options_type.section_allele_scope_type.no_indels}
|
|
87 ${options_type.section_allele_scope_type.no_mnps}
|
|
88 ${options_type.section_allele_scope_type.no_complex}
|
|
89 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
|
|
90 #if str( $options_type.section_allele_scope_type.max_complex_gap ) != "":
|
|
91 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
|
|
92 #end if
|
|
93 #end if
|
|
94
|
|
95 ##indel realignment: the boolean renders as its flag text or as an empty string
|
|
96 ${options_type.left_align_indels}
|
|
97
|
|
98 ##input filters; the optional read limits are tested via str() so that an explicit 0 is still passed to FreeBayes
|
|
99 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
|
|
100 ${options_type.section_input_filters_type.use_duplicate_reads}
|
|
101 #if str( $options_type.section_input_filters_type.no_filter_type.no_filter_type_selector ) == "apply_filters":
|
|
102 --min-mapping-quality "${options_type.section_input_filters_type.no_filter_type.min_mapping_quality}"
|
|
103 --min-base-quality "${options_type.section_input_filters_type.no_filter_type.min_base_quality}"
|
|
104 --min-supporting-quality "${options_type.section_input_filters_type.no_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.no_filter_type.min_supporting_quality_bq}"
|
|
105 #else:
|
|
106 --no-filters
|
|
107 #end if
|
|
108 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
|
|
109 #if str( $options_type.section_input_filters_type.read_mismatch_limit ) != "":
|
|
110 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
|
|
111 #end if
|
|
112 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
|
|
113 #if str( $options_type.section_input_filters_type.read_snp_limit ) != "":
|
|
114 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
|
|
115 #end if
|
|
116 #if str( $options_type.section_input_filters_type.read_indel_limit ) != "":
|
|
117 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
|
|
118 #end if
|
|
119 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
|
|
120 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
|
|
121 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
|
|
122 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
|
|
123 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
|
|
124 --min-coverage "${options_type.section_input_filters_type.min_coverage}"
|
|
125 #end if
|
|
126
|
|
127 ##bayesian priors
|
|
128 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
|
|
129 ${options_type.section_bayesian_priors_type.no_ewens_priors}
|
|
130 ${options_type.section_bayesian_priors_type.no_population_priors}
|
|
131 ${options_type.section_bayesian_priors_type.hwe_priors}
|
|
132 #end if
|
|
133
|
|
134 ##observation prior expectations
|
|
135 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
|
|
136 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
|
|
137 ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
|
|
138 #end if
|
|
139
|
|
140 ##algorithmic features; genotype_variant_threshold is optional and is tested via str() so that an explicit 0 is still passed to FreeBayes
|
|
141 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
|
|
142 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
|
|
143 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
|
|
144 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
|
|
145 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
|
|
146 ${options_type.section_algorithmic_features_type.no_permute}
|
|
147 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
|
|
148 #if str( $options_type.section_algorithmic_features_type.genotype_variant_threshold ) != "":
|
|
149 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
|
|
150 #end if
|
|
151 ${options_type.section_algorithmic_features_type.use_mapping_quality}
|
|
152 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
|
|
153 ${options_type.section_algorithmic_features_type.no_marginals}
|
|
154 #end if
|
|
155
|
|
156 #end if
|
|
157 ]]></command>
|
|
158 <inputs>
|
|
159 <conditional name="reference_source"> <!-- Selects where the reference comes from; both branches define input_bams and ref_file so the command template can use the same names in either case -->
|
|
160 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
|
|
161 <option value="cached">Locally cached</option>
|
|
162 <option value="history">History</option>
|
|
163 </param>
|
|
164 <when value="cached"> <!-- ref_file is picked from the sam_fa_indexes data table; the command reads the path field of the chosen entry -->
|
|
165 <repeat name="input_bams" title="Sample BAM file" min="1">
|
|
166 <param name="input_bam" type="data" format="bam" label="BAM file">
|
|
167 <validator type="unspecified_build" />
|
|
168 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." />
|
|
169 </param>
|
|
170 </repeat>
|
|
171 <param name="ref_file" type="select" label="Using reference genome">
|
|
172 <options from_data_table="sam_fa_indexes">
|
|
173 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
|
|
174 </options>
|
|
175 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
|
|
176 </param>
|
|
177 </when>
|
|
178 <when value="history"> <!-- FIX ME!!!! --> <!-- ref_file is a FASTA dataset from the history; the command symlinks it locally and runs samtools faidx on the link -->
|
|
179 <repeat name="input_bams" title="Sample BAM file" min="1">
|
|
180 <param name="input_bam" type="data" format="bam" label="BAM file" />
|
|
181 </repeat>
|
|
182 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
|
|
183 </when>
|
|
184 </conditional>
|
|
185
|
|
186 <conditional name="options_type"> <!-- Basic adds no extra flags to the FreeBayes command line; Advanced exposes the option groups below, each consumed by the matching section of the command template -->
|
|
187 <param name="options_type_selector" type="select" label="Basic or Advanced options">
|
|
188 <option value="basic" selected="True">Basic</option>
|
|
189 <option value="advanced">Advanced</option>
|
|
190 </param>
|
|
191 <when value="basic">
|
|
192 <!-- Do nothing here -->
|
|
193 </when>
|
|
194 <when value="advanced">
|
|
195
|
|
196 <!-- output: each checkbox also enables the matching extra dataset through the filters in the outputs section -->
|
|
197 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" />
|
|
198 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" />
|
|
199
|
|
200
|
|
201 <!-- input: optional target restriction and auxiliary files -->
|
|
202 <conditional name="target_limit_type">
|
|
203 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets">
|
|
204 <option value="do_not_limit" selected="True">Do not limit</option>
|
|
205 <option value="limit_by_target_file">Limit by target file</option>
|
|
206 <option value="limit_by_region">Limit to region</option>
|
|
207 </param>
|
|
208 <when value="do_not_limit">
|
|
209 <!-- Do nothing here -->
|
|
210 </when>
|
|
211 <when value="limit_by_target_file">
|
|
212 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." />
|
|
213 </when>
|
|
214 <when value="limit_by_region">
|
|
215 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? -->
|
|
216 <param name="region_start" type="integer" label="Region Start" value="" />
|
|
217 <param name="region_end" type="integer" label="Region End" value="" />
|
|
218 </when>
|
|
219 </conditional>
|
|
220 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" />
|
|
221 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" />
|
|
222 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" />
|
|
223 <conditional name="input_variant_type">
|
|
224 <param name="input_variant_type_selector" type="select" label="Provide variants file">
|
|
225 <option value="do_not_provide" selected="True">Do not provide</option>
|
|
226 <option value="provide_vcf">Provide VCF file</option>
|
|
227 </param>
|
|
228 <when value="do_not_provide">
|
|
229 <!-- Do nothing here -->
|
|
230 </when>
|
|
231 <when value="provide_vcf">
|
|
232 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" />
|
|
233 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
|
|
234 </when>
|
|
235 </conditional>
|
|
236
|
|
237
|
|
238 <!-- reporting -->
|
|
239 <conditional name="section_reporting_type">
|
|
240 <param name="section_reporting_type_selector" type="select" label="Set Reporting options">
|
|
241 <option value="do_not_set" selected="True">Do not set</option>
|
|
242 <option value="set">Set</option>
|
|
243 </param>
|
|
244 <when value="do_not_set">
|
|
245 <!-- do nothing here -->
|
|
246 </when>
|
|
247 <when value="set">
|
|
248 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" />
|
|
249 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
|
|
250 </when>
|
|
251 </conditional>
|
|
252
|
|
253
|
|
254 <!-- population model -->
|
|
255 <conditional name="section_population_model_type">
|
|
256 <param name="section_population_model_type_selector" type="select" label="Set population model options">
|
|
257 <option value="do_not_set" selected="True">Do not set</option>
|
|
258 <option value="set">Set</option>
|
|
259 </param>
|
|
260 <when value="do_not_set">
|
|
261 <!-- do nothing here -->
|
|
262 </when>
|
|
263 <when value="set">
|
|
264 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
|
|
265 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
|
|
266 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
|
|
267 </when>
|
|
268 </conditional>
|
|
269
|
|
270 <!-- reference allele -->
|
|
271 <conditional name="use_reference_allele_type">
|
|
272 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
|
|
273 <option value="do_not_include_reference_allele" selected="True">Do not include</option>
|
|
274 <option value="include_reference_allele">Include</option>
|
|
275 </param>
|
|
276 <when value="do_not_include_reference_allele">
|
|
277 <!-- Do nothing here -->
|
|
278 </when>
|
|
279 <when value="include_reference_allele">
|
|
280 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
|
|
281 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
|
|
282 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
|
|
283 </when>
|
|
284 </conditional>
|
|
285
|
|
286 <!-- allele scope -->
|
|
287 <conditional name="section_allele_scope_type">
|
|
288 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
|
|
289 <option value="do_not_set" selected="True">Do not set</option>
|
|
290 <option value="set">Set</option>
|
|
291 </param>
|
|
292 <when value="do_not_set">
|
|
293 <!-- do nothing here -->
|
|
294 </when>
|
|
295 <when value="set">
|
|
296 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
|
|
297 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
|
|
298 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" />
|
|
299 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
|
|
300 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
|
|
301 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
|
|
302 </when>
|
|
303 </conditional>
|
|
304
|
|
305 <!-- indel realignment: a single flag, so it sits directly under the advanced branch instead of in its own Set / Do not set group -->
|
|
306 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />
|
|
307
|
|
308 <!-- input filters -->
|
|
309 <conditional name="section_input_filters_type">
|
|
310 <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
|
|
311 <option value="do_not_set" selected="True">Do not set</option>
|
|
312 <option value="set">Set</option>
|
|
313 </param>
|
|
314 <when value="do_not_set">
|
|
315 <!-- do nothing here -->
|
|
316 </when>
|
|
317 <when value="set">
|
|
318 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
|
|
319 <conditional name="no_filter_type">
|
|
320 <param name="no_filter_type_selector" type="select" label="Apply filters">
|
|
321 <option value="apply_filters" selected="True">Apply</option>
|
|
322 <option value="no_filters">Do not apply</option>
|
|
323 </param>
|
|
324 <when value="no_filters">
|
|
325 <!-- No parameters here; for this choice the command template itself adds the no-filters flag -->
|
|
326 </when>
|
|
327 <when value="apply_filters">
|
|
328 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="30" />
|
|
329 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="20" />
|
|
330 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
|
|
331 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
|
|
332 </when>
|
|
333 </conditional>
|
|
334 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is >=" value="10" />
|
|
335 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="" optional="True" />
|
|
336 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="1.0" min="0" max="1" /> <!-- min and max enforce the [0,1] range stated in the label -->
|
|
337 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold" value="" optional="True" />
|
|
338 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
|
|
339 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
|
|
340 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
|
|
341 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
|
|
342 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
|
|
343 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
|
|
344 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
|
|
345 </when>
|
|
346 </conditional>
|
|
347
|
|
348
|
|
349 <!-- bayesian priors -->
|
|
350 <conditional name="section_bayesian_priors_type">
|
|
351 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
|
|
352 <option value="do_not_set" selected="True">Do not set</option>
|
|
353 <option value="set">Set</option>
|
|
354 </param>
|
|
355 <when value="do_not_set">
|
|
356 <!-- do nothing here -->
|
|
357 </when>
|
|
358 <when value="set">
|
|
359 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
|
|
360 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
|
|
361 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
|
|
362 </when>
|
|
363 </conditional>
|
|
364
|
|
365 <!-- observation prior expectations -->
|
|
366 <conditional name="section_observation_prior_expectations_type">
|
|
367 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
|
|
368 <option value="do_not_set" selected="True">Do not set</option>
|
|
369 <option value="set">Set</option>
|
|
370 </param>
|
|
371 <when value="do_not_set">
|
|
372 <!-- do nothing here -->
|
|
373 </when>
|
|
374 <when value="set">
|
|
375 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" />
|
|
376 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
|
|
377 </when>
|
|
378 </conditional>
|
|
379
|
|
380
|
|
381 <!-- algorithmic features -->
|
|
382 <conditional name="section_algorithmic_features_type">
|
|
383 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
|
|
384 <option value="do_not_set" selected="True">Do not set</option>
|
|
385 <option value="set">Set</option>
|
|
386 </param>
|
|
387 <when value="do_not_set">
|
|
388 <!-- do nothing here -->
|
|
389 </when>
|
|
390 <when value="set">
|
|
391 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" />
|
|
392 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
|
|
393 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
|
|
394 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" />
|
|
395 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" />
|
|
396 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
|
|
397 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
|
|
398 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
|
|
399 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
|
|
400 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
|
|
401 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
|
|
402 </when>
|
|
403 </conditional>
|
|
404
|
|
405
|
|
406 </when>
|
|
407 </conditional>
|
|
408
|
|
409 </inputs>
|
|
410 <outputs> <!-- The VCF is always produced; the two extra datasets exist only when Advanced options are selected and the matching checkbox is ticked -->
|
|
411 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" />
|
|
412 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)">
|
|
413 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter>
|
|
414 </data>
|
|
415 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)">
|
|
416 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter>
|
|
417 </data>
|
|
418 </outputs>
|
|
419 <tests>
|
|
420 <test> <!-- Basic-options run with a history reference (phiX.fasta) and one BAM; the VCF is checked with compare="contains" against the expected-lines file -->
|
|
421 <param name="reference_source_selector" value="history" />
|
|
422 <param name="ref_file" ftype="fasta" value="phiX.fasta"/>
|
|
423 <param name="input_bam" ftype="bam" value="gatk/fake_phiX_reads_1.bam"/>
|
|
424 <param name="options_type_selector" value="basic"/>
|
|
425 <output name="output_vcf" file="variant_detection/freebayes/freebayes_out_1.vcf.contains" compare="contains"/>
|
|
426 <!-- <output name="output_failed_alleles_bed" file="empty_file.dat" />
|
|
427 <output name="output_trace" file="variant_detection/freebayes/freebayes_out_1.output_trace" /> -->
|
|
428 </test>
|
|
429 </tests>
|
|
430 <help>
|
|
431 **What it does**
|
|
432
|
|
433 This tool uses FreeBayes to call variants (SNPs and small indels) given a reference sequence and one or more BAM alignment files.
|
|
434
|
|
435 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners.
|
|
436
|
|
437 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development.
|
|
438
|
|
439 Go `here &lt;http://bioinformatics.bc.edu/marthlab/FreeBayes&gt;`_ for details on FreeBayes.
|
|
440
|
|
441 ------
|
|
442
|
|
443 **Inputs**
|
|
444
|
|
445 FreeBayes accepts one or more aligned BAM files and a reference sequence, either a locally cached genome or a FASTA file from the history.
|
|
446
|
|
447
|
|
448 **Outputs**
|
|
449
|
|
450 The output is in the VCF format.
|
|
451
|
|
452 -------
|
|
453
|
|
454 **Settings**::
|
|
455
|
|
456 input and output:
|
|
457
|
|
458 -b --bam FILE Add FILE to the set of BAM files to be analyzed.
|
|
459 -c --stdin Read BAM input on stdin.
|
|
460 -v --vcf FILE Output VCF-format results to FILE.
|
|
461 -f --fasta-reference FILE
|
|
462 Use FILE as the reference sequence for analysis.
|
|
463 An index file (FILE.fai) will be created if none exists.
|
|
464 If neither --targets nor --region are specified, FreeBayes
|
|
465 will analyze every position in this reference.
|
|
466 -t --targets FILE
|
|
467 Limit analysis to targets listed in the BED-format FILE.
|
|
468 -r --region &lt;chrom&gt;:&lt;start_position&gt;..&lt;end_position&gt;
|
|
469 Limit analysis to the specified region, 0-base coordinates,
|
|
470 end_position not included (same as BED format).
|
|
471 -s --samples FILE
|
|
472 Limit analysis to samples listed (one per line) in the FILE.
|
|
473 By default FreeBayes will analyze all samples in its input
|
|
474 BAM files.
|
|
475 --populations FILE
|
|
476 Each line of FILE should list a sample and a population which
|
|
477 it is part of. The population-based bayesian inference model
|
|
478 will then be partitioned on the basis of the populations.
|
|
479 -A --cnv-map FILE
|
|
480 Read a copy number map from the BED file FILE, which has
|
|
481 the format:
|
|
482 reference sequence, start, end, sample name, copy number
|
|
483 ... for each region in each sample which does not have the
|
|
484 default copy number as set by --ploidy.
|
|
485 -L --trace FILE Output an algorithmic trace to FILE.
|
|
486 --failed-alleles FILE
|
|
487 Write a BED file of the analyzed positions which do not
|
|
488 pass --pvar to FILE.
|
|
489 -@ --variant-input VCF
|
|
490 Use variants reported in VCF file as input to the algorithm.
|
|
491 A report will be generated for every record in the VCF file.
|
|
492 -l --only-use-input-alleles
|
|
493 Only provide variant calls and genotype likelihoods for sites
|
|
494 and alleles which are provided in the VCF input, and provide
|
|
495 output in the VCF for all input alleles, not just those which
|
|
496 have support in the data.
|
|
497
|
|
498 reporting:
|
|
499
|
|
500 -P --pvar N Report sites if the probability that there is a polymorphism
|
|
501 at the site is greater than N. default: 0.0001
|
|
502 -_ --show-reference-repeats
|
|
503 Calculate and show information about reference repeats in
|
|
504 the VCF output.
|
|
505
|
|
506 population model:
|
|
507
|
|
508 -T --theta N The expected mutation rate or pairwise nucleotide diversity
|
|
509 among the population under analysis. This serves as the
|
|
510 single parameter to the Ewens Sampling Formula prior model
|
|
511 default: 0.001
|
|
512 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
|
|
513 -J --pooled Assume that samples result from pooled sequencing.
|
|
514 When using this flag, set --ploidy to the number of
|
|
515 alleles in each sample.
|
|
516
|
|
517 reference allele:
|
|
518
|
|
519 -Z --use-reference-allele
|
|
520 This flag includes the reference allele in the analysis as
|
|
521 if it is another sample from the same population.
|
|
522 -H --diploid-reference
|
|
523 If using the reference sequence as a sample (-Z),
|
|
524 treat it as diploid. default: false (reference is haploid)
|
|
525 --reference-quality MQ,BQ
|
|
526 Assign mapping quality of MQ to the reference allele at each
|
|
527 site and base quality of BQ. default: 100,60
|
|
528
|
|
529 allele scope:
|
|
530
|
|
531 -I --no-snps Ignore SNP alleles.
|
|
532 -i --no-indels Ignore insertion and deletion alleles.
|
|
533 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
|
|
534 -u --no-complex Ignore complex events (composites of other classes).
|
|
535 -n --use-best-n-alleles N
|
|
536 Evaluate only the best N SNP alleles, ranked by sum of
|
|
537 supporting quality scores. (Set to 0 to use all; default: all)
|
|
538 -E --max-complex-gap N
|
|
539 Allow complex alleles with contiguous embedded matches of up
|
|
540 to this length.
|
|
541
|
|
542 indel realignment:
|
|
543
|
|
544 -O --left-align-indels
|
|
545 Left-realign and merge gaps embedded in reads. default: false
|
|
546
|
|
547 input filters:
|
|
548
|
|
549 -4 --use-duplicate-reads
|
|
550 Include duplicate-marked alignments in the analysis.
|
|
551 default: exclude duplicates
|
|
552 -m --min-mapping-quality Q
|
|
553 Exclude alignments from analysis if they have a mapping
|
|
554 quality less than Q. default: 30
|
|
555 -q --min-base-quality Q
|
|
556 Exclude alleles from analysis if their supporting base
|
|
557 quality is less than Q. default: 20
|
|
558 -R --min-supporting-quality MQ,BQ
|
|
559 In order to consider an alternate allele, at least one supporting
|
|
560 alignment must have mapping quality MQ, and one supporting
|
|
561 allele must have base quality BQ. default: 0,0, unset
|
|
562 -Q --mismatch-base-quality-threshold Q
|
|
563 Count mismatches toward --read-mismatch-limit if the base
|
|
564 quality of the mismatch is >= Q. default: 10
|
|
565 -U --read-mismatch-limit N
|
|
566 Exclude reads with more than N mismatches where each mismatch
|
|
567 has base quality >= mismatch-base-quality-threshold.
|
|
568 default: ~unbounded
|
|
569 -z --read-max-mismatch-fraction N
|
|
570 Exclude reads with more than N [0,1] fraction of mismatches where
|
|
571 each mismatch has base quality >= mismatch-base-quality-threshold
|
|
572 default: 1.0
|
|
573 -$ --read-snp-limit N
|
|
574 Exclude reads with more than N base mismatches, ignoring gaps
|
|
575 with quality >= mismatch-base-quality-threshold.
|
|
576 default: ~unbounded
|
|
577 -e --read-indel-limit N
|
|
578 Exclude reads with more than N separate gaps.
|
|
579 default: ~unbounded
|
|
580 -0 --no-filters Do not use any input base and mapping quality filters
|
|
581 Equivalent to -m 0 -q 0 -R 0 -S 0
|
|
582 -x --indel-exclusion-window
|
|
583 Ignore portions of alignments this many bases from a
|
|
584 putative insertion or deletion allele. default: 0
|
|
585 -F --min-alternate-fraction N
|
|
586 Require at least this fraction of observations supporting
|
|
587 an alternate allele within a single individual
|
|
588 in order to evaluate the position. default: 0.0
|
|
589 -C --min-alternate-count N
|
|
590 Require at least this count of observations supporting
|
|
591 an alternate allele within a single individual in order
|
|
592 to evaluate the position. default: 1
|
|
593 -3 --min-alternate-qsum N
|
|
594 Require at least this sum of quality of observations supporting
|
|
595 an alternate allele within a single individual in order
|
|
596 to evaluate the position. default: 0
|
|
597 -G --min-alternate-total N
|
|
598 Require at least this count of observations supporting
|
|
599 an alternate allele within the total population in order
|
|
600 to use the allele in analysis. default: 1
|
|
601 -! --min-coverage N
|
|
602 Require at least this coverage to process a site. default: 0
|
|
603
|
|
604 bayesian priors:
|
|
605
|
|
606 -Y --no-ewens-priors
|
|
607 Turns off the Ewens' Sampling Formula component of the priors.
|
|
608 -k --no-population-priors
|
|
609 Equivalent to --pooled --no-ewens-priors
|
|
610 -w --hwe-priors Use the probability of the combination arising under HWE given
|
|
611 the allele frequency as estimated by observation frequency.
|
|
612
|
|
613 observation prior expectations:
|
|
614
|
|
615 -V --binomial-obs-priors
|
|
616 Incorporate expectations about observations into the priors.
|
|
617 Uses read placement probability, strand balance probability,
|
|
618 and read position (5'-3') probability.
|
|
619 -a --allele-balance-priors
|
|
620 Use aggregate probability of observation balance between alleles
|
|
621 as a component of the priors. Best for observations with minimal
|
|
622 inherent reference bias.
|
|
623
|
|
624 algorithmic features:
|
|
625
|
|
626 -M --site-selection-max-iterations N
|
|
627 Uses hill-climbing algorithm to search posterior space for N
|
|
628 iterations to determine if the site should be evaluated. Set to 0
|
|
629 to prevent use of this algorithm for site selection, and
|
|
630 to a low integer for improved site selection at a slight
|
|
631 performance penalty. default: 5.
|
|
632 -B --genotyping-max-iterations N
|
|
633 Iterate no more than N times during genotyping step. default: 25.
|
|
634 --genotyping-max-banddepth N
|
|
635 Integrate no deeper than the Nth best genotype by likelihood when
|
|
636 genotyping. default: 6.
|
|
637 -W --posterior-integration-limits N,M
|
|
638 Integrate all genotype combinations in our posterior space
|
|
639 which include no more than N samples with their Mth best
|
|
640 data likelihood. default: 1,3.
|
|
641 -K --no-permute
|
|
642 Do not scale prior probability of genotype combination given allele
|
|
643 frequency by the number of permutations of included genotypes.
|
|
644 -N --exclude-unobserved-genotypes
|
|
645 Skip sample genotypings for which the sample has no supporting reads.
|
|
646 -S --genotype-variant-threshold N
|
|
647 Limit posterior integration to samples where the second-best
|
|
648 genotype likelihood is no more than log(N) from the highest
|
|
649 genotype likelihood for the sample. default: ~unbounded
|
|
650 -j --use-mapping-quality
|
|
651 Use mapping quality of alleles when calculating data likelihoods.
|
|
652 -D --read-dependence-factor N
|
|
653 Incorporate non-independence of reads by scaling successive
|
|
654 observations by this factor during data likelihood
|
|
655 calculations. default: 0.9
|
|
656 -= --no-marginals
|
|
657 Do not calculate the marginal probability of genotypes. Saves
|
|
658 time and improves scaling performance in large populations.
|
|
659
|
|
660
|
|
661 ------
|
|
662
|
|
663 **Citation**
|
|
664
|
|
665 For the underlying tool, please cite `FreeBayes <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_.
|
|
666
|
|
667 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
|
|
668
|
|
669 </help>
|
|
670 </tool>
|