13
|
1 <?xml version="1.0"?>
|
|
2 <tool id="freebayes" name="FreeBayes" version="freebayes-0.9.14">
|
|
<!-- Packages the command template relies on: the freebayes caller itself and
     samtools (used for "samtools faidx" on history-supplied references). -->
<requirements>
    <requirement type="package" version="freebayes-0.9.14_8a407cf5f4">freebayes</requirement>
    <requirement type="package" version="0.1.18">samtools</requirement>
</requirements>
|
|
7 <description> - Bayesian genetic variant detector</description>
|
|
<command><![CDATA[
    ## Cheetah template that builds the FreeBayes command line.
    ## The body is wrapped in CDATA so that the shell operators used below
    ## stay literal and the XML remains well-formed.
    ##
    ## NOTE: every boolean "*_selector" parameter in the inputs section declares
    ## truevalue="set" / falsevalue="do_not_set", so the tests below compare
    ## against "set".

    ## ---------------- set up input files ----------------

    #set $reference_fasta_filename = "localref.fa"

    #if str( $reference_source.reference_source_selector ) == "history":
        ## Reference taken from the history: link it locally and index it.
        ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
        samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 &&
    #else:
        ## Cached reference: use the "path" field of the selected data table row.
        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
    #end if

    ## Link every BAM and its index under a predictable local name.
    #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
        ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
        ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
    #end for

    ## Tabixize optional input_variant_vcf file (for --variant-input option)

    #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
        ln -s "${options_type.optional_inputs.input_variant_type.input_variant_vcf}" input_variant_vcf.vcf.gz &&
        ln -s "${Tabixized_input}" input_variant_vcf.vcf.gz.tbi &&
    #end if

    ## ---------------- finished setting up inputs ----------------

    ## COMMAND LINE STARTS HERE

    freebayes
    #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
        --bam "localbam_${bam_count}.bam"
    #end for
    --fasta-reference "${reference_fasta_filename}"

    ## outputs
    --vcf "${output_vcf}"

    #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
        --targets "${target_limit_type.input_target_bed}"
    #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region":
        --region "${target_limit_type.region_chromosome}:${target_limit_type.region_start}..${target_limit_type.region_end}"
    #end if

    ## advanced options
    #if str( $options_type.options_type_selector ) == "simple":
        ## do nothing: the command line built up to this point is sufficient for simple diploid calling

    #elif str( $options_type.options_type_selector ) == "simple_w_filters":

        --standard-filters
        --min-coverage "${options_type.min_coverage}"

    #elif str( $options_type.options_type_selector ) == "naive":

        --haplotype-length 0
        --min-alternate-count 1
        --min-alternate-fraction 0
        --pooled-continuous
        --report-monomorphic

    #elif str( $options_type.options_type_selector ) == "naive_w_filters":

        --haplotype-length 0
        --min-alternate-count 1
        --min-alternate-fraction 0
        --pooled-continuous
        --report-monomorphic
        --standard-filters
        --min-coverage "${options_type.min_coverage}"

    #elif str( $options_type.options_type_selector ) == "cline":

        ${options_type.cline}

        @optional_inputs_outputs@

    #elif str( $options_type.options_type_selector ) == "full":

        ## optional inputs and outputs

        @optional_inputs_outputs@

        ## REPORTING

        #if str( $options_type.reporting.reporting_selector ) == "set":
            --pvar "${options_type.reporting.pvar}"
        #end if

        ## POPULATION MODEL

        #if str( $options_type.population_model.population_model_selector ) == "set":
            --theta "${options_type.population_model.T}"
            --ploidy "${options_type.population_model.P}"
            ${options_type.population_model.J}
            ${options_type.population_model.K}
        #end if

        ## REFERENCE ALLELE

        #if str( $options_type.reference_allele.reference_allele_selector ) == "set":
            ${options_type.reference_allele.Z}
            --reference-quality "${options_type.reference_allele.reference_quality}"
        #end if

        ## ALLELE SCOPE

        #if str( $options_type.allele_scope.allele_scope_selector ) == "set":
            ${options_type.allele_scope.I}
            ${options_type.allele_scope.i}
            ${options_type.allele_scope.X}
            ${options_type.allele_scope.u}
            -n "${options_type.allele_scope.n}"
            --haplotype-length "${options_type.allele_scope.haplotype_length}"
            --min-repeat-length "${options_type.allele_scope.min_repeat_length}"
            --min-repeat-entropy "${options_type.allele_scope.min_repeat_entropy}"
            ${options_type.allele_scope.no_partial_observations}
        #end if

        ## REALIGNMENT

        ${options_type.O}

        ## INPUT FILTERS

        #if str( $options_type.input_filters.input_filters_selector ) == "set":
            ${options_type.input_filters.use_duplicate_reads}
            -m "${options_type.input_filters.m}"
            -q "${options_type.input_filters.q}"
            -R "${options_type.input_filters.R}"
            -Y "${options_type.input_filters.Y}"

            #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set":
                -Q "${options_type.input_filters.mismatch_filters.Q}"
                -U "${options_type.input_filters.mismatch_filters.U}"
                -z "${options_type.input_filters.mismatch_filters.z}"
                --read-snp-limit "${options_type.input_filters.mismatch_filters.read_snp_limit}"
            #end if

            -e "${options_type.input_filters.e}"
            ## boolean declared in the inputs section; renders as "-0" or nothing
            ${options_type.input_filters.standard_filters}
            -F "${options_type.input_filters.F}"
            -C "${options_type.input_filters.C}"
            --min-alternate-qsum "${options_type.input_filters.min_alternate_qsum}"
            -G "${options_type.input_filters.G}"
            --min-coverage "${options_type.input_filters.min_coverage}"
        #end if

        ## POPULATION AND MAPPABILITY PRIORS

        #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "set":
            ${options_type.population_mappability_priors.k}
            ${options_type.population_mappability_priors.w}
            ${options_type.population_mappability_priors.V}
            ${options_type.population_mappability_priors.a}
        #end if

        ## GENOTYPE LIKELIHOODS

        #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set":
            --base-quality-cap "${options_type.genotype_likelihoods.base_quality_cap}"
            ${options_type.genotype_likelihoods.experimental_gls}
            --prob-contamination "${options_type.genotype_likelihoods.prob_contamination}"
        #end if

        ## ALGORITHMIC FEATURES

        #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set":
            ${options_type.algorithmic_features.report_genotype_likelihood_max}
            -B "${options_type.algorithmic_features.B}"
            --genotyping-max-banddepth "${options_type.algorithmic_features.genotyping_max_banddepth}"
            -W "${options_type.algorithmic_features.W}"
            ${options_type.algorithmic_features.N}

            #if str( $options_type.algorithmic_features.genotype_variant_threshold.genotype_variant_threshold_selector ) == "set":
                -S "${options_type.algorithmic_features.genotype_variant_threshold.S}"
            #end if

            ${options_type.algorithmic_features.j}
            ${options_type.algorithmic_features.H}
            -D "${options_type.algorithmic_features.D}"
            ${options_type.algorithmic_features.genotype_qualities}
        #end if
    #end if
]]></command>
|
|
193
|
|
<macros>
    <!-- Command-line fragment shared by the "full" and "cline" option levels. -->
    <token name="@optional_inputs_outputs@">
        ## This token gets injected into the command in two instances: when options_type.options_type_selector == "full" and "cline"

        ## optional_inputs_selector is a boolean with truevalue="set"
        #if str( $options_type.optional_inputs.optional_inputs_selector ) == "set":

            #if $options_type.optional_inputs.output_trace_option:
                --trace "${output_trace}"
            #end if

            #if $options_type.optional_inputs.output_failed_alleles_option:
                --failed-alleles "${output_failed_alleles_bed}"
            #end if

            #if $options_type.optional_inputs.samples:
                --samples "${options_type.optional_inputs.samples}"
            #end if

            #if $options_type.optional_inputs.populations:
                --populations "${options_type.optional_inputs.populations}"
            #end if

            #if $options_type.optional_inputs.A:
                --cnv-map "${options_type.optional_inputs.A}"
            #end if

            #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
                ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in the "Tabixize optional input_variant_vcf file" section of the command line
                --variant-input input_variant_vcf.vcf.gz
                ${options_type.optional_inputs.input_variant_type.only_use_input_alleles}
            #end if

            #if $options_type.optional_inputs.haplotype_basis_alleles:
                --haplotype-basis-alleles "${options_type.optional_inputs.haplotype_basis_alleles}"
            #end if

            ## boolean declared in optional_file_inputs; renders as "--report-monomorphic" or nothing
            ${options_type.optional_inputs.report_monomorphic}

            #if $options_type.optional_inputs.observation_bias:
                --observation-bias "${options_type.optional_inputs.observation_bias}"
            #end if

            #if $options_type.optional_inputs.contamination_estimates:
                --contamination-estimates "${options_type.optional_inputs.contamination_estimates}"
            #end if

        #end if
    </token>

    <!-- Input widgets matching the token above. -->
    <xml name="optional_file_inputs">
        <conditional name="optional_inputs">
            <param name="optional_inputs_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Do you want to provide additional inputs?" help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates" />
            <when value="set">
                <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" help="--failed-alleles" />
                <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" help="--trace"/>
                <param name="samples" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" help="-s --samples; default=By default FreeBayes will analyze all samples in its input BAM files"/>
                <param name="populations" type="data" format="txt" label="Populations File" optional="True" help="--populations; default=False. Each line of FILE should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" />
                <param name="A" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" help="-A --cnv-map; default=copy number is set to as specified by --ploidy. Read a copy number map from the BED file FILE, which has the format: reference sequence, start, end, sample name, copy number ... for each region in each sample which does not have the default copy number as set by --ploidy."/>
                <conditional name="input_variant_type">
                    <param name="input_variant_type_selector" type="select" label="Provide variants file">
                        <option value="do_not_provide" selected="True">Do not provide</option>
                        <option value="provide_vcf">Provide VCF file</option>
                    </param>
                    <when value="do_not_provide">
                        <!-- Do nothing here -->
                    </when>
                    <when value="provide_vcf">
                        <param name="input_variant_vcf" type="data" format="vcf_bgzip" label="Use variants reported in VCF file as input to the algorithm">
                            <!-- Exposed to the command template as $Tabixized_input -->
                            <conversion name="Tabixized_input" type="tabix" />
                        </param>
                        <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
                    </when>
                </conditional>
                <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" help="--haplotype-basis-alleles" />
                <param name="report_monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="False" label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes." help="--report-monomorphic " />
                <param name="observation_bias" optional="True" type="data" format="tabular" label="Load read length-dependent allele observation biases from" help="--observation-bias; The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" />
                <param name="contamination_estimates" optional="True" type="data" format="tabular" label="Upload per-sample estimates of contamination from" help="--contamination-estimates; The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates." />
            </when>
            <when value="do_not_set">
                <!-- do nothing -->
            </when>
        </conditional>
    </xml>
</macros>
|
|
274
|
|
275 <inputs>
|
|
<!-- Where the reference genome comes from. Both branches expose the same
     names (input_bams repeat, ref_file) so the command template can use
     $reference_source.input_bams and $reference_source.ref_file either way. -->
<conditional name="reference_source">
    <param name="reference_source_selector" type="select" label="Load reference genome from">
        <option value="cached">Local cache</option>
        <option value="history">History</option>
    </param>
    <when value="cached">
        <repeat name="input_bams" title="Sample BAM file" min="1">
            <param name="input_bam" type="data" format="bam" label="BAM file">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
            </param>
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome">
            <options from_data_table="sam_fa_indexes">
                <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
            </options>
            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
    </when>
    <when value="history"> <!-- FIX ME!!!! -->
        <repeat name="input_bams" title="Sample BAM file" min="1">
            <param name="input_bam" type="data" format="bam" label="BAM file" />
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
    </when>
</conditional>
|
|
302
|
|
<!-- Optional restriction of variant calling: either a BED file of targets
     (rendered as targets option) or one chromosome:start..end region. -->
<conditional name="target_limit_type">
    <param name="target_limit_type_selector"
           type="select"
           label="Limit variant calling to a set of regions?"
           help="Sets --targets or --region options">
        <option value="do_not_limit" selected="True">Do not limit</option>
        <option value="limit_by_target_file">Limit by target file</option>
        <option value="limit_by_region">Limit to region</option>
    </param>

    <when value="do_not_limit">
        <!-- Do nothing here -->
    </when>

    <when value="limit_by_target_file">
        <param name="input_target_bed"
               type="data"
               format="bed"
               label="Limit analysis to targets listed in the BED-format FILE."
               help="-t --targets"/>
    </when>

    <when value="limit_by_region">
        <!-- NOTE(review): original author's open question, "only once?" -->
        <param name="region_chromosome"
               type="text"
               label="Region Chromosome"
               value=""
               help="-r --region"/>
        <param name="region_start" type="integer" label="Region Start" value="" />
        <param name="region_end" type="integer" label="Region End" value="" />
    </when>
</conditional>
|
|
321
|
|
322 <conditional name="options_type">
|
|
<!-- Chooses which branch of the command template is rendered; each value
     below has a matching "when" section and a matching #if/#elif test. -->
<param name="options_type_selector"
       type="select"
       label="Choose parameter selection level"
       help="Select how much control over the freebayes run you need">
    <option value="simple" selected="True">1:Simple diploid calling</option>
    <option value="simple_w_filters">2:Simple diploid calling with filtering and coverage</option>
    <option value="naive">3:Frequency-based pooled calling</option>
    <option value="naive_w_filters">4:Frequency-based pooled calling with filtering and coverage</option>
    <option value="full">5:Complete list of all options</option>
    <option value="cline">6:Input parameters on the command line</option>
</param>
|
|
<!-- "Complete list of all options" level. Every nested boolean selector uses
     truevalue="set" / falsevalue="do_not_set"; the command template tests for
     the string "set" before emitting the corresponding option group. -->
<when value="full">

    <expand macro="optional_file_inputs" /> <!-- see macros section -->

    <!-- reporting -->

    <conditional name="reporting">
        <param name="reporting_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set reporting option?" help="Sets -P --pvar option" />
        <when value="set">
            <param name="pvar" type="float" value="0.0" label="Report sites if the probability that there is a polymorphism at the site is greater than" help="-P --pvar; default=0.0. Note that post-filtering is generally recommended over the use of this parameter. " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- population model -->

    <conditional name="population_model">
        <param name="population_model_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set population model?" help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options " />
        <when value="set">
            <param name="T" type="float" value="0.001" label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis" help="-T --theta; default = 0.001. This serves as the single parameter to the Ewens Sampling Formula prior model." />
            <param name="P" type="integer" value="2" label="Set ploidy for the analysis" help="-p --ploidy; default=2" />
            <param name="J" type="boolean" truevalue="-J" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="-J --pooled-discrete; default=False. Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy." />
            <param name="K" type="boolean" truevalue="-K" falsevalue="" checked="False" label="Output all alleles which pass input filters, regardless of genotyping outcome or model" help="-K --pooled-continuous; default=False. " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- reference allele -->

    <conditional name="reference_allele">
        <param name="reference_allele_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Use reference allele?" help="Sets --use-reference-allele and --reference-quality options " />
        <when value="set">
            <param name="Z" type="boolean" truevalue="-Z" falsevalue="" checked="False" label="Include the reference allele in the analysis as if it is another sample from the same population" help="-Z --use-reference-allele; default=False" />
            <param name="reference_quality" type="text" size="8" value="100,60" label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" help="--reference-quality; default=100,60 " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- allelic scope -->

    <conditional name="allele_scope">
        <param name="allele_scope_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set allelic scope?" help="Sets -I, -i, -X, -u, -n, -E, --haplotype-length, --min-repeat-length, --min-repeat-entropy, and --no-partial-observations options " />
        <when value="set">
            <param name="I" type="boolean" truevalue="-I" falsevalue="" checked="False" label="Ignore SNP alleles" help="-I --no-snps; default=False" />
            <param name="i" type="boolean" truevalue="-i" falsevalue="" checked="False" label="Ignore indels alleles" help="-i --no-indels; default=False" />
            <param name="X" type="boolean" truevalue="-X" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" help="-X --no-mnps; default=False" />
            <param name="u" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)." help="-u --no-complex; default=False" />
            <param name="n" type="integer" value="0" label="How many best SNP alleles to evaluate" help="-n --use-best-n-alleles; default=0 (all). Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" />
            <param name="haplotype_length" type="integer" value="3" label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" help="--haplotype-length; default=3." />
            <param name="min_repeat_length" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" help="--min-repeat-length; default=5." />
            <param name="min_repeat_entropy" type="integer" value="0" label="To detect interrupted repeats, build across sequence until it has entropy &gt; (bits per bp)" help="--min-repeat-entropy; default=0 (off)." />
            <param name="no_partial_observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="False" label="Exclude observations which do not fully span the dynamically-determined detection window" help="--no-partial-observations; default=use all observations, dividing partial support across matching haplotypes when generating haplotypes. " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- indel realignment -->

    <param name="O" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Turn off left-alignment of indels?" help="-O --dont-left-align-indels; default=False (do left align). " />

    <!-- input filters -->

    <conditional name="input_filters">
        <param name="input_filters_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set input filters?" help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -$, -e, -0, -F, -C, -3, -G, and -! options " />
        <when value="set">
            <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis." help="-4 --use-duplicate-reads; default=False (exclude duplicates marked as such in alignments)." />
            <param name="m" type="integer" value="1" label="Exclude alignments from analysis if they have a mapping quality less than" help="-m --min-mapping-quality; default=1" />
            <param name="q" type="integer" value="0" label="Exclude alleles from analysis if their supporting base quality less than" help="-q --min-base-quality; default=0" />
            <param name="R" type="integer" value="0" label="Consider any allele in which the sum of qualities of supporting observations is at least" help="-R --min-supporting-allele-qsum; default=0" />
            <param name="Y" type="integer" value="0" label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" help="-Y --min-supporting-mapping-qsum; default=0" />
            <conditional name="mismatch_filters">
                <param name="mismatch_filters_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Perform mismatch filtering?" help="Sets -Q, -U, -z, and -$ options" />
                <when value="set">
                    <param name="Q" type="integer" value="10" label="Count mismatches toward -U (option below) if the base quality of the mismatch is &gt;=" help="-Q --mismatch-base-quality-threshold; default=10" />
                    <param name="U" type="integer" value="1000" optional="True" label="Exclude reads with more than N mismatches where each mismatch has base quality &gt;= Q (option above)" help="-U --read-mismatch-limit; default=~unbound" />
                    <param name="z" type="float" value="1.0" min="0.0" max="1.0" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality &gt;= Q (second option above)" help="-z --read-max-mismatch-fraction; default=1.0" />
                    <param name="read_snp_limit" type="integer" value="1000" label="Exclude reads with more than N base mismatches, ignoring gaps with quality &gt;= Q (third option above)" help="-$ --read-snp-limit N " />
                </when>
                <when value="do_not_set">
                    <!-- do nothing -->
                </when>
            </conditional>
            <param name="e" type="integer" value="1000" label="Exclude reads with more than this number of separate gaps" help="-e --read-indel-limit; default=~unbounded" />
            <param name="standard_filters" type="boolean" truevalue="-0" falsevalue="" checked="False" label="Use stringent input base and mapping quality filters" help="-0 --standard-filters; default=False. Equivalent to -m 30 -q 20 -R 0 -S 0" />
            <param name="F" type="float" value="0.2" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-F --min-alternate-fraction; default=0.2" />
            <param name="C" type="integer" value="2" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-C --min-alternate-count; default=2" />
            <param name="min_alternate_qsum" type="integer" value="0" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-3 --min-alternate-qsum; default=0" />
            <param name="G" type="integer" value="1" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" help="-G --min-alternate-total N; default=1" />
            <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! --min-coverage; default=0 " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- population and mappability priors -->

    <conditional name="population_mappability_priors">
        <param name="population_mappability_priors_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set population and mappability priors?" help="Sets -k, -w, -V, and -a options " />
        <when value="set">
            <param name="k" type="boolean" truevalue="-k" falsevalue="" checked="False" label="No population priors" help="-k --no-population-priors; default=False. Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors." />
            <param name="w" type="boolean" truevalue="-w" falsevalue="" checked="False" label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" help="-w --hwe-priors-off; default=False" />
            <param name="V" type="boolean" truevalue="-V" falsevalue="" checked="False" label="Disable incorporation of prior expectations about observations" help="-V --binomial-obs-priors-off; default=False. Uses read placement probability, strand balance probability, and read position (5'-3') probability." />
            <param name="a" type="boolean" truevalue="-a" falsevalue="" checked="False" label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" help="-a --allele-balance-priors-off; default=False " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- genotype likelihoods -->

    <conditional name="genotype_likelihoods">
        <param name="genotype_likelihoods_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Tweak genotype likelihoods?" help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options. " />
        <when value="set">
            <param name="base_quality_cap" type="integer" value="0" label="Limit estimated observation quality by capping base quality at" help="--base-quality-cap" />
            <param name="experimental_gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="False" label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" help="--experimental-gls; Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples." />
            <param name="prob_contamination" type="float" value="10e-9" label="An estimate of contamination to use for all samples. " help="--prob-contamination; default=10e-9." />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>

    <!-- algorithmic features -->

    <conditional name="algorithmic_features">
        <param name="algorithmic_features_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Tweak algorithmic features?" help="Sets --report-genotype-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, -S, -j, -H, -D, -= options " />
        <when value="set">
            <param name="report_genotype_likelihood_max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="False" label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods." help="--report-genotype-likelihood-max; default=False" />
            <param name="B" type="integer" value="1000" label="Iterate no more than N times during genotyping step" help="-B --genotyping-max-iterations; default=1000." />
            <param name="genotyping_max_banddepth" type="integer" value="6" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" help="--genotyping-max-banddepth; default=6" />
            <param name="W" type="text" size="8" value="1,3" label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" help="-W --posterior-integration-limits; default=1,3" />
            <param name="N" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" help="-N --exclude-unobserved-genotypes; default=False" />
            <conditional name="genotype_variant_threshold">
                <param name="genotype_variant_threshold_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Do you want to limit posterior integration" help="-S --genotype-variant-threshold" />
                <when value="do_not_set">
                    <!-- do nothing -->
                </when>
                <when value="set">
                    <param name="S" value="" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample." help="-S --genotype-variant-threshold; default=~unbounded" />
                </when>
            </conditional>
            <param name="j" type="boolean" truevalue="-j" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" help="-j --use-mapping-quality; default=False" />
            <param name="H" type="boolean" truevalue="-H" falsevalue="" checked="False" label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel" help="-H --harmonic-indel-quality; default=use a minimum Base Quality in flanking sequence." />
            <param name="D" type="float" value="0.9" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" help="-D --read-dependence-factor; default=0.9." />
            <param name="genotype_qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="False" label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" help="-= --genotype-qualities; default=False " />
        </when>
        <when value="do_not_set">
            <!-- do nothing -->
        </when>
    </conditional>
</when>
|
|
<when value="simple">
    <!-- No additional parameters are exposed at this level. -->
</when>
|
|
<when value="simple_w_filters">
    <!-- The standard-filters flag is added to the command line for this level;
         the only setting exposed to the user is the minimum coverage. -->
    <param name="min_coverage"
           type="integer"
           value="0"
           label="Require at least this coverage to process a site"
           help="-! --min-coverage; default=0 " />
</when>
|
|
<when value="naive">
    <!-- No parameters are exposed here. The command line for this level is built with:
         haplotype-length 0, min-alternate-count 1, min-alternate-fraction 0,
         pooled-continuous, report-monomorphic. -->
</when>
|
|
<when value="naive_w_filters">
    <!-- The command line for this level is built with: haplotype-length 0,
         min-alternate-count 1, min-alternate-fraction 0, pooled-continuous,
         report-monomorphic, standard-filters. Minimum coverage is the only
         setting exposed to the user. -->
    <param name="min_coverage"
           type="integer"
           value="0"
           label="Require at least this coverage to process a site"
           help="-! --min-coverage; default=0 " />
</when>
|
|
<when value="cline">
    <!-- Free-form level: file-based inputs and outputs come from the shared macro,
         everything else is typed by the user as raw command line flags. -->

    <expand macro="optional_file_inputs" /> <!-- see macros section -->

    <!-- Double quotes inside the help attribute must be written as &quot; because the
         attribute value itself is delimited by double quotes. -->
    <param name="cline" size="60" type="text" value="-m 20 -q 30" label="Type command line tags here" help="All parameters that DO NOT involve filenames can be typed here. Use &quot;Do you want to provide additional inputs?&quot; section above to control input and output files. For full syntax check help section below">
        <!-- Every printable character is accepted except the single quote, which is
             rewritten to the __sq__ placeholder.
             NOTE(review): this still lets shell metacharacters (semicolon, pipe, backtick,
             dollar sign) through. If the value is interpolated unquoted into the command
             template, that is an injection risk. Confirm how the command section uses it. -->
        <sanitizer>
            <valid initial="string.printable">
                <remove value="'"/>
            </valid>
            <mapping initial="none">
                <add source="'" target="__sq__"/>
            </mapping>
        </sanitizer>
    </param>
</when>
|
|
521
|
|
522 </conditional>
|
|
523
|
|
524 </inputs>
|
|
<!-- Datasets declared by the tool. The VCF of variant calls has no filter; the
     failed-alleles BED and the trace file are kept only when their filter expression
     evaluates to true. -->
<outputs>
    <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (variants)" />

    <!-- Requires the 'cline' or 'full' option level, the optional-inputs selector
         switched on, and the failed-alleles option switched on. -->
    <data name="output_failed_alleles_bed" format="bed" label="${tool.name} on ${on_string} (failed alleles)">
        <filter>options_type['options_type_selector'] in ( 'cline', 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] is True and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter>
    </data>

    <!-- Same gating as above, keyed on the trace option instead. -->
    <data name="output_trace" format="txt" label="${tool.name} on ${on_string} (trace)">
        <filter>options_type['options_type_selector'] in ( 'cline', 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] is True and options_type['optional_inputs']['output_trace_option'] is True</filter>
    </data>
</outputs>
|
|
<!-- Functional test: runs the "simple" option level on the phiX174 BAM with a reference
     FASTA taken from the history, then checks that the produced VCF contains the
     expected file's content. -->
<tests>
    <test>
        <param name="reference_source_selector" value="history" />
        <param name="ref_file" value="freebayes-phix174.fasta" ftype="fasta" />
        <param name="input_bam" value="freebayes-phix174.bam" ftype="bam" />
        <param name="options_type_selector" value="simple" />
        <output name="output_vcf" file="freebayes-phix174-test1.vcf" compare="contains" />
    </test>
</tests>
|
|
<!-- Job status is decided by the exit code alone: any code of 1 or above is an error.
     "fatal" is the level applied when none is given; it is spelled out here for clarity. -->
<stdio>
    <exit_code range="1:" level="fatal" />
</stdio>
|
|
546 <help>
|
|
547 **What it does**
|
|
548
|
|
549 FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment.
|
|
550
|
|
551 See https://github.com/ekg/freebayes for details on FreeBayes.
|
|
552
|
|
553 This Galaxy instance of FreeBayes corresponds to release 8a407cf5f4416b5eba5bf27ca80144cd5e75bb80
|
|
554
|
|
555 ------
|
|
556
|
|
557 **Description**
|
|
558
|
|
559 Provided with BAM file(s) and a reference, FreeBayes will produce VCF output on standard out describing SNPs, indels, and complex variants in samples in the input alignments.
|
|
560
|
|
561 By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 20% of the reads from a single sample (-F). These settings are suitable for low- to high-depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data.
|
|
562
|
|
563 FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read. The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be set to half the read length.
|
|
564
|
|
565 Ploidy may be set to any level (-p), but by default all samples are assumed to be diploid. FreeBayes can model per-sample and per-region variation in copy-number (-A) using a copy-number variation map.
|
|
566
|
|
567 FreeBayes can act as a frequency-based pooled caller and describe variants and haplotypes in terms of observation frequency rather than called genotypes. To do so, use --pooled-continuous and set input filters to a suitable level. Allele observation counts will be described by AO and RO fields in the VCF output.
|
|
568
|
|
569 -------
|
|
570
|
|
571 **Galaxy-specific options**
|
|
572
|
|
573 Galaxy allows six levels of control over FreeBayes options provided by **Choose parameter selection level** menu option. These are:
|
|
574
|
|
575 1. *Simple diploid calling*: The simplest possible FreeBayes application. Equivalent to using FreeBayes with only a BAM input and no other parameter options.
|
|
576 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-variant-threshold 0) and --min-coverage.
|
|
577 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling variants in mixtures such as viral, bacterial, or organellar genomes.
|
|
578 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2.
|
|
579 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy widgets.
|
|
580 6. *Input parameters on the command line*: Similar to the choice above but for those who do not like clicking. Here options can be directly typed into a text box.
|
|
581
|
|
582 -----
|
|
583
|
|
584 **FreeBayes options**
|
|
585
|
|
586 .. class:: infomark
|
|
587
|
|
588 Note that each Galaxy parameter widget corresponds to one of the command line flags listed below:
|
|
589
|
|
590 Input and output::
|
|
591
|
|
592 -t --targets FILE
|
|
593 Limit analysis to targets listed in the BED-format FILE.
|
|
594 -r --region chrom:start_position-end_position
|
|
595 Limit analysis to the specified region, 0-base coordinates,
|
|
596 end_position included. Either '-' or '..' may be used as a separator.
|
|
597 -s --samples FILE
|
|
598 Limit analysis to samples listed (one per line) in the FILE.
|
|
599 By default FreeBayes will analyze all samples in its input
|
|
600 BAM files.
|
|
601 --populations FILE
|
|
602 Each line of FILE should list a sample and a population which
|
|
603 it is part of. The population-based bayesian inference model
|
|
604 will then be partitioned on the basis of the populations.
|
|
605 -A --cnv-map FILE
|
|
606 Read a copy number map from the BED file FILE, which has
|
|
607 the format:
|
|
608 reference sequence, start, end, sample name, copy number
|
|
609 ... for each region in each sample which does not have the
|
|
610 default copy number as set by --ploidy.
|
|
611 --trace FILE Output an algorithmic trace to FILE.
|
|
612 --failed-alleles FILE
|
|
613 Write a BED file of the analyzed positions which do not
|
|
614 pass --pvar to FILE.
|
|
615 -@ --variant-input VCF
|
|
616 Use variants reported in VCF file as input to the algorithm.
|
|
617 Variants in this file will be treated as putative variants
|
|
618 even if there is not enough support in the data to pass
|
|
619 input filters.
|
|
620 -l --only-use-input-alleles
|
|
621 Only provide variant calls and genotype likelihoods for sites
|
|
622 and alleles which are provided in the VCF input, and provide
|
|
623 output in the VCF for all input alleles, not just those which
|
|
624 have support in the data.
|
|
625 --haplotype-basis-alleles VCF
|
|
626 When specified, only variant alleles provided in this input
|
|
627 VCF will be used for the construction of complex or haplotype
|
|
628 alleles.
|
|
629 --report-all-haplotype-alleles
|
|
630 At sites where genotypes are made over haplotype alleles,
|
|
631 provide information about all alleles in output, not only
|
|
632 those which are called.
|
|
633 --report-monomorphic
|
|
634 Report even loci which appear to be monomorphic, and report all
|
|
635 considered alleles, even those which are not in called genotypes.
|
|
636 Loci which do not have any potential alternates have '.' for ALT.
|
|
637
|
|
638 Reporting::
|
|
639
|
|
640 -P --pvar N Report sites if the probability that there is a polymorphism
|
|
641 at the site is greater than N. default: 0.0. Note that post-
|
|
642 filtering is generally recommended over the use of this parameter.
|
|
643
|
|
644 Population model::
|
|
645
|
|
646 -T --theta N The expected mutation rate or pairwise nucleotide diversity
|
|
647 among the population under analysis. This serves as the
|
|
648 single parameter to the Ewens Sampling Formula prior model
|
|
649 default: 0.001
|
|
650 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
|
|
651 -J --pooled-discrete
|
|
652 Assume that samples result from pooled sequencing.
|
|
653 Model pooled samples using discrete genotypes across pools.
|
|
654 When using this flag, set --ploidy to the number of
|
|
655 alleles in each sample or use the --cnv-map to define
|
|
656 per-sample ploidy.
|
|
657 -K --pooled-continuous
|
|
658 Output all alleles which pass input filters, regardless of
|
|
659 genotyping outcome or model.
|
|
660
|
|
661 Reference allele::
|
|
662
|
|
663 -Z --use-reference-allele
|
|
664 This flag includes the reference allele in the analysis as
|
|
665 if it is another sample from the same population.
|
|
666 --reference-quality MQ,BQ
|
|
667 Assign mapping quality of MQ to the reference allele at each
|
|
668 site and base quality of BQ. default: 100,60
|
|
669
|
|
670 Allele scope::
|
|
671
|
|
672 -I --no-snps Ignore SNP alleles.
|
|
673 -i --no-indels Ignore insertion and deletion alleles.
|
|
674 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
|
|
675 -u --no-complex Ignore complex events (composites of other classes).
|
|
676 -n --use-best-n-alleles N
|
|
677 Evaluate only the best N SNP alleles, ranked by sum of
|
|
678 supporting quality scores. (Set to 0 to use all; default: all)
|
|
679 -E --max-complex-gap N
|
|
680 --haplotype-length N
|
|
681 Allow haplotype calls with contiguous embedded matches of up
|
|
682 to this length. (default: 3)
|
|
683 --min-repeat-length N
|
|
684 When assembling observations across repeats, require the total repeat
|
|
685 length at least this many bp. (default: 5)
|
|
686 --min-repeat-entropy N
|
|
687 To detect interrupted repeats, build across sequence until it has
|
|
688 entropy > N bits per bp. (default: 0, off)
|
|
689 --no-partial-observations
|
|
690 Exclude observations which do not fully span the dynamically-determined
|
|
691 detection window. (default, use all observations, dividing partial
|
|
692 support across matching haplotypes when generating haplotypes.)
|
|
693
|
|
694 Indel realignment::
|
|
695
|
|
696 -O --dont-left-align-indels
|
|
697 Turn off left-alignment of indels, which is enabled by default.
|
|
698
|
|
699 Input filters::
|
|
700
|
|
701 -4 --use-duplicate-reads
|
|
702 Include duplicate-marked alignments in the analysis.
|
|
703 default: exclude duplicates marked as such in alignments
|
|
704 -m --min-mapping-quality Q
|
|
705 Exclude alignments from analysis if they have a mapping
|
|
706 quality less than Q. default: 1
|
|
707 -q --min-base-quality Q
|
|
708 Exclude alleles from analysis if their supporting base
|
|
709 quality is less than Q. default: 0
|
|
710 -R --min-supporting-allele-qsum Q
|
|
711 Consider any allele in which the sum of qualities of supporting
|
|
712 observations is at least Q. default: 0
|
|
713 -Y --min-supporting-mapping-qsum Q
|
|
714 Consider any allele in which the sum of mapping qualities of
|
|
715 supporting reads is at least Q. default: 0
|
|
716 -Q --mismatch-base-quality-threshold Q
|
|
717 Count mismatches toward --read-mismatch-limit if the base
|
|
718 quality of the mismatch is >= Q. default: 10
|
|
719 -U --read-mismatch-limit N
|
|
720 Exclude reads with more than N mismatches where each mismatch
|
|
721 has base quality >= mismatch-base-quality-threshold.
|
|
722 default: ~unbounded
|
|
723 -z --read-max-mismatch-fraction N
|
|
724 Exclude reads with more than N [0,1] fraction of mismatches where
|
|
725 each mismatch has base quality >= mismatch-base-quality-threshold
|
|
726 default: 1.0
|
|
727 -$ --read-snp-limit N
|
|
728 Exclude reads with more than N base mismatches, ignoring gaps
|
|
729 with quality >= mismatch-base-quality-threshold.
|
|
730 default: ~unbounded
|
|
731 -e --read-indel-limit N
|
|
732 Exclude reads with more than N separate gaps.
|
|
733 default: ~unbounded
|
|
734 -0 --standard-filters Use stringent input base and mapping quality filters
|
|
735 Equivalent to -m 30 -q 20 -R 0 -S 0
|
|
736 -F --min-alternate-fraction N
|
|
737 Require at least this fraction of observations supporting
|
|
738 an alternate allele within a single individual
|
|
739 in order to evaluate the position. default: 0.2
|
|
740 -C --min-alternate-count N
|
|
741 Require at least this count of observations supporting
|
|
742 an alternate allele within a single individual in order
|
|
743 to evaluate the position. default: 2
|
|
744 -3 --min-alternate-qsum N
|
|
745 Require at least this sum of quality of observations supporting
|
|
746 an alternate allele within a single individual in order
|
|
747 to evaluate the position. default: 0
|
|
748 -G --min-alternate-total N
|
|
749 Require at least this count of observations supporting
|
|
750 an alternate allele within the total population in order
|
|
751 to use the allele in analysis. default: 1
|
|
752 -! --min-coverage N
|
|
753 Require at least this coverage to process a site. default: 0
|
|
754
|
|
755 Population priors::
|
|
756
|
|
757 -k --no-population-priors
|
|
758 Equivalent to --pooled-discrete --hwe-priors-off and removal of
|
|
759 Ewens Sampling Formula component of priors.
|
|
760
|
|
761 Mappability priors::
|
|
762
|
|
763 -w --hwe-priors-off
|
|
764 Disable estimation of the probability of the combination
|
|
765 arising under HWE given the allele frequency as estimated
|
|
766 by observation frequency.
|
|
767 -V --binomial-obs-priors-off
|
|
768 Disable incorporation of prior expectations about observations.
|
|
769 Uses read placement probability, strand balance probability,
|
|
770 and read position (5'-3') probability.
|
|
771 -a --allele-balance-priors-off
|
|
772 Disable use of aggregate probability of observation balance between alleles
|
|
773 as a component of the priors.
|
|
774
|
|
775 Genotype likelihoods::
|
|
776
|
|
777 --observation-bias FILE
|
|
778 Read length-dependent allele observation biases from FILE.
|
|
779 The format is [length] [alignment efficiency relative to reference]
|
|
780 where the efficiency is 1 if there is no relative observation bias.
|
|
781 --base-quality-cap Q
|
|
782 Limit estimated observation quality by capping base quality at Q.
|
|
783 --experimental-gls
|
|
784 Generate genotype likelihoods using 'effective base depth' metric
|
|
785 qual = 1-BaseQual * 1-MapQual. Incorporate partial observations.
|
|
786 This is the default when contamination estimates are provided.
|
|
787 Optimized for diploid samples.
|
|
788 --prob-contamination F
|
|
789 An estimate of contamination to use for all samples. default: 10e-9
|
|
790 --contamination-estimates FILE
|
|
791 A file containing per-sample estimates of contamination, such as
|
|
792 those generated by VerifyBamID. The format should be:
|
|
793 sample p(read=R|genotype=AR) p(read=A|genotype=AA)
|
|
794 Sample '*' can be used to set default contamination estimates.
|
|
795
|
|
796 Algorithmic features::
|
|
797
|
|
798 --report-genotype-likelihood-max
|
|
799 Report genotypes using the maximum-likelihood estimate provided
|
|
800 from genotype likelihoods.
|
|
801 -B --genotyping-max-iterations N
|
|
802 Iterate no more than N times during genotyping step. default: 1000.
|
|
803 --genotyping-max-banddepth N
|
|
804 Integrate no deeper than the Nth best genotype by likelihood when
|
|
805 genotyping. default: 6.
|
|
806 -W --posterior-integration-limits N,M
|
|
807 Integrate all genotype combinations in our posterior space
|
|
808 which include no more than N samples with their Mth best
|
|
809 data likelihood. default: 1,3.
|
|
810 -N --exclude-unobserved-genotypes
|
|
811 Skip sample genotypings for which the sample has no supporting reads.
|
|
812 -S --genotype-variant-threshold N
|
|
813 Limit posterior integration to samples where the second-best
|
|
814 genotype likelihood is no more than log(N) from the highest
|
|
815 genotype likelihood for the sample. default: ~unbounded
|
|
816 -j --use-mapping-quality
|
|
817 Use mapping quality of alleles when calculating data likelihoods.
|
|
818 -H --harmonic-indel-quality
|
|
819 Use a weighted sum of base qualities around an indel, scaled by the
|
|
820 distance from the indel. By default use a minimum BQ in flanking sequence.
|
|
821 -D --read-dependence-factor N
|
|
822 Incorporate non-independence of reads by scaling successive
|
|
823 observations by this factor during data likelihood
|
|
824 calculations. default: 0.9
|
|
825 -= --genotype-qualities
|
|
826 Calculate the marginal probability of genotypes and report as GQ in
|
|
827 each sample field in the VCF output.
|
|
828
|
|
829
|
|
830 ------
|
|
831
|
|
832 **Citation**
|
|
833
|
|
834 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_.
|
|
835
|
|
836 The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko.
|
|
837
|
|
838 </help>
|
|
839 </tool>
|