comparison freebayes.xml @ 28:977a5301b66d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit 9bbda385129b4bc34f66889d28c2570bf5bb2214
author iuc
date Tue, 06 Jun 2017 18:41:18 -0400
parents 9f164587a92f
children 156b60c1530f
comparison
equal deleted inserted replaced
27:9f164587a92f 28:977a5301b66d
1 <tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@-0"> 1 <tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@-1">
2 <description>bayesian genetic variant detector</description> 2 <description>bayesian genetic variant detector</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <expand macro="requirements">
7 <requirement type="package" version="@DEPENDENCY_VERSION@">freebayes</requirement>
8 <requirement type="package" version="0.1.19">samtools</requirement>
9 <requirement type="package" version="4.1.3">gawk</requirement> 7 <requirement type="package" version="4.1.3">gawk</requirement>
10 <requirement type="package" version="20160622">parallel</requirement> 8 <requirement type="package" version="20160622">parallel</requirement>
11 </requirements> 9 </expand>
12 <stdio> 10 <command detect_errors="exit_code"><![CDATA[
13 <exit_code range="1:" />
14 </stdio>
15 <command><![CDATA[
16 ##set up input files 11 ##set up input files
17 12
18 #set $reference_fasta_filename = "localref.fa" 13 #set $reference_fasta_filename = "localref.fa"
19 14
20 #if str( $reference_source.reference_source_selector ) == "history": 15 #if str( $reference_source.reference_source_selector ) == "history":
35 ln -s -f '${input_bam.metadata.bam_index}' 'b_${bam_count}.bam.bai' && 30 ln -s -f '${input_bam.metadata.bam_index}' 'b_${bam_count}.bam.bai' &&
36 #end for 31 #end for
37 32
38 ## Tabixize optional input_variant_vcf file (for --variant-input option) 33 ## Tabixize optional input_variant_vcf file (for --variant-input option)
39 #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": 34 #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
40 ln -s -f '${options_type.optional_inputs.input_variant_type.input_variant_vcf}' 'input_variant_vcf.vcf.gz' && 35 ln -s -f '${options_type.optional_inputs.input_variant_type.input_variant_vcf}' input_variant_vcf.vcf.gz &&
41 ln -s -f '${Tabixized_input}' 'input_variant_vcf.vcf.gz.tbi' && 36 ln -s -f '${Tabixized_input}' input_variant_vcf.vcf.gz.tbi &&
42 #end if 37 #end if
43 38
44 ##if user has specified a region or target file, just use instead of calculating a set of unique regions 39 ##if the user has specified a region or target file, just use that instead of calculating a set of unique regions
45
46 #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file": 40 #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
47 ln -s '${target_limit_type.input_target_bed}' regions_all.bed && 41 ln -s '${target_limit_type.input_target_bed}' regions_all.bed &&
48 #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region": 42 #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region":
49 printf '${target_limit_type.region_chromosome}\t${target_limit_type.region_start}\t${target_limit_type.region_end}' > regions_all.bed && 43 printf '${target_limit_type.region_chromosome}\t${target_limit_type.region_start}\t${target_limit_type.region_end}' > regions_all.bed &&
50 #else 44 #else
51 ##divide up the regions in the bam file for efficient processing 45 ##divide up the regions in the bam file for efficient processing
52 #for $bam_count, $input_bam in enumerate( $input_bamfiles ): 46 #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
53 samtools view -H b_${bam_count}.bam | 47 samtools view -H b_${bam_count}.bam |
54 grep "^@SQ" | 48 grep '^@SQ' |
55 cut -f 2- | 49 cut -f 2- |
56 awk '{ gsub("^SN:","",$1); 50 awk '{ gsub("^SN:","",$1); gsub("^LN:","",$2); print $1"\t0\t"$2; }' >> regions_all.bed &&
57 gsub("^LN:","",$2);
58 print $1"\t0\t"$2; }' >> regions_all.bed &&
59 #end for 51 #end for
60 #end if 52 #end if
61 53
62 sort -u regions_all.bed > regions_uniq.bed && 54 sort -u regions_all.bed > regions_uniq.bed &&
63 ## split into even small chunks, this has some disadvantages and will not be used for the moment 55 ## split into even small chunks, this has some disadvantages and will not be used for the moment
64 ## bedtools makewindows -b regions_uniq.bed -w 10000000 -s 9990000 > regions.bed && 56 ## bedtools makewindows -b regions_uniq.bed -w 10000000 -s 9990000 > regions.bed &&
65 57
66 mkdir vcf_output && 58 mkdir vcf_output failed_alleles trace &&
67 mkdir failed_alleles &&
68 mkdir trace &&
69 59
70 ## Finished setting up inputs 60 ## Finished setting up inputs
71 61
72 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; 62 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
73 do 63 do
74
75 echo " 64 echo "
76 65
77 ## COMMAND LINE STARTS HERE 66 ## COMMAND LINE STARTS HERE
78 67
79 freebayes 68 freebayes
88 ## Outputs 77 ## Outputs
89 --vcf './vcf_output/part_\$i.vcf' 78 --vcf './vcf_output/part_\$i.vcf'
90 79
91 ##advanced options 80 ##advanced options
92 #if str( $options_type.options_type_selector ) == "simple": 81 #if str( $options_type.options_type_selector ) == "simple":
93 ##do nothing as command like build up to this point is sufficinet for simple diploid calling 82 #pass
94
95 #elif str( $options_type.options_type_selector ) == "simple_w_filters": 83 #elif str( $options_type.options_type_selector ) == "simple_w_filters":
96 --standard-filters 84 --standard-filters
97 --min-coverage '${options_type.min_coverage}' 85 --min-coverage ${options_type.min_coverage}
98 #elif str( $options_type.options_type_selector ) == "naive": 86 #elif str( $options_type.options_type_selector ) == "naive":
99 --haplotype-length 0 87 --haplotype-length 0
100 --min-alternate-count 1 88 --min-alternate-count 1
101 --min-alternate-fraction 0 89 --min-alternate-fraction 0
102 --pooled-continuous 90 --pooled-continuous
106 --min-alternate-count 1 94 --min-alternate-count 1
107 --min-alternate-fraction 0 95 --min-alternate-fraction 0
108 --pooled-continuous 96 --pooled-continuous
109 --report-monomorphic 97 --report-monomorphic
110 --standard-filters 98 --standard-filters
111 --min-coverage '${options_type.min_coverage}' 99 --min-coverage ${options_type.min_coverage}
112
113 ## Command line direct text entry is not allowed at this time for security reasons
114 #elif str( $options_type.options_type_selector ) == "full": 100 #elif str( $options_type.options_type_selector ) == "full":
115 #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set': 101 #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set':
116 ${options_type.optional_inputs.report_monomorphic} 102 ${options_type.optional_inputs.report_monomorphic}
117 103
118 #if $options_type.optional_inputs.output_trace_option: 104 #if $options_type.optional_inputs.output_trace_option:
149 #if str( $options_type.reporting.reporting_selector ) == "set": 135 #if str( $options_type.reporting.reporting_selector ) == "set":
150 --pvar ${options_type.reporting.pvar} 136 --pvar ${options_type.reporting.pvar}
151 #end if 137 #end if
152 ## POPULATION MODEL 138 ## POPULATION MODEL
153 #if str( $options_type.population_model.population_model_selector ) == "set": 139 #if str( $options_type.population_model.population_model_selector ) == "set":
154 --theta '${options_type.population_model.T}' 140 --theta ${options_type.population_model.T}
155 --ploidy '${options_type.population_model.P}' 141 --ploidy ${options_type.population_model.P}
156 ${options_type.population_model.J} 142 ${options_type.population_model.J}
157 ${options_type.population_model.K} 143 ${options_type.population_model.K}
158 #end if 144 #end if
159 145
160 ## REFERENCE ALLELE 146 ## REFERENCE ALLELE
169 ${options_type.allele_scope.i} 155 ${options_type.allele_scope.i}
170 ${options_type.allele_scope.X} 156 ${options_type.allele_scope.X}
171 ${options_type.allele_scope.u} 157 ${options_type.allele_scope.u}
172 ${options_type.allele_scope.no_partial_observations} 158 ${options_type.allele_scope.no_partial_observations}
173 159
174 -n '${options_type.allele_scope.n}' 160 -n ${options_type.allele_scope.n}
175 161
176 --haplotype-length '${options_type.allele_scope.haplotype_length}' 162 --haplotype-length ${options_type.allele_scope.haplotype_length}
177 --min-repeat-size '${options_type.allele_scope.min_repeat_length}' 163 --min-repeat-size ${options_type.allele_scope.min_repeat_length}
178 --min-repeat-entropy '${options_type.allele_scope.min_repeat_entropy}' 164 --min-repeat-entropy ${options_type.allele_scope.min_repeat_entropy}
179 #end if 165 #end if
180 166
181 ## REALIGNMENT 167 ## REALIGNMENT
182 ${options_type.O} 168 ${options_type.O}
183 169
184 ##INPUT FILTERS 170 ##INPUT FILTERS
185 #if str( $options_type.input_filters.input_filters_selector ) == "set": 171 #if str( $options_type.input_filters.input_filters_selector ) == "set":
186 ${options_type.input_filters.use_duplicate_reads} 172 ${options_type.input_filters.use_duplicate_reads}
187 -m '${options_type.input_filters.m}' 173 -m ${options_type.input_filters.m}
188 -q '${options_type.input_filters.q}' 174 -q ${options_type.input_filters.q}
189 -R '${options_type.input_filters.R}' 175 -R ${options_type.input_filters.R}
190 -Y '${options_type.input_filters.Y}' 176 -Y ${options_type.input_filters.Y}
191 -e '${options_type.input_filters.e}' 177 -e ${options_type.input_filters.e}
192 -F '${options_type.input_filters.F}' 178 -F ${options_type.input_filters.F}
193 -C '${options_type.input_filters.C}' 179 -C ${options_type.input_filters.C}
194 -G '${options_type.input_filters.G}' 180 -G ${options_type.input_filters.G}
195 181
196 #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set": 182 #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set":
197 -Q '${options_type.input_filters.mismatch_filters.Q}' 183 -Q ${options_type.input_filters.mismatch_filters.Q}
198 -U '${options_type.input_filters.mismatch_filters.U}' 184 #if str($options_type.input_filters.mismatch_filters.U)
199 -z '${options_type.input_filters.mismatch_filters.z}' 185 -U ${options_type.input_filters.mismatch_filters.U}
200 186 #end if
201 --read-snp-limit '${options_type.input_filters.mismatch_filters.read_snp_limit}' 187 -z ${options_type.input_filters.mismatch_filters.z}
202 #end if 188
203 189 --read-snp-limit ${options_type.input_filters.mismatch_filters.read_snp_limit}
204 --min-coverage '${options_type.input_filters.min_coverage}' 190 #end if
205 --min-alternate-qsum "${options_type.input_filters.min_alternate_qsum}" 191
192 --min-coverage ${options_type.input_filters.min_coverage}
193 --min-alternate-qsum ${options_type.input_filters.min_alternate_qsum}
206 #end if 194 #end if
207 195
208 ## POPULATION AND MAPPABILITY PRIORS 196 ## POPULATION AND MAPPABILITY PRIORS
209 #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "set": 197 #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "set":
210 ${options_type.population_mappability_priors.k} 198 ${options_type.population_mappability_priors.k}
215 203
216 ## GENOTYPE LIKELIHOODS 204 ## GENOTYPE LIKELIHOODS
217 #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set": 205 #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set":
218 ${$options_type.genotype_likelihoods.experimental_gls} 206 ${$options_type.genotype_likelihoods.experimental_gls}
219 207
220 --base-quality-cap '${$options_type.genotype_likelihoods.base_quality_cap}' 208 --base-quality-cap ${$options_type.genotype_likelihoods.base_quality_cap}
221 --prob-contamination '${$options_type.genotype_likelihoods.prob_contamination}' 209 --prob-contamination ${$options_type.genotype_likelihoods.prob_contamination}
222 #end if 210 #end if
223 211
224 ## ALGORITHMIC FEATURES 212 ## ALGORITHMIC FEATURES
225 #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set": 213 #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set":
226 -B '${options_type.algorithmic_features.B}' 214 -B '${options_type.algorithmic_features.B}'
227 -W '${options_type.algorithmic_features.W}' 215 -W '${options_type.algorithmic_features.W}'
228 -D '${options_type.algorithmic_features.D}' 216 -D '${options_type.algorithmic_features.D}'
229 217
230 #if str( $options_type.algorithmic_features.genotype_variant_threshold.genotype_variant_threshold_selector ) == "set": 218 #if str($options_type.algorithmic_features.genotype_variant_threshold)
231 -S '${options_type.algorithmic_features.genotype_variant_threshold.S}' 219 -S ${options_type.algorithmic_features.genotype_variant_threshold}
232 #end if 220 #end if
233 221
234 ${options_type.algorithmic_features.N} 222 ${options_type.algorithmic_features.N}
235 ${options_type.algorithmic_features.j} 223 ${options_type.algorithmic_features.j}
236 ${options_type.algorithmic_features.H} 224 ${options_type.algorithmic_features.H}
237 ${options_type.algorithmic_features.genotype_qualities} 225 ${options_type.algorithmic_features.genotype_qualities}
238 ${options_type.algorithmic_features.report_genotype_likelihood_max} 226 ${options_type.algorithmic_features.report_genotype_likelihood_max}
239 227
240 --genotyping-max-banddepth '${options_type.algorithmic_features.genotyping_max_banddepth}' 228 --genotyping-max-banddepth ${options_type.algorithmic_features.genotyping_max_banddepth}
241 #end if 229 #end if
242 #end if 230 #end if
243 231
244 "; 232 ";
245 done > freebayes_commands.sh && 233 done > freebayes_commands.sh &&
246 234
247 cat freebayes_commands.sh | 235 cat freebayes_commands.sh |
248 parallel --no-notice -j \${GALAXY_SLOTS:-1} && 236 parallel --will-cite -j \${GALAXY_SLOTS:-1} &&
249 237
250 ## make VCF header 238 ## make VCF header
251 grep "^#" "./vcf_output/part_\$i.vcf" > header.txt && 239 grep "^#" "./vcf_output/part_\$i.vcf" > header.txt &&
252 240
253 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; 241 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
290 <expand macro="input_bam"> 278 <expand macro="input_bam">
291 <expand macro="validation" /> 279 <expand macro="validation" />
292 </expand> 280 </expand>
293 <param name="ref_file" type="select" label="Using reference genome"> 281 <param name="ref_file" type="select" label="Using reference genome">
294 <options from_data_table="fasta_indexes" /> 282 <options from_data_table="fasta_indexes" />
295 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> 283 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input dataset"/>
296 </param> 284 </param>
297 </when> 285 </when>
298 <when value="history"> <!-- FIX ME!!!! --> 286 <when value="history"> <!-- FIX ME!!!! -->
299 <expand macro="input_bam" /> 287 <expand macro="input_bam" />
300 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" 288 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence"
301 help="You can upload a FASTA sequence to the history and use it as reference" /> 289 help="You can upload a FASTA sequence to the history and use it as reference" />
302 </when> 290 </when>
303 </conditional> 291 </conditional>
304 <conditional name="target_limit_type"> 292 <conditional name="target_limit_type">
305 <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options"> 293 <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options">
306 <option value="do_not_limit" selected="True">Do not limit</option> 294 <option value="do_not_limit" selected="true">Do not limit</option>
307 <option value="limit_by_target_file">Limit by target file</option> 295 <option value="limit_by_target_file">Limit by target file</option>
308 <option value="limit_by_region">Limit to region</option> 296 <option value="limit_by_region">Limit to region</option>
309 </param> 297 </param>
310 <when value="do_not_limit" /><!-- Do nothing here --> 298 <when value="do_not_limit" />
311 <when value="limit_by_target_file"> 299 <when value="limit_by_target_file">
312 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to regions in a file (BED-format)." argument="--targets"/> 300 <param name="input_target_bed" argument="--targets" type="data" format="bed" label="Limit analysis to regions in this BED dataset" />
313 </when> 301 </when>
314 <when value="limit_by_region"> 302 <when value="limit_by_region">
315 <param name="region_chromosome" type="text" label="Region Chromosome" value="" argument="--region"/> <!--only once? --> 303 <param name="region_chromosome" argument="--region" type="text" label="Region Chromosome" value="" /> <!--only once? -->
316 <param name="region_start" type="integer" label="Region Start" value="" /> 304 <param name="region_start" type="integer" label="Region Start" value="" />
317 <param name="region_end" type="integer" label="Region End" value="" /> 305 <param name="region_end" type="integer" label="Region End" value="" />
318 </when> 306 </when>
319 </conditional> 307 </conditional>
320 <conditional name="options_type"> 308 <conditional name="options_type">
321 <param name="options_type_selector" type="select" label="Choose parameter selection level" 309 <param name="options_type_selector" type="select" label="Choose parameter selection level"
322 help="Select how much control over the freebayes run you need" > 310 help="Select how much control over the freebayes run you need">
323 <option value="simple" selected="True">1. Simple diploid calling</option> 311 <option value="simple" selected="true">1. Simple diploid calling</option>
324 <option value="simple_w_filters">2. Simple diploid calling with filtering and coverage</option> 312 <option value="simple_w_filters">2. Simple diploid calling with filtering and coverage</option>
325 <option value="naive">3. Frequency-based pooled calling</option> 313 <option value="naive">3. Frequency-based pooled calling</option>
326 <option value="naive_w_filters">4. Frequency-based pooled calling with filtering and coverage</option> 314 <option value="naive_w_filters">4. Frequency-based pooled calling with filtering and coverage</option>
327 <option value="full">5. Full list of options</option> 315 <option value="full">5. Full list of options</option>
328 </param> 316 </param>
329 <when value="full"> 317 <when value="full">
330 <conditional name="optional_inputs"> 318 <conditional name="optional_inputs">
331 <param name="optional_inputs_selector" type="select" label="Additional inputs" 319 <param name="optional_inputs_selector" type="select" label="Additional inputs"
332 help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --varinat-input, --only-use-input-alleles, --haplotype-basis-alleles, 320 help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates">
333 --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates">
334 <option value="do_not_set" selected="true">Do not provide additional inputs</option> 321 <option value="do_not_set" selected="true">Do not provide additional inputs</option>
335 <option value="set">Provide additional inputs</option> 322 <option value="set">Provide additional inputs</option>
336 </param> 323 </param>
337 <when value="set"> 324 <when value="set">
338 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" 325 <param name="output_failed_alleles_option" argument="--failed-alleles" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="false"
339 label="Write out failed alleles file" argument="--failed-alleles" /> 326 label="Write out failed alleles file" />
340 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" 327 <param name="output_trace_option" argument="--trace" type="boolean" truevalue="--trace" falsevalue="" checked="false"
341 label="Write out algorithm trace file" argument="--trace"/> 328 label="Write out algorithm trace file" />
342 <param name="samples" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" 329 <param argument="--samples" type="data" format="txt"
343 help="default=By default FreeBayes will analyze all samples in its input BAM files" argument="--samples"/> 330 label="Limit analysis to samples listed (one per line) in this dataset" optional="true"
344 <param name="populations" type="data" format="txt" label="Populations File" optional="True" 331 help="By default FreeBayes will analyze all samples in its input BAM datasets" />
345 help="Each line of FILE should list a sample and a population which it is part of. The population-based bayesian inference model will 332 <param argument="--populations" type="data" format="txt" optional="true"
346 then be partitioned on the basis of the populations. [default=False]" 333 label="Populations dataset"
347 argument="--populations" /> 334 help="Each line of this dataset should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" />
348 <param name="A" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" 335 <param name="A" argument="--cnv-map" type="data" format="bed" optional="true"
349 help="default=copy number is set as specified by --ploidy. Read a copy number map from the BED file FILE, which has the format: 336 label="Read a copy number map from a BED dataset"
350 reference sequence, start, end, sample name, copy number ... for each region in each sample which does not have the default copy number as set by --ploidy." 337 help="The BED dataset should have the format: 'reference sequence, start, end, sample name, copy number' for each region in each sample which does not have the default copy number as set by --ploidy. If not specified, copy number is set as specified by --ploidy" />
351 argument="--cnv-map" />
352 <conditional name="input_variant_type"> 338 <conditional name="input_variant_type">
353 <param name="input_variant_type_selector" type="select" label="Provide variants file"> 339 <param name="input_variant_type_selector" type="select" label="Provide variants dataset">
354 <option value="do_not_provide" selected="True">Do not provide</option> 340 <option value="do_not_provide" selected="true">Do not provide</option>
355 <option value="provide_vcf">Provide VCF file</option> 341 <option value="provide_vcf">Provide VCF dataset</option>
356 </param> 342 </param>
357 <when value="do_not_provide" /><!-- Do nothing here --> 343 <when value="do_not_provide" />
358 <when value="provide_vcf"> 344 <when value="provide_vcf">
359 <param name="input_variant_vcf" type="data" format="vcf_bgzip" label="Use variants reported in VCF file as input to the algorithm" argument="--variant-input"> 345 <param name="input_variant_vcf" argument="--variant-input" type="data" format="vcf_bgzip"
346 label="Use variants reported in this VCF dataset as input to the algorithm">
360 <conversion name="Tabixized_input" type="tabix" /> 347 <conversion name="Tabixized_input" type="tabix" />
361 </param> 348 </param>
362 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" 349 <param name="only_use_input_alleles" argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false"
363 label="Only provide variant calls and genotype likelihoods for sites in VCF" argument="--only-use-input-alleles" /> 350 label="Only provide variant calls and genotype likelihoods for sites in VCF" />
364 </when> 351 </when>
365 </conditional> 352 </conditional>
366 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" 353 <param name="haplotype_basis_alleles" argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true"
367 argument="--haplotype-basis-alleles" /> 354 label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" />
368 <param name="report_monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="False" 355 <param name="report_monomorphic" argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false"
369 label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes." 356 label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes" />
370 argument="--report-monomorphic" /> 357 <param name="observation_bias" argument="--observation-bias" type="data" format="tabular" optional="true"
371 <param name="observation_bias" optional="True" type="data" format="tabular" label="Load read length-dependent allele observation biases from" 358 label="Load read length-dependent allele observation biases from"
372 help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" 359 help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" />
373 argument="--observation-bias" /> 360 <param name="contamination_estimates" argument="--contamination-estimates" type="data" format="tabular" optional="true"
374 <param name="contamination_estimates" optional="True" type="data" format="tabular" label="Upload per-sample estimates of contamination from" 361 label="Upload per-sample estimates of contamination from"
375 help="The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates." 362 help="The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates" />
376 argument="--contamination-estimates" /> 363 </when>
377 </when> 364 <when value="do_not_set" />
378 <when value="do_not_set" /><!-- do nothing -->
379 </conditional> 365 </conditional>
380 366
381 <!-- reporting --> 367 <!-- reporting -->
382 <conditional name="reporting"> 368 <conditional name="reporting">
383 <param name="reporting_selector" type="select" label="Reporting options" help="Sets -P --pvar option"> 369 <param name="reporting_selector" type="select" label="Reporting options" help="Sets -P --pvar option">
384 <option value="do_not_set" selected="True">Use defaults</option> 370 <option value="do_not_set" selected="true">Use defaults</option>
385 <option value="set">Set reporting options</option> 371 <option value="set">Set reporting options</option>
386 </param> 372 </param>
387 <when value="set"> 373 <when value="set">
388 <param name="pvar" type="float" value="0.0" label="Report sites if the probability that there is a polymorphism at the site is greater than" 374 <param argument="--pvar" type="float" value="0.0"
389 help="Note that post-filtering is generally recommended over the use of this parameter. [default=0.0]" 375 label="Report sites if the probability that there is a polymorphism at the site is greater than"
390 argument="--pvar" /> 376 help="Note that post-filtering is generally recommended over the use of this parameter" />
391 </when> 377 </when>
392 <when value="do_not_set" /><!-- do nothing --> 378 <when value="do_not_set" />
393 </conditional> 379 </conditional>
394 380
395 <!-- population model --> 381 <!-- population model -->
396 <conditional name="population_model"> 382 <conditional name="population_model">
397 <param name="population_model_selector" type="select" label="Population model options" 383 <param name="population_model_selector" type="select" label="Population model options"
398 help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options " > 384 help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options">
399 <option value="do_not_set" selected="true">Use defaults</option> 385 <option value="do_not_set" selected="true">Use defaults</option>
400 <option value="set">Set population model options</option> 386 <option value="set">Set population model options</option>
401 </param> 387 </param>
402 <when value="set"> 388 <when value="set">
403 <param name="T" type="float" value="0.001" label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis" 389 <param name="T" argument="--theta" type="float" value="0.001"
404 help="This serves as the single parameter to the Ewens Sampling Formula prior model. [default = 0.001]" argument='--theta'/> 390 label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis"
405 <param name="P" type="integer" value="2" label="Set ploidy for the analysis" 391 help="This serves as the single parameter to the Ewens Sampling Formula prior model" />
406 help="default=2" argument='--ploidy' /> 392 <param name="P" argument="--ploidy" type="integer" value="2"
407 <param name="J" type="boolean" truevalue="-J" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" 393 label="Set ploidy for the analysis" />
408 help="Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy. [default=False]" 394 <param name="J" argument="--pooled-discrete" type="boolean" truevalue="-J" falsevalue="" checked="false"
409 argument="--pooled-discrete"/> 395 label="Assume that samples result from pooled sequencing"
410 <param name="K" type="boolean" truevalue="-K" falsevalue="" checked="False" label="Output all alleles which pass input filters, regardless of genotyping outcome or model" 396 help="Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy" />
411 help="default=False." argument="--pooled-continuous" /> 397 <param name="K" argument="--pooled-continuous" type="boolean" truevalue="-K" falsevalue="" checked="false"
412 </when> 398 label="Output all alleles which pass input filters, regardless of genotyping outcome or model" />
413 <when value="do_not_set" /><!-- do nothing --> 399 </when>
400 <when value="do_not_set" />
414 </conditional> 401 </conditional>
415 402
416 <!-- reference allele --> 403 <!-- reference allele -->
417 <conditional name="reference_allele"> 404 <conditional name="reference_allele">
418 <param name="reference_allele_selector" type="select" label="Reference allele options" 405 <param name="reference_allele_selector" type="select" label="Reference allele options"
419 help="Sets --use-reference-allele and --reference-quality options."> 406 help="Sets --use-reference-allele and --reference-quality options">
420 <option value="do_not_set" selected="true">Use defaults</option> 407 <option value="do_not_set" selected="true">Use defaults</option>
421 <option value="set">Set reference allele options</option> 408 <option value="set">Set reference allele options</option>
422 </param> 409 </param>
423 <when value="set"> 410 <when value="set">
424 <param name="Z" type="boolean" truevalue="-Z" falsevalue="" checked="False" label="Include the reference allele in the analysis as if it is another sample from the same population" 411 <param name="Z" argument="--use-reference-allele" type="boolean" truevalue="-Z" falsevalue="" checked="false"
425 help="default=False" argument="--use-reference-allele" /> 412 label="Include the reference allele in the analysis as if it is another sample from the same population" />
426 <param name="reference_quality" type="text" value="100,60" label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" 413 <param name="reference_quality" argument="--reference-quality" type="text" value="100,60"
427 help="default=100,60" argument="--reference-quality" /> 414 label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" />
428 </when> 415 </when>
429 <when value="do_not_set" /><!-- do nothing --> 416 <when value="do_not_set" />
430 </conditional> 417 </conditional>
431 418
432 <!-- allelic scope --> 419 <!-- allelic scope -->
433 <conditional name="allele_scope"> 420 <conditional name="allele_scope">
434 <param name="allele_scope_selector" type="select" label="Allelic scope options" 421 <param name="allele_scope_selector" type="select" label="Allelic scope options"
435 help="Sets -I, -i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options."> 422 help="Sets -I, -i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options">
436 <option value="do_not_set" selected="true">Use defaults</option> 423 <option value="do_not_set" selected="true">Use defaults</option>
437 <option value="set">Set allelic scope options</option> 424 <option value="set">Set allelic scope options</option>
438 </param> 425 </param>
439 <when value="set"> 426 <when value="set">
440 <param name="I" type="boolean" truevalue="-I" falsevalue="" checked="False" label="Ignore SNP alleles" 427 <param name="I" argument="--no-snps" type="boolean" truevalue="-I" falsevalue="" checked="false"
441 help="default=False" argument="--no-snps" /> 428 label="Ignore SNP alleles" />
442 <param name="i" type="boolean" truevalue="-i" falsevalue="" checked="False" label="Ignore indel alleles" 429 <param name="i" argument="--no-indels" type="boolean" truevalue="-i" falsevalue="" checked="false"
443 help="default=False" argument="--no-indels" /> 430 label="Ignore indel alleles" />
444 <param name="X" type="boolean" truevalue="-X" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" 431 <param name="X" argument="--no-mnps" type="boolean" truevalue="-X" falsevalue="" checked="false"
445 help="default=False" argument="--no-mnps" /> 432 label="Ignore multi-nucleotide polymorphisms, MNPs" />
446 <param name="u" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)." 433 <param name="u" argument="--no-complex" type="boolean" truevalue="-u" falsevalue="" checked="false"
447 help="default=False" argument="--no-complex" /> 434 label="Ignore complex events (composites of other classes)" />
448 <param name="n" type="integer" value="0" label="How many best SNP alleles to evaluate" 435 <param name="n" argument="--use-best-n-alleles" type="integer" value="0"
449 help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all. [default=0 (all)]" 436 label="How many best SNP alleles to evaluate"
450 argument="--use-best-n-alleles" /> 437 help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" />
451 <param name="haplotype_length" type="integer" value="3" label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" 438 <param name="haplotype_length" argument="--haplotype-length" type="integer" value="3"
452 help="-E --max-complex-gap --haplotype-length; default=3." /> 439 label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" />
453 <param name="min_repeat_length" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" 440 <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5"
454 help="default=5." argument="--min-repeat-size" /> 441 label="When assembling observations across repeats, require the total repeat length at least this many bp" />
455 <param name="min_repeat_entropy" type="integer" value="0" label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" 442 <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="0"
456 help="default=0 (off)." argument="--min-repeat-entropy" /> 443 label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" />
457 <param name="no_partial_observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="False" 444 <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false"
458 label="Exclude observations which do not fully span the dynamically-determined detection window" 445 label="Exclude observations which do not fully span the dynamically-determined detection window"
459 help="default=use all observations, dividing partial support across matching haplotypes when generating haplotypes." 446 help="By default, FreeBayes uses all observations, dividing partial support across matching haplotypes when generating haplotypes" />
460 argument="--no-partial-observations" /> 447 </when>
461 </when> 448 <when value="do_not_set" />
462 <when value="do_not_set" /><!-- do nothing -->
463 </conditional> 449 </conditional>
464 450
465 <!-- indel realignment --> 451 <!-- indel realignment -->
466 <param name="O" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Turn off left-alignment of indels" 452 <param name="O" argument="--dont-left-align-indels" type="boolean" truevalue="-O" falsevalue="" checked="false"
467 help="default=False (do left align)." argument="--dont-left-align-indels" /> 453 label="Turn off left-alignment of indels" />
468 454
469 <!-- input filters --> 455 <!-- input filters -->
470 <conditional name="input_filters"> 456 <conditional name="input_filters">
471 <param name="input_filters_selector" type="select" label="Input filters" 457 <param name="input_filters_selector" type="select" label="Input filters"
472 help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -&#36;, -e, -0, -F, -C, -3, -G, and -&#33; options."> 458 help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -&#36;, -e, -0, -F, -C, -3, -G, and -&#33; options">
473 <option value="do_not_set" selected="true">No input filters (default)</option> 459 <option value="do_not_set" selected="true">No input filters (default)</option>
474 <option value="set">Set input filters</option> 460 <option value="set">Set input filters</option>
475 </param> 461 </param>
476 <when value="set"> 462 <when value="set">
477 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" 463 <param name="use_duplicate_reads" argument="--use-duplicate-reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="false"
478 label="Include duplicate-marked alignments in the analysis." 464 label="Include duplicate-marked alignments in the analysis" />
479 help="default=False (exclude duplicates marked as such in alignments)." argument="--use-duplicate-reads" /> 465 <param name="m" argument="--min-mapping-quality" type="integer" value="1"
480 <param name="m" type="integer" value="1" label="Exclude alignments from analysis if they have a mapping quality less than" 466 label="Exclude alignments from analysis if they have a mapping quality less than" />
481 help="default=1" argument="--min-mapping-quality" /> 467 <param name="q" argument="--min-base-quality" type="integer" value="0"
482 <param name="q" type="integer" value="0" label="Exclude alleles from analysis if their supporting base quality less than" 468 label="Exclude alleles from analysis if their supporting base quality less than" />
483 help="default=0" argument="--min-base-quality" /> 469 <param name="R" argument="--min-supporting-allele-qsum" type="integer" value="0"
484 <param name="R" type="integer" value="0" label="Consider any allele in which the sum of qualities of supporting observations is at least" 470 label="Consider any allele in which the sum of qualities of supporting observations is at least" />
485 help="default=0" argument="--min-supporting-allele-qsum" /> 471 <param name="Y" argument="--min-supporting-mapping-qsum" type="integer" value="0"
486 <param name="Y" type="integer" value="0" label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" 472 label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" />
487 help="default=0" argument="--min-supporting-mapping-qsum" />
488 <conditional name="mismatch_filters"> 473 <conditional name="mismatch_filters">
489 <param name="mismatch_filters_selector" type="select" label="Mismatch filters" 474 <param name="mismatch_filters_selector" type="select" label="Mismatch filters"
490 help="Sets -Q, -U, -z, and -&#36; options"> 475 help="Sets -Q, -U, -z, and -&#36; options">
491 <option value="do_not_set" selected="true">No mismatch filters (default)</option> 476 <option value="do_not_set" selected="true">No mismatch filters (default)</option>
492 <option value="set">Set mismatch filters</option> 477 <option value="set">Set mismatch filters</option>
493 </param> 478 </param>
494 <when value="set"> 479 <when value="set">
495 <param name="Q" type="integer" value="10" 480 <param name="Q" argument="--mismatch-base-quality-threshold" type="integer" value="10"
496 label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" 481 label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" />
497 help="default=10" argument="--mismatch-base-quality-threshold" /> 482 <param name="U" type="integer" argument="--read-mismatch-limit" value="1000" optional="true"
498 <param name="U" type="integer" value="1000" optional="True" 483 label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (option above)"
499 label="Exclude reads with more than N mismatches where each mismatch has base quality >= Q (option above)" 484 help="default=~unbounded" />
500 help="default=~unbound" argument="--read-mismatch-limit" /> 485 <param name="z" argument="--read-max-mismatch-fraction" type="float" value="1.0" min="0.0" max="1.0"
501 <param name="z" type="float" value="1.0" min="0.0" max="1.0" 486 label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (second option above)" />
502 label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= Q (second option above)" 487 <param name="read_snp_limit" argument="--read-snp-limit" type="integer" value="1000"
503 help="default=1.0" argument="--read-max-mismatch-fraction" /> 488 label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold (third option above)"
504 <param name="read_snp_limit" type="integer" 489 help="default=~unbounded" />
505 value="1000" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= Q (third option abobe)"
506 argument="--read-snp-limit" />
507 </when> 490 </when>
508 <when value="do_not_set" /><!-- do nothing --> 491 <when value="do_not_set" />
509 </conditional> 492 </conditional>
510 <param name="e" type="integer" value="1000" label="Exclude reads with more than this number of separate gaps" 493 <param name="e" argument="--read-indel-limit" type="integer" value="1000"
511 help="default=~unbounded" argument="--read-snp-limit" /> 494 label="Exclude reads with more than this number of separate gaps"
512 <param name="standard_filters" type="boolean" truevalue="-0" falsevalue="" checked="False" 495 help="default=~unbounded" />
496 <param name="standard_filters" argument="--standard-filters" type="boolean" truevalue="-0" falsevalue="" checked="false"
513 label="Use stringent input base and mapping quality filters" 497 label="Use stringent input base and mapping quality filters"
514 help="default=False. Equivalent to -m 30 -q 20 -R 0 -S 0" argument="--standard-filters"/> 498 help="Equivalent to -m 30 -q 20 -R 0 -S 0" />
515 <param name="F" type="float" value="0.2" 499 <param name="F" argument="--min-alternate-fraction" type="float" value="0.2"
516 label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" 500 label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" />
517 help="default=0.2" argument="--min-alternate-fraction" /> 501 <param name="C" argument="--min-alternate-count" type="integer" value="2"
518 <param name="C" type="integer" value="2" 502 label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" />
519 label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" 503 <param name="min_alternate_qsum" argument="--min-alternate-qsum" type="integer" value="0"
520 help="default=2" argument="--min-alternate-count" /> 504 label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" />
521 <param name="min_alternate_qsum" type="integer" value="0" 505 <param name="G" argument="--min-alternate-total" type="integer" value="1"
522 label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" 506 label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" />
523 help="default=0" argument="--min-alternate-qsum" />
524 <param name="G" type="integer" value="1"
525 label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis"
526 help="default=1" argument="--min-alternate-total" />
527 <expand macro="par_min_cov" /> 507 <expand macro="par_min_cov" />
528 </when> 508 </when>
529 <when value="do_not_set" /><!-- do nothing --> 509 <when value="do_not_set" />
530 </conditional> 510 </conditional>
531 511
532 <!-- population and mappability priors --> 512 <!-- population and mappability priors -->
533 <conditional name="population_mappability_priors"> 513 <conditional name="population_mappability_priors">
534 <param name="population_mappability_priors_selector" type="select" label="Population and mappability priors" 514 <param name="population_mappability_priors_selector" type="select" label="Population and mappability priors"
535 help="Sets -k, -w, -V, and -a options."> 515 help="Sets -k, -w, -V, and -a options">
536 <option value="do_not_set" selected="true">Use defaults</option> 516 <option value="do_not_set" selected="true">Use defaults</option>
537 <option value="set">Set population and mappability priors</option> 517 <option value="set">Set population and mappability priors</option>
538 </param> 518 </param>
539 <when value="set"> 519 <when value="set">
540 <param name="k" type="boolean" truevalue="-k" falsevalue="" checked="False" label="No population priors" 520 <param name="k" argument="--no-population-priors" type="boolean" truevalue="-k" falsevalue="" checked="false"
541 help="default=False. Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors." 521 label="No population priors"
542 argument="--no-population-priors" /> 522 help="Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors" />
543 <param name="w" type="boolean" truevalue="-w" falsevalue="" checked="False" 523 <param name="w" argument="--hwe-priors-off" type="boolean" truevalue="-w" falsevalue="" checked="false"
544 label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" 524 label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
545 help="default=False" argument="--hwe-priors-off" /> 525 <param name="V" argument="--binomial-obs-priors-off" type="boolean" truevalue="-V" falsevalue="" checked="false"
546 <param name="V" type="boolean" truevalue="-V" falsevalue="" checked="False" label="Disable incorporation of prior expectations about observations" 526 label="Disable incorporation of prior expectations about observations"
547 help="default=False. Uses read placement probability, strand balance probability, and read position (5&#39;-3&#39;) probability." 527 help="Uses read placement probability, strand balance probability, and read position (5&#39;-3&#39;) probability" />
548 argument="--binomial-obs-priors-off" /> 528 <param name="a" argument="--allele-balance-priors-off" type="boolean" truevalue="-a" falsevalue="" checked="false"
549 <param name="a" type="boolean" truevalue="-a" falsevalue="" checked="False" 529 label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" />
550 label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" 530 </when>
551 help="default=False" 531 <when value="do_not_set" />
552 argument="--allele-balance-priors-off" />
553 </when>
554 <when value="do_not_set" /><!-- do nothing -->
555 </conditional> 532 </conditional>
556 533
557 <!-- genotype likelihoods --> 534 <!-- genotype likelihoods -->
558 <conditional name="genotype_likelihoods"> 535 <conditional name="genotype_likelihoods">
559 <param name="genotype_likelihoods_selector" type="select" label="Genotype likelihood options" 536 <param name="genotype_likelihoods_selector" type="select" label="Genotype likelihood options"
560 help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options."> 537 help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options">
561 <option value="do_not_set" selected="true">Use defaults</option> 538 <option value="do_not_set" selected="true">Use defaults</option>
562 <option value="set">Set genotype likelihood options</option> 539 <option value="set">Set genotype likelihood options</option>
563 </param> 540 </param>
564 <when value="set"> 541 <when value="set">
565 <param name="base_quality_cap" type="integer" value="0" label="Limit estimated observation quality by capping base quality at" 542 <param name="base_quality_cap" argument="--base-quality-cap" type="integer" value="0"
566 argument="--base-quality-cap" /> 543 label="Limit estimated observation quality by capping base quality at" />
567 <param name="experimental_gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="False" 544 <param name="experimental_gls" argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false"
568 label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" 545 label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual"
569 help="Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples." 546 help="Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples" />
570 argument="--experimental-gls" /> 547 <param name="prob_contamination" argument="--prob-contamination" type="float" value="10e-9"
571 <param name="prob_contamination" type="float" value="10e-9" label="An estimate of contamination to use for all samples" 548 label="An estimate of contamination to use for all samples" />
572 help="default=10e-9." argument="--prob-contamination" /> 549 </when>
573 </when> 550 <when value="do_not_set" />
574 <when value="do_not_set" /><!-- do nothing -->
575 </conditional> 551 </conditional>
576 552
577 <!-- algorithmic features --> 553 <!-- algorithmic features -->
578 <conditional name="algorithmic_features"> 554 <conditional name="algorithmic_features">
579 <param name="algorithmic_features_selector" type="select" label="Algorithmic features" 555 <param name="algorithmic_features_selector" type="select" label="Algorithmic features"
580 help="Sets --report-genotype-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, -S, -j, -H, -D, -= options"> 556 help="Sets --report-genotype-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, -S, -j, -H, -D, -= options">
581 <option value="do_not_set" selected="true">Use defaults</option> 557 <option value="do_not_set" selected="true">Use defaults</option>
582 <option value="set">Set algorithmic features</option> 558 <option value="set">Set algorithmic features</option>
583 </param> 559 </param>
584 <when value="set"> 560 <when value="set">
585 <param name="report_genotype_likelihood_max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="False" 561 <param name="report_genotype_likelihood_max" argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false"
586 label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods." 562 label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods" />
587 help="default=False" argument="--report-genotype-likelihood-max" /> 563 <param name="B" argument="--genotyping-max-iterations" type="integer" value="1000"
588 <param name="B" type="integer" value="1000" label="Iterate no more than N times during genotyping step" 564 label="Iterate no more than N times during genotyping step" />
589 help="default=1000." argument="--genotyping-max-iterations" /> 565 <param name="genotyping_max_banddepth" argument="--genotyping-max-banddepth" type="integer" value="6"
590 <param name="genotyping_max_banddepth" type="integer" value="6" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" 566 label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" />
591 help="default=6" argument="--genotyping-max-banddepth" /> 567 <param name="W" argument="--posterior-integration-limits" type="text" value="1,3"
592 <param name="W" type="text" value="1,3" 568 label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" />
593 label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" 569 <param name="N" argument="--exclude-unobserved-genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="false"
594 help="default=1,3" argument="--posterior-integration-limits" /> 570 label="Skip sample genotypings for which the sample has no supporting reads" />
595 <param name="N" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" 571 <param name="genotype_variant_threshold" argument="--genotype-variant-threshold" type="integer" value="" optional="true"
596 label="Skip sample genotypings for which the sample has no supporting reads" 572 label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample"
597 help="default=False" argument="--exclude-unobserved-genotypes" /> 573 help="default=~unbounded" />
598 <conditional name="genotype_variant_threshold"> 574 <param name="j" argument="--use-mapping-quality" type="boolean" truevalue="-j" falsevalue="" checked="false"
599 <param name="genotype_variant_threshold_selector" type="select" 575 label="Use mapping quality of alleles when calculating data likelihoods" />
600 label="Limit posterior integration" argument="--genotype-variant-threshold"> 576 <param name="H" argument="--harmonic-indel-quality" type="boolean" truevalue="-H" falsevalue="" checked="false"
601 <option value="do_not_set" selected="true">Do not limit posterior integration</option>
602 <option value="set">Set posterior integration limit</option>
603 </param>
604 <when value="do_not_set" /><!-- do nothing -->
605 <when value="set">
606 <param name="S" value="" type="integer"
607 label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample."
608 help="default=~unbounded" argument="--genotype-variant-threshold" />
609 </when>
610 </conditional>
611 <param name="j" type="boolean" truevalue="-j" falsevalue="" checked="False"
612 label="Use mapping quality of alleles when calculating data likelihoods"
613 help="default=False" argument="--use-mapping-quality" />
614 <param name="H" type="boolean" truevalue="-H" falsevalue="" checked="False"
615 label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel" 577 label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel"
616 help="default=use a minimum Base Quality in flanking sequence." argument="--harmonic-indel-quality" /> 578 help="By default, FreeBayes uses a minimum Base Quality in flanking sequence" />
617 <param name="D" type="float" value="0.9" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" 579 <param name="D" argument="--read-dependence-factor" type="float" value="0.9"
618 help="default=0.9." argument="--read-dependence-factor" /> 580 label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" />
619 <param name="genotype_qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="False" 581 <param name="genotype_qualities" argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false"
620 label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" 582 label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" />
621 help="-= --genotype-qualities; default=False " /> 583 </when>
622 </when> 584 <when value="do_not_set" />
623 <when value="do_not_set" /><!-- do nothing --> 585 </conditional>
624 </conditional> 586 </when>
625 </when> 587 <when value="simple" />
626 <when value="simple" /><!-- do nothing -->
627 <when value="simple_w_filters"> 588 <when value="simple_w_filters">
628 <!-- add standard-filters to command line --> 589 <!-- add standard-filters to command line -->
629 <expand macro="par_min_cov" /> 590 <expand macro="par_min_cov" />
630 </when> 591 </when>
631 <when value="naive"> 592 <when value="naive">
632 <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic --> 593 <!-- build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic -->
633 </when> 594 </when>
634 <when value="naive_w_filters"> 595 <when value="naive_w_filters">
635 <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters--> 596 <!-- build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters-->
636 <expand macro="par_min_cov" /> 597 <expand macro="par_min_cov" />
637 </when> 598 </when>
638 </conditional> 599 </conditional>
639 </inputs> 600 </inputs>
640 <outputs> 601 <outputs>
693 654
694 ------ 655 ------
695 656
696 **Description** 657 **Description**
697 658
698 Privided BAM file(s) and a reference. FreeBayes will provide VCF output on standard out describing SNPs, indels, and complex variants in samples in the input alignments. 659 Provided some BAM dataset(s) and a reference sequence, FreeBayes will produce a VCF dataset describing SNPs, indels, and complex variants in samples in the input alignments.
699 660
700 By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 20% of the reads from a single sample (-F). These settings are suitable to low to high depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data. 661 By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 20% of the reads from a single sample (-F). These settings are suitable to low to high depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data.
701 662
702 FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read. The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be set to half the read length. 663 FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read. The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be set to half the read length.
703 664
707 668
708 ------- 669 -------
709 670
710 **Galaxy-specific options** 671 **Galaxy-specific options**
711 672
712 Galaxy allows five levels of control over FreeBayes options provided by **Choose parameter selection level** menu option. These are: 673 Galaxy allows five levels of control over FreeBayes options, provided by the **Choose parameter selection level** menu option. These are:
713 674
714 1. *Simple diploid calling*: The simples possible FreeBayes application. Equvalent of using FreeBayes with only a BAM input and no other parameter options. 675 1. *Simple diploid calling*: The simplest possible FreeBayes application. Equivalent to using FreeBayes with only a BAM input and no other parameter options.
715 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-varinat-threshold 0) and --min-coverage. 676 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-variant-threshold 0) and --min-coverage.
716 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling varinats in mixtures such as viral, bacterial, or organellar genomes. 677 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling variants in mixtures such as viral, bacterial, or organellar genomes.
717 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2. 678 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2.
718 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy widgets. 679 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy parameters.
719
720 -----
721
722 **FreeBayes options**
723
724 .. class:: infomark
725
726 Note that each Galaxy parameter widget corresponds to one of the command line flags listed below:
727
728 Input and output::
729
730 -t --targets FILE
731 Limit analysis to targets listed in the BED-format FILE.
732 -r --region chrom:start_position-end_position
733 Limit analysis to the specified region, 0-base coordinates,
734 end_position included. Either '-' or '..' may be used as a separator.
735 -s --samples FILE
736 Limit analysis to samples listed (one per line) in the FILE.
737 By default FreeBayes will analyze all samples in its input
738 BAM files.
739 --populations FILE
740 Each line of FILE should list a sample and a population which
741 it is part of. The population-based bayesian inference model
742 will then be partitioned on the basis of the populations.
743 -A --cnv-map FILE
744 Read a copy number map from the BED file FILE, which has
745 the format:
746 reference sequence, start, end, sample name, copy number
747 ... for each region in each sample which does not have the
748 default copy number as set by --ploidy.
749 --trace FILE Output an algorithmic trace to FILE.
750 --failed-alleles FILE
751 Write a BED file of the analyzed positions which do not
752 pass --pvar to FILE.
753 -@ --variant-input VCF
754 Use variants reported in VCF file as input to the algorithm.
755 Variants in this file will be treated as putative variants
756 even if there is not enough support in the data to pass
757 input filters.
758 -l --only-use-input-alleles
759 Only provide variant calls and genotype likelihoods for sites
760 and alleles which are provided in the VCF input, and provide
761 output in the VCF for all input alleles, not just those which
762 have support in the data.
763 --haplotype-basis-alleles VCF
764 When specified, only variant alleles provided in this input
765 VCF will be used for the construction of complex or haplotype
766 alleles.
767 --report-all-haplotype-alleles
768 At sites where genotypes are made over haplotype alleles,
769 provide information about all alleles in output, not only
770 those which are called.
771 --report-monomorphic
772 Report even loci which appear to be monomorphic, and report all
773 considered alleles, even those which are not in called genotypes.
774 Loci which do not have any potential alternates have '.' for ALT.
775
776 Reporting::
777
778 -P --pvar N Report sites if the probability that there is a polymorphism
779 at the site is greater than N. default: 0.0. Note that post-
780 filtering is generally recommended over the use of this parameter.
781
782 Population model::
783
784 -T --theta N The expected mutation rate or pairwise nucleotide diversity
785 among the population under analysis. This serves as the
786 single parameter to the Ewens Sampling Formula prior model
787 default: 0.001
788 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
789 -J --pooled-discrete
790 Assume that samples result from pooled sequencing.
791 Model pooled samples using discrete genotypes across pools.
792 When using this flag, set --ploidy to the number of
793 alleles in each sample or use the --cnv-map to define
794 per-sample ploidy.
795 -K --pooled-continuous
796 Output all alleles which pass input filters, regardless of
797 genotyping outcome or model.
798
799 Reference allele::
800
801 -Z --use-reference-allele
802 This flag includes the reference allele in the analysis as
803 if it is another sample from the same population.
804 --reference-quality MQ,BQ
805 Assign mapping quality of MQ to the reference allele at each
806 site and base quality of BQ. default: 100,60
807
808 Allele scope::
809
810 -I --no-snps Ignore SNP alleles.
811 -i --no-indels Ignore insertion and deletion alleles.
812 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
813 -u --no-complex Ignore complex events (composites of other classes).
814 -n --use-best-n-alleles N
815 Evaluate only the best N SNP alleles, ranked by sum of
816 supporting quality scores. (Set to 0 to use all; default: all)
817 -E --max-complex-gap N
818 --haplotype-length N
819 Allow haplotype calls with contiguous embedded matches of up
820 to this length. (default: 3)
821 --min-repeat-size N
822 When assembling observations across repeats, require the total repeat
823 length to be at least this many bp. (default: 5)
824 --min-repeat-entropy N
825 To detect interrupted repeats, build across sequence until it has
826 entropy > N bits per bp. (default: 0, off)
827 --no-partial-observations
828 Exclude observations which do not fully span the dynamically-determined
829 detection window. (default, use all observations, dividing partial
830 support across matching haplotypes when generating haplotypes.)
831
832 Indel realignment::
833
834 -O --dont-left-align-indels
835 Turn off left-alignment of indels, which is enabled by default.
836
837 Input filters::
838
839 -4 --use-duplicate-reads
840 Include duplicate-marked alignments in the analysis.
841 default: exclude duplicates marked as such in alignments
842 -m --min-mapping-quality Q
843 Exclude alignments from analysis if they have a mapping
844 quality less than Q. default: 1
845 -q --min-base-quality Q
846 Exclude alleles from analysis if their supporting base
847 quality is less than Q. default: 0
848 -R --min-supporting-allele-qsum Q
849 Consider any allele in which the sum of qualities of supporting
850 observations is at least Q. default: 0
851 -Y --min-supporting-mapping-qsum Q
852 Consider any allele in which the sum of mapping qualities of
853 supporting reads is at least Q. default: 0
854 -Q --mismatch-base-quality-threshold Q
855 Count mismatches toward --read-mismatch-limit if the base
856 quality of the mismatch is >= Q. default: 10
857 -U --read-mismatch-limit N
858 Exclude reads with more than N mismatches where each mismatch
859 has base quality >= mismatch-base-quality-threshold.
860 default: ~unbounded
861 -z --read-max-mismatch-fraction N
862 Exclude reads with more than N [0,1] fraction of mismatches where
863 each mismatch has base quality >= mismatch-base-quality-threshold
864 default: 1.0
865 -$ --read-snp-limit N
866 Exclude reads with more than N base mismatches, ignoring gaps
867 with quality >= mismatch-base-quality-threshold.
868 default: ~unbounded
869 -e --read-indel-limit N
870 Exclude reads with more than N separate gaps.
871 default: ~unbounded
872 -0 --standard-filters Use stringent input base and mapping quality filters
873 Equivalent to -m 30 -q 20 -R 0 -S 0
874 -F --min-alternate-fraction N
875 Require at least this fraction of observations supporting
876 an alternate allele within a single individual
877 in order to evaluate the position. default: 0.2
878 -C --min-alternate-count N
879 Require at least this count of observations supporting
880 an alternate allele within a single individual in order
881 to evaluate the position. default: 2
882 -3 --min-alternate-qsum N
883 Require at least this sum of quality of observations supporting
884 an alternate allele within a single individual in order
885 to evaluate the position. default: 0
886 -G --min-alternate-total N
887 Require at least this count of observations supporting
888 an alternate allele within the total population in order
889 to use the allele in analysis. default: 1
890 -! --min-coverage N
891 Require at least this coverage to process a site. default: 0
892
893 Population priors::
894
895 -k --no-population-priors
896 Equivalent to --pooled-discrete --hwe-priors-off and removal of
897 Ewens Sampling Formula component of priors.
898
899 Mappability priors::
900
901 -w --hwe-priors-off
902 Disable estimation of the probability of the combination
903 arising under HWE given the allele frequency as estimated
904 by observation frequency.
905 -V --binomial-obs-priors-off
906 Disable incorporation of prior expectations about observations.
907 Uses read placement probability, strand balance probability,
908 and read position (5'-3') probability.
909 -a --allele-balance-priors-off
910 Disable use of aggregate probability of observation balance between alleles
911 as a component of the priors.
912
913 Genotype likelihoods::
914
915 --observation-bias FILE
916 Read length-dependent allele observation biases from FILE.
917 The format is [length] [alignment efficiency relative to reference]
918 where the efficiency is 1 if there is no relative observation bias.
919 --base-quality-cap Q
920 Limit estimated observation quality by capping base quality at Q.
921 --experimental-gls
922 Generate genotype likelihoods using 'effective base depth' metric
923 qual = 1-BaseQual * 1-MapQual. Incorporate partial observations.
924 This is the default when contamination estimates are provided.
925 Optimized for diploid samples.
926 --prob-contamination F
927 An estimate of contamination to use for all samples. default: 10e-9
928 --contamination-estimates FILE
929 A file containing per-sample estimates of contamination, such as
930 those generated by VerifyBamID. The format should be:
931 sample p(read=R|genotype=AR) p(read=A|genotype=AA)
932 Sample '*' can be used to set default contamination estimates.
933
934 Algorithmic features::
935
936 --report-genotype-likelihood-max
937 Report genotypes using the maximum-likelihood estimate provided
938 from genotype likelihoods.
939 -B --genotyping-max-iterations N
940 Iterate no more than N times during genotyping step. default: 1000.
941 --genotyping-max-banddepth N
942 Integrate no deeper than the Nth best genotype by likelihood when
943 genotyping. default: 6.
944 -W --posterior-integration-limits N,M
945 Integrate all genotype combinations in our posterior space
946 which include no more than N samples with their Mth best
947 data likelihood. default: 1,3.
948 -N --exclude-unobserved-genotypes
949 Skip sample genotypings for which the sample has no supporting reads.
950 -S --genotype-variant-threshold N
951 Limit posterior integration to samples where the second-best
952 genotype likelihood is no more than log(N) from the highest
953 genotype likelihood for the sample. default: ~unbounded
954 -j --use-mapping-quality
955 Use mapping quality of alleles when calculating data likelihoods.
956 -H --harmonic-indel-quality
957 Use a weighted sum of base qualities around an indel, scaled by the
958 distance from the indel. By default use a minimum BQ in flanking sequence.
959 -D --read-dependence-factor N
960 Incorporate non-independence of reads by scaling successive
961 observations by this factor during data likelihood
962 calculations. default: 0.9
963 -= --genotype-qualities
964 Calculate the marginal probability of genotypes and report as GQ in
965 each sample field in the VCF output.
966
967 680
968 ------ 681 ------
969 682
970 **Acknowledgments** 683 **Acknowledgments**
971 684
972 The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko. 685 The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko.
973 TNG was developed by Bjoern Gruening 686 TNG was developed by Bjoern Gruening.
974 </help> 687 </help>
975 <expand macro="citations" /> 688 <expand macro="citations" />
976 </tool> 689 </tool>