Mercurial > repos > devteam > freebayes
diff freebayes.xml @ 23:52aed7d9ed2b draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/freebayes commit cf4a70e780f104bc724323912b3b87fb37f887dd
author | devteam |
---|---|
date | Sun, 25 Sep 2016 09:48:24 -0400 |
parents | 99684adf84de |
children | da6e10dee68b |
line wrap: on
line diff
--- a/freebayes.xml Fri Oct 09 17:20:08 2015 -0400 +++ b/freebayes.xml Sun Sep 25 09:48:24 2016 -0400 @@ -1,537 +1,674 @@ -<?xml version="1.0"?> -<tool id="freebayes" name="FreeBayes" version="0.4.1"> - <requirements> - <requirement type="package" version="0_9_20_b040236">freebayes</requirement> - <requirement type="package" version="0.1.18">samtools</requirement> - </requirements> - <description> - bayesian genetic variant detector</description> - <command> +<tool id="freebayes" name="FreeBayes" version="1.0.2.29--1"> + <description> - bayesian genetic variant detector</description> + <requirements> + <requirement type="package" version="1.0.2.29">freebayes</requirement> + <requirement type="package" version="0.1.19">samtools</requirement> + <requirement type="package" version="4.1.3">gawk</requirement> + <requirement type="package" version="20160622">parallel</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> +<![CDATA[ ##set up input files #set $reference_fasta_filename = "localref.fa" - + #if str( $reference_source.reference_source_selector ) == "history": - ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && - samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 && + ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && + samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 && #else: #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) #end if - + #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): - ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" && - ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && + ln -s "${input_bam}" "b_${bam_count}.bam" && + ln -s "${input_bam.metadata.bam_index}" "b_${bam_count}.bam.bai" && #end for - + ## Tabixize optional input_varinat_vcf file (for 
--variant-input option) - - #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and $options_type.optional_inputs.optional_inputs_selector and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": - ln -s "${options_type.optional_inputs.input_variant_type.input_variant_vcf}" "input_variant_vcf.vcf.gz" && - ln -s "${Tabixized_input}" "input_variant_vcf.vcf.gz.tbi" && + #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": + ln -s "${options_type.optional_inputs.input_variant_type.input_variant_vcf}" "input_variant_vcf.vcf.gz" && + ln -s "${Tabixized_input}" "input_variant_vcf.vcf.gz.tbi" && #end if - - ##finished setting up inputs - - ##COMMAND LINE STARTS HERE - - freebayes + #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): - --bam "localbam_${bam_count}.bam" + samtools view -H b_${bam_count}.bam | grep "^@SQ" | cut -f 2- | awk '{ gsub("^SN:","",$1); gsub("^LN:","",$2); print $1"\t0\t"$2; }' >> regions_all.bed && #end for - --fasta-reference "${reference_fasta_filename}" - - ##outputs - --vcf "${output_vcf}" - + + sort -u regions_all.bed > regions_uniq.bed && + ## split into even small chunks, this has some disatvantages and will not be used for the moment + ## bedtools makewindows -b regions_uniq.bed -w 10000000 -s 9990000 > regions.bed && + + mkdir vcf_output && + mkdir failed_alleles && + mkdir trace && + + ## Finished setting up inputs + + for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; + do + + echo " + + ## COMMAND LINE STARTS HERE + + freebayes + + --region '\$i' + + #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): + --bam 
'b_${bam_count}.bam' + #end for + --fasta-reference '${reference_fasta_filename}' + + ## Outputs + --vcf './vcf_output/part_\$i.vcf' + #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file": - --targets "${target_limit_type.input_target_bed}" + --targets '${target_limit_type.input_target_bed}' #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region": - --region "${target_limit_type.region_chromosome}:${target_limit_type.region_start}..${target_limit_type.region_end}" + --region '${target_limit_type.region_chromosome}:${target_limit_type.region_start}..${target_limit_type.region_end}' #end if - + ##advanced options #if str( $options_type.options_type_selector ) == "simple": - ##do nothing as command like build up to this point is sufficinet for simple diploid calling - + ##do nothing as command like build up to this point is sufficinet for simple diploid calling + #elif str( $options_type.options_type_selector ) == "simple_w_filters": - - --standard-filters - --min-coverage "${options_type.min_coverage}" - + --standard-filters + --min-coverage '${options_type.min_coverage}' #elif str( $options_type.options_type_selector ) == "naive": - - --haplotype-length 0 - --min-alternate-count 1 - --min-alternate-fraction 0 - --pooled-continuous - --report-monomorphic - + --haplotype-length 0 + --min-alternate-count 1 + --min-alternate-fraction 0 + --pooled-continuous + --report-monomorphic #elif str( $options_type.options_type_selector ) == "naive_w_filters": + --haplotype-length 0 + --min-alternate-count 1 + --min-alternate-fraction 0 + --pooled-continuous + --report-monomorphic + --standard-filters + --min-coverage '${options_type.min_coverage}' - --haplotype-length 0 - --min-alternate-count 1 - --min-alternate-fraction 0 - --pooled-continuous - --report-monomorphic - --standard-filters - --min-coverage "${options_type.min_coverage}" - -## Command line direct text entry is not allowed at this time for security reasons - + ## 
Command line direct text entry is not allowed at this time for security reasons #elif str( $options_type.options_type_selector ) == "full": - - #if $options_type.optional_inputs.optional_inputs_selector: - - ${options_type.optional_inputs.report_monomorphic} - - #if $options_type.optional_inputs.output_trace_option: - --trace "${output_trace}" - #end if - - #if $options_type.optional_inputs.output_failed_alleles_option: - --failed-alleles "${output_failed_alleles_bed}" - #end if - - #if $options_type.optional_inputs.samples: - --samples "${options_type.optional_inputs.samples}" - #end if - - #if $options_type.optional_inputs.populations: - --populations "${options_type.optional_inputs.populations}" - #end if - - #if $options_type.optional_inputs.A: - --cnv-map "${options_type.optional_inputs.A}" - #end if - - #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": - --variant-input "input_variant_vcf.vcf.gz" ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in "Tabixize optional input_varinat_vcf file" section of the command line above - ${options_type.optional_inputs.input_variant_type.only_use_input_alleles} - #end if - - #if $options_type.optional_inputs.haplotype_basis_alleles: - --haplotype-basis-alleles "${options_type.optional_inputs.haplotype_basis_alleles}" - #end if - - #if $options_type.optional_inputs.observation_bias: - --observation-bias "${options_type.optional_inputs.observation_bias}" - #end if - - #if $options_type.optional_inputs.contamination_estimates: - --contamination-estimates "${options_type.optional_inputs.contamination_estimates}" - #end if - + #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set': + ${options_type.optional_inputs.report_monomorphic} + + #if $options_type.optional_inputs.output_trace_option: + --trace ./trace/part_'\$i'.txt + #end if + #if $options_type.optional_inputs.output_failed_alleles_option: + --failed-alleles 
./failed_alleles/part_'\$i'.bed + #end if + #if $options_type.optional_inputs.samples: + --samples '${options_type.optional_inputs.samples}' + #end if + #if $options_type.optional_inputs.populations: + --populations '${options_type.optional_inputs.populations}' + #end if + #if $options_type.optional_inputs.A: + --cnv-map '${options_type.optional_inputs.A}' + #end if + #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf": + --variant-input 'input_variant_vcf.vcf.gz' ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in "Tabixize optional input_varinat_vcf file" section of the command line above + ${options_type.optional_inputs.input_variant_type.only_use_input_alleles} + #end if + #if $options_type.optional_inputs.haplotype_basis_alleles: + --haplotype-basis-alleles '${options_type.optional_inputs.haplotype_basis_alleles}' + #end if + #if $options_type.optional_inputs.observation_bias: + --observation-bias '${options_type.optional_inputs.observation_bias}' + #end if + #if $options_type.optional_inputs.contamination_estimates: + --contamination-estimates '${options_type.optional_inputs.contamination_estimates}' + #end if #end if - -## REPORTING - - #if str( $options_type.reporting.reporting_selector ) == "True": + ## REPORTING + #if str( $options_type.reporting.reporting_selector ) == "set": --pvar ${options_type.reporting.pvar} #end if - -## POPULATION MODEL - - #if str( $options_type.population_model.population_model_selector ) == "True": - --theta "${options_type.population_model.T}" - --ploidy "${options_type.population_model.P}" + ## POPULATION MODEL + #if str( $options_type.population_model.population_model_selector ) == "set": + --theta '${options_type.population_model.T}' + --ploidy '${options_type.population_model.P}' ${options_type.population_model.J} ${options_type.population_model.K} - #end if - -## REFERENCE ALLELE - - #if str( $options_type.reference_allele.reference_allele_selector 
) == "True": + + ## REFERENCE ALLELE + #if str( $options_type.reference_allele.reference_allele_selector ) == "set": ${options_type.reference_allele.Z} - --reference-quality "${options_type.reference_allele.reference_quality}" + --reference-quality '${options_type.reference_allele.reference_quality}' #end if - -## ALLELE SCOPE - - #if str( $options_type.allele_scope.allele_scope_selector ) == "True": + + ## ALLELE SCOPE + #if str( $options_type.allele_scope.allele_scope_selector ) == "set": ${options_type.allele_scope.I} ${options_type.allele_scope.i} ${options_type.allele_scope.X} ${options_type.allele_scope.u} - -n "${options_type.allele_scope.n}" - --haplotype-length "${options_type.allele_scope.haplotype_length}" - --min-repeat-size "${options_type.allele_scope.min_repeat_length}" - --min-repeat-entropy "${options_type.allele_scope.min_repeat_entropy}" + -n '${options_type.allele_scope.n}' + --haplotype-length '${options_type.allele_scope.haplotype_length}' + --min-repeat-size '${options_type.allele_scope.min_repeat_length}' + --min-repeat-entropy '${options_type.allele_scope.min_repeat_entropy}' ${options_type.allele_scope.no_partial_observations} #end if - -## REALIGNMENT - + + ## REALIGNMENT ${options_type.O} - -##INPUT FILTERS - #if str( $options_type.input_filters.input_filters_selector ) == "True": + ##INPUT FILTERS + #if str( $options_type.input_filters.input_filters_selector ) == "set": ${options_type.input_filters.use_duplicate_reads} - -m "${options_type.input_filters.m}" - -q "${options_type.input_filters.q}" - -R "${options_type.input_filters.R}" - -Y "${options_type.input_filters.Y}" - - #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "True": - -Q "${options_type.input_filters.mismatch_filters.Q}" - -U "${options_type.input_filters.mismatch_filters.U}" - -z "${options_type.input_filters.mismatch_filters.z}" - --read-snp-limit "${options_type.input_filters.mismatch_filters.read_snp_limit}" + -m 
'${options_type.input_filters.m}' + -q '${options_type.input_filters.q}' + -R '${options_type.input_filters.R}' + -Y '${options_type.input_filters.Y}' + + #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set": + -Q '${options_type.input_filters.mismatch_filters.Q}' + -U '${options_type.input_filters.mismatch_filters.U}' + -z '${options_type.input_filters.mismatch_filters.z}' + --read-snp-limit '${options_type.input_filters.mismatch_filters.read_snp_limit}' #end if - - -e "${options_type.input_filters.e}" - -F "${options_type.input_filters.F}" - -C "${options_type.input_filters.C}" + + -e '${options_type.input_filters.e}' + -F '${options_type.input_filters.F}' + -C '${options_type.input_filters.C}' --min-alternate-qsum "${options_type.input_filters.min_alternate_qsum}" - -G "${options_type.input_filters.G}" - --min-coverage "${options_type.input_filters.min_coverage}" + -G '${options_type.input_filters.G}' + --min-coverage '${options_type.input_filters.min_coverage}' #end if - -## POPULATION AND MAPPABILITY PRIORS - - #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "True": + + ## POPULATION AND MAPPABILITY PRIORS + #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "set": ${options_type.population_mappability_priors.k} ${options_type.population_mappability_priors.w} ${options_type.population_mappability_priors.V} ${options_type.population_mappability_priors.a} #end if - -## GENOTYPE LIKELIHOODS - - #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "True": - --base-quality-cap "${$options_type.genotype_likelihoods.base_quality_cap}" + + ## GENOTYPE LIKELIHOODS + #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set": + --base-quality-cap '${$options_type.genotype_likelihoods.base_quality_cap}' ${$options_type.genotype_likelihoods.experimental_gls} - --prob-contamination 
"${$options_type.genotype_likelihoods.prob_contamination}" + --prob-contamination '${$options_type.genotype_likelihoods.prob_contamination}' #end if - -## ALGORITHMIC FEATURES - - #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "True": + + ## ALGORITHMIC FEATURES + #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set": ${options_type.algorithmic_features.report_genotype_likelihood_max} - -B "${options_type.algorithmic_features.B}" - --genotyping-max-banddepth "${options_type.algorithmic_features.genotyping_max_banddepth}" - -W "${options_type.algorithmic_features.W}" + -B '${options_type.algorithmic_features.B}' + --genotyping-max-banddepth '${options_type.algorithmic_features.genotyping_max_banddepth}' + -W '${options_type.algorithmic_features.W}' ${options_type.algorithmic_features.N} - - #if str( $options_type.algorithmic_features.genotype_variant_threshold.genotype_variant_threshold_selector ) == "True": - -S "${options_type.algorithmic_features.genotype_variant_threshold.S}" + + #if str( $options_type.algorithmic_features.genotype_variant_threshold.genotype_variant_threshold_selector ) == "set": + -S '${options_type.algorithmic_features.genotype_variant_threshold.S}' #end if - + ${options_type.algorithmic_features.j} ${options_type.algorithmic_features.H} - -D "${options_type.algorithmic_features.D}" + -D '${options_type.algorithmic_features.D}' ${options_type.algorithmic_features.genotype_qualities} #end if #end if - - </command> - - <inputs> - <conditional name="reference_source"> - <param name="reference_source_selector" type="select" label="Load reference genome from"> - <option value="cached">Local cache</option> - <option value="history">History</option> - </param> - <when value="cached"> - <repeat name="input_bams" title="Sample BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> - <validator type="unspecified_build" /> - <validator 
type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> - </param> - </repeat> - - <param name="ref_file" type="select" label="Using reference genome"> - <options from_data_table="fasta_indexes"></options> - <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> - </param> - </when> - <when value="history"> <!-- FIX ME!!!! --> - <repeat name="input_bams" title="Sample BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file" /> - </repeat> - <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> - </when> - </conditional> - - <conditional name="target_limit_type"> - <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options"> - <option value="do_not_limit" selected="True">Do not limit</option> - <option value="limit_by_target_file">Limit by target file</option> - <option value="limit_by_region">Limit to region</option> - </param> - <when value="do_not_limit"> - <!-- Do nothing here --> - </when> - <when value="limit_by_target_file"> - <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." help="-t --targets"/> - </when> - <when value="limit_by_region"> - <param name="region_chromosome" type="text" label="Region Chromosome" value="" help="-r --region"/> <!--only once? 
--> - <param name="region_start" type="integer" label="Region Start" value="" /> - <param name="region_end" type="integer" label="Region End" value="" /> - </when> - </conditional> - - <conditional name="options_type"> - <param name="options_type_selector" type="select" label="Choose parameter selection level" help="Select how much control over the freebayes run you need" > - <option value="simple" selected="True">1:Simple diploid calling</option> - <option value="simple_w_filters">2:Simple diploid calling with filtering and coverage</option> - <option value="naive">3:Frequency-based pooled calling</option> - <option value="naive_w_filters">4:Frequency-based pooled calling with filtering and coverage</option> - <option value="full">5:Complete list of all options</option> - <!-- We will not alloow command line text boxes at this time - <option value="cline">6:Input parameters on the command line</option> - --> - </param> - <when value="full"> - - <conditional name="optional_inputs"> - <param name="optional_inputs_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Do you want to provide additional inputs?" 
help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --varinat-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates" /> - <when value="set"> - <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" help="--failed-alleles" /> - <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" help="--trace"/> - <param name="samples" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" help="-s --samples; default=By default FreeBayes will analyze all samples in its input BAM files"/> - <param name="populations" type="data" format="txt" label="Populations File" optional="True" help="--populations; default=False. Each line of FILE should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" /> - <param name="A" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" help="-A --cnv-map; default=copy number is set to as specified by --ploidy. Read a copy number map from the BED file FILE, which has the format: reference sequence, start, end, sample name, copy number ... 
for each region in each sample which does not have the default copy number as set by --ploidy."/> - <conditional name="input_variant_type"> - <param name="input_variant_type_selector" type="select" label="Provide variants file"> - <option value="do_not_provide" selected="True">Do not provide</option> - <option value="provide_vcf">Provide VCF file</option> - </param> - <when value="do_not_provide"> - <!-- Do nothing here --> - </when> - <when value="provide_vcf"> - <param name="input_variant_vcf" type="data" format="vcf_bgzip" label="Use variants reported in VCF file as input to the algorithm"> - <conversion name="Tabixized_input" type="tabix" /> - </param> - <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" /> - </when> - </conditional> - <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" help="--haplotype-basis-alleles" /> - <param name="report_monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="False" label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes." 
help="--report-monomorphic " /> - <param name="observation_bias" optional="True" type="data" format="tabular" label="Load read length-dependent allele observation biases from" help="--observation-bias; The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" /> - <param name="contamination_estimates" optional="True" type="data" format="tabular" label="Upload per-sample estimates of contamination from" help="--contamination-estimates; The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates." /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- reporting --> - - <conditional name="reporting"> - <param name="reporting_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set reporting option?" help="Sets -P --pvar option" /> - <when value="set"> - <param name="pvar" type="float" value="0.0" label="Report sites if the probability that there is a polymorphism at the site is greater than" help="-P --pvar; default=0.0. Note that post-filtering is generally recommended over the use of this parameter. " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- population model --> - - <conditional name="population_model"> - <param name="population_model_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set population model?" help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options " /> - <when value="set"> - <param name="T" type="float" value="0.001" label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis" help="-T --theta; default = 0.001. This serves as the single parameter to the Ewens Sampling Formula prior model." 
/> - <param name="P" type="integer" value="2" label="Set ploidy for the analysis" help="-p --ploidy; default=2" /> - <param name="J" type="boolean" truevalue="-J" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="-J --pooled-discrete; default=False. Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy." /> - <param name="K" type="boolean" truevalue="-K" falsevalue="" checked="False" label="Output all alleles which pass input filters, regardles of genotyping outcome or model" help="-K, --poled-continuous; default=False. " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> + + "; + done > freebayes_commands.sh && + cat freebayes_commands.sh | parallel --no-notice -j \${GALAXY_SLOTS:-1} && + + ## make VCF header + + grep "^#" "./vcf_output/part_\$i.vcf" > header.txt && + + for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`; + do + ## if this fails then it bails out the script + cat "./vcf_output/part_\$i.vcf" | grep -v "^#" || true + ; + done | sort -k1,1 -k2,2n -k5,5 -u | cat header.txt - > "${output_vcf}" - <!-- reference allele --> + #if str( $options_type.options_type_selector ) == "full": + #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set': + #if $options_type.optional_inputs.output_failed_alleles_option: + && + for i in `cat regions.bed | awk '{print $1":"$2".."$3}'`; + do + cat "./failed_alleles/part_\$i.bed" + ; + done > '${output_failed_alleles_bed}' + #end if - <conditional name="reference_allele"> - <param name="reference_allele_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Use reference allele?" 
help="Sets --use-reference-allele and --reference-quality options " /> - <when value="set"> - <param name="Z" type="boolean" truevalue="-Z" falsevalue="" checked="False" label="Include the reference allele in the analysis as if it is another sample from the same population" help="-Z --use-reference-allele; default=False" /> - <param name="reference_quality" type="text" value="100,60" label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" help="--reference-quality; default=100,60 " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- allelic scope --> + #if $options_type.optional_inputs.output_trace_option: + && + for i in `cat regions.bed | awk '{print $1":"$2".."$3}'`; + do + cat './trace/part_\$i.txt' + ; + done > '${output_trace}' + #end if + #end if + #end if +]]> + </command> - <conditional name="allele_scope"> - <param name="allele_scope_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set allelic scope?" help="Sets -I, i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options " /> - <when value="set"> - <param name="I" type="boolean" truevalue="-I" falsevalue="" checked="False" label="Ignore SNP alleles" help="-I --no-snps; default=False" /> - <param name="i" type="boolean" truevalue="-i" falsevalue="" checked="False" label="Ignore indels alleles" help="-i --no-indels; default=False" /> - <param name="X" type="boolean" truevalue="-X" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" help="-X --no-mnps; default=False" /> - <param name="u" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)." help="-u --no-complex; default=False" /> - <param name="n" type="integer" value="0" label="How many best SNP alleles to evaluate" help="-n --use-best-n-alleles; default=0 (all). 
Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" /> - <param name="haplotype_length" type="integer" value="3" label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" help="-E --max-complex-gap --haplotype-length; default=3." /> - <param name="min_repeat_length" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" help="--min-repeat-size; default=5." /> - <param name="min_repeat_entropy" type="integer" value="0" label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" help="--min-repeat-entropy; default=0 (off)." /> - <param name="no_partial_observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="False" label="Exclude observations which do not fully span the dynamically-determined detection window" help="--no-partial-observations; default=use all observations, dividing partial support across matching haplotypes when generating haplotypes. " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- indel realignment --> - - <param name="O" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Turn off left-alignment of indels?" help="-O --dont-left-align-indels; default=False (do left align). " /> - - <!-- input filters --> - - <conditional name="input_filters"> - <param name="input_filters_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set input filters?" help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -$, -e, -0, -F, -C, -3, -G, and -! options " /> - <when value="set"> - <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis." help="-4 --use-duplicate-reads; default=False (exclude duplicates marked as such in alignments)." 
/> - <param name="m" type="integer" value="1" label="Exclude alignments from analysis if they have a mapping quality less than" help="-m --min-mapping-quality; default=1" /> - <param name="q" type="integer" value="0" label="Exclude alleles from analysis if their supporting base quality less than" help="-q --min-base-quality; default=0" /> - <param name="R" type="integer" value="0" label="Consider any allele in which the sum of qualities of supporting observations is at least" help="-R --min-supporting-allele-qsum; default=0" /> - <param name="Y" type="integer" value="0" label="Consider any allele in which and the sum of mapping qualities of supporting reads is at least" help="-Y --min-supporting-mapping-qsum; default=0" /> - <conditional name="mismatch_filters"> - <param name="mismatch_filters_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Perform mismatch filtering?" help="Sets -Q, -U, -z, and $ options" /> - <when value="set"> - <param name="Q" type="integer" value="10" label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" help="-Q --mismatch-base-quality-threshold; default=10" /> - <param name="U" type="integer" value="1000" optional="True" label="Exclude reads with more than N mismatches where each mismatch has base quality >= Q (option above)" help="-U --read-mismatch-limit; default=~unbound" /> - <param name="z" type="float" value="1.0" min="0.0" max="1.0" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= Q (second option above)" help="-z --read-max-mismatch-fraction; default=1.0" /> - <param name="read_snp_limit" type="integer" value="1000" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= Q (third option abobe)" help="-$amp; --read-snp-limit N " /> + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Load reference genome from"> + <option 
value="cached">Local cache</option> + <option value="history">History</option> + </param> + <when value="cached"> + <param name="input_bams" type="data" format="bam" multiple="True" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + </param> + <param name="ref_file" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes"></options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> </when> - <when value="do_not_set"> - <!-- do nothing --> + <when value="history"> <!-- FIX ME!!!! --> + <param name="input_bams" type="data" format="bam" multiple="True" label="BAM file" /> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" + help="You can upload a FASTA sequence to the history and use it as reference" /> + </when> + </conditional> + <conditional name="target_limit_type"> + <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options"> + <option value="do_not_limit" selected="True">Do not limit</option> + <option value="limit_by_target_file">Limit by target file</option> + <option value="limit_by_region">Limit to region</option> + </param> + <when value="do_not_limit"> + <!-- Do nothing here --> + </when> + <when value="limit_by_target_file"> + <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." help="-t --targets"/> + </when> + <when value="limit_by_region"> + <param name="region_chromosome" type="text" label="Region Chromosome" value="" help="-r --region"/> <!--only once? 
--> + <param name="region_start" type="integer" label="Region Start" value="" /> + <param name="region_end" type="integer" label="Region End" value="" /> </when> - </conditional> - <param name="e" type="integer" value="1000" label="Exclude reads with more than this number of separate gaps" help="-e --read-snp-limit; default=~unbounded" /> - <param name="standard_filters" type="boolean" truevalue="-0" falsevalue="" checked="False" label="Use stringent input base and mapping quality filters" help="-0 --standard-filters; default=False. Equivalent to -m 30 -q 20 -R 0 -S 0" /> - <param name="F" type="float" value="0.2" label="Require at least this fraction of observations supporting an alternate allele within a single individual in the in order to evaluate the position" help="-F --min-alternate-fraction; default=0.2" /> - <param name="C" type="integer" value="2" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-C --min-alternate-count; default=2" /> - <param name="min_alternate_qsum" type="integer" value="0" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" help="-3 --min-alternate-qsum; default=0" /> - <param name="G" type="integer" value="1" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" help="-G --min-alternate-total N; default=1" /> - <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! 
--min-coverage; default=0 " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- population and mappability priors --> - - <conditional name="population_mappability_priors"> - <param name="population_mappability_priors_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Set population and mappability priors?" help="Sets -k, -w, -V, and -a options " /> - <when value="set"> - <param name="k" type="boolean" truevalue="-k" falsevalue="" checked="False" label="No population priors" help="-k --no-population-priors; default=False. Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors." /> - <param name="w" type="boolean" truevalue="-w" falsevalue="" checked="False" label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" help="-w --hwe-priors-off; default=False" /> - <param name="V" type="boolean" truevalue="-V" falsevalue="" checked="False" label="Disable incorporation of prior expectations about observations" help="-V --binomial-obs-priors-off; default=False. Uses read placement probability, strand balance probability, and read position (5''-3'') probability." /> - <param name="a" type="boolean" truevalue="-a" falsevalue="" checked="False" label="isable use of aggregate probability of observation balance between alleles as a component of the priors" help="-a --allele-balance-priors-off; default=False " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- genotype likelihoods --> - - <conditional name="genotype_likelihoods"> - <param name="genotype_likelihoods_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Tweak genotype likelihoods?" help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options. 
" /> - <when value="set"> - <param name="base_quality_cap" type="integer" value="0" label="Limit estimated observation quality by capping base quality at" help="--base-quality-cap" /> - <param name="experimental_gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="False" label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" help="--experimental-gls; Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples." /> - <param name="prob_contamination" type="float" value="10e-9" label="An estimate of contamination to use for all samples. " help="--prob-contamination; default=10e-9." /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - - <!-- algorithmic features --> - - <conditional name="algorithmic_features"> - <param name="algorithmic_features_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Tweak algorithmic features?" help="Sets --report-genotypes-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, S, -j, -H, -D, -= options " /> - <when value="set"> - <param name="report_genotype_likelihood_max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="False" label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods." help="--report-genotype-likelihood-max; default=False" /> - <param name="B" type="integer" value="1000" label="Iterate no more than N times during genotyping step" help="-B --genotyping-max-iterations; default=1000." 
/> - <param name="genotyping_max_banddepth" type="integer" value="6" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" help="--genotyping-max-banddepth; default=6" /> - <param name="W" type="text" value="1,3" label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" help="-W --posterior-integration-limits; default=1,3" /> - <param name="N" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" help="-N --exclude-unobserved-genotypes; default=False" /> - <conditional name="genotype_variant_threshold"> - <param name="genotype_variant_threshold_selector" type="boolean" truevalue="set" falsevalue="do_not_set" label="Do you want to to limit posterior integration" help="-S --genotype-variant-threshold" /> - <when value="do_not_set"> - <!-- do nothing --> - </when> - <when value="set"> - <param name="S" value="" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample." help="-S --genotype-variant-threshold; default=~unbounded" /> - </when> </conditional> - <param name="j" type="boolean" truevalue="-j" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" help="-j --use-mapping-quality; default=False" /> - <param name="H" type="boolean" truevalue="-H" falsevalue="" checked="False" label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel" help="-H --harmonic-indel-quality; default=use a minimum Base Quality in flanking sequence." 
/> - <param name="D" type="float" value="0.9" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" help="-D --read-dependence-factor; default=0.9." /> - <param name="genotype_qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="False" label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" help="-= --genotype-qualities; default=False " /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - </when> - <when value="simple"> - <!-- do nothing --> - </when> - <when value="simple_w_filters"> - <!-- add standard-filters to command line --> - <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! --min-coverage; default=0 " /> - </when> - <when value="naive"> - <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic --> - </when> - <when value="naive_w_filters"> - <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters--> - <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! 
--min-coverage; default=0 " /> - </when> -</conditional> - - </inputs> - - <outputs> - <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> - <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> - <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] is True and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter> - </data> - <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> - <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] is True and options_type['optional_inputs']['output_trace_option'] is True</filter> - </data> - </outputs> - <tests> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> - <param name="input_bam" ftype="bam" value="freebayes-phix174.bam"/> - <param name="options_type_selector" value="simple"/> - <output name="output_vcf" file="freebayes-phix174-test1.vcf" compare="contains"/> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> - <param name="input_bam" ftype="bam" value="freebayes-phix174.bam"/> - <param name="options_type_selector" value="naive_w_filters"/> - <param name="min_coverage" value="14"/> - <output name="output_vcf" file="freebayes-phix174-test2.vcf" compare="contains"/> - </test> - </tests> - <stdio> - <exit_code range="1:" /> - </stdio> - <help> + <conditional name="options_type"> + <param name="options_type_selector" type="select" label="Choose parameter selection level" help="Select how much control over the freebayes run you need" > + <option 
value="simple" selected="True">1:Simple diploid calling</option> + <option value="simple_w_filters">2:Simple diploid calling with filtering and coverage</option> + <option value="naive">3:Frequency-based pooled calling</option> + <option value="naive_w_filters">4:Frequency-based pooled calling with filtering and coverage</option> + <option value="full">5:Complete list of all options</option> + <!-- We will not allow command line text boxes at this time + <option value="cline">6:Input parameters on the command line</option> + --> + </param> + <when value="full"> + <conditional name="optional_inputs"> + <param name="optional_inputs_selector" type="select" label="Additional inputs" + help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, + --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates"> + <option value="do_not_set" selected="true">Do not provide additional inputs</option> + <option value="set">Provide additional inputs</option> + </param> + <when value="set"> + <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" + label="Write out failed alleles file" help="--failed-alleles" /> + <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" + label="Write out algorithm trace file" help="--trace"/> + <param name="samples" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" + help="-s --samples; default=By default FreeBayes will analyze all samples in its input BAM files"/> + <param name="populations" type="data" format="txt" label="Populations File" optional="True" + help="--populations; default=False. Each line of FILE should list a sample and a population which it is part of. 
+ The population-based bayesian inference model will then be partitioned on the basis of the populations" /> + <param name="A" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" + help="-A --cnv-map; default=copy number is set to as specified by --ploidy. Read a copy number map from the BED file FILE, which has the format: + reference sequence, start, end, sample name, copy number ... for each region in each sample which does not have the default copy number as set by --ploidy."/> + <conditional name="input_variant_type"> + <param name="input_variant_type_selector" type="select" label="Provide variants file"> + <option value="do_not_provide" selected="True">Do not provide</option> + <option value="provide_vcf">Provide VCF file</option> + </param> + <when value="do_not_provide"> + <!-- Do nothing here --> + </when> + <when value="provide_vcf"> + <param name="input_variant_vcf" type="data" format="vcf_bgzip" label="Use variants reported in VCF file as input to the algorithm"> + <conversion name="Tabixized_input" type="tabix" /> + </param> + <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" /> + </when> + </conditional> + <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" + help="--haplotype-basis-alleles" /> + <param name="report_monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="False" + label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes." 
+ help="--report-monomorphic " /> + <param name="observation_bias" optional="True" type="data" format="tabular" label="Load read length-dependent allele observation biases from" + help="--observation-bias; The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" /> + <param name="contamination_estimates" optional="True" type="data" format="tabular" label="Upload per-sample estimates of contamination from" + help="--contamination-estimates; The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates." /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- reporting --> + <conditional name="reporting"> + <param name="reporting_selector" type="select" label="Reporting options" help="Sets -P --pvar option"> + <option value="do_not_set" selected="True">Use defaults</option> + <option value="set">Set reporting options</option> + </param> + <when value="set"> + <param name="pvar" type="float" value="0.0" label="Report sites if the probability that there is a polymorphism at the site is greater than" + help="-P --pvar; default=0.0. Note that post-filtering is generally recommended over the use of this parameter. " /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- population model --> + <conditional name="population_model"> + <param name="population_model_selector" type="select" label="Population model options" + help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options " > + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set population model options</option> + </param> + <when value="set"> + <param name="T" type="float" value="0.001" label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis" + help="-T --theta; default = 0.001. 
This serves as the single parameter to the Ewens Sampling Formula prior model." /> + <param name="P" type="integer" value="2" label="Set ploidy for the analysis" help="-p --ploidy; default=2" /> + <param name="J" type="boolean" truevalue="-J" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" + help="-J --pooled-discrete; default=False. Model pooled samples using discrete genotypes across pools. + When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy." /> + <param name="K" type="boolean" truevalue="-K" falsevalue="" checked="False" label="Output all alleles which pass input filters, regardless of genotyping outcome or model" + help="-K --pooled-continuous; default=False." /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- reference allele --> + <conditional name="reference_allele"> + <param name="reference_allele_selector" type="select" label="Reference allele options" + help="Sets --use-reference-allele and --reference-quality options."> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set reference allele options</option> + </param> + <when value="set"> + <param name="Z" type="boolean" truevalue="-Z" falsevalue="" checked="False" label="Include the reference allele in the analysis as if it is another sample from the same population" + help="-Z --use-reference-allele; default=False" /> + <param name="reference_quality" type="text" value="100,60" label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" + help="--reference-quality; default=100,60 " /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- allelic scope --> + <conditional name="allele_scope"> + <param name="allele_scope_selector" type="select" label="Allelic scope options" + help="Sets -I, -i, -X, -u, -n, --haplotype-length, 
--min-repeat-size, --min-repeat-entropy, and --no-partial-observations options."> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set allelic scope options</option> + </param> + <when value="set"> + <param name="I" type="boolean" truevalue="-I" falsevalue="" checked="False" label="Ignore SNP alleles" help="-I --no-snps; default=False" /> + <param name="i" type="boolean" truevalue="-i" falsevalue="" checked="False" label="Ignore indel alleles" help="-i --no-indels; default=False" /> + <param name="X" type="boolean" truevalue="-X" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" help="-X --no-mnps; default=False" /> + <param name="u" type="boolean" truevalue="-u" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)." + help="-u --no-complex; default=False" /> + <param name="n" type="integer" value="0" label="How many best SNP alleles to evaluate" + help="-n --use-best-n-alleles; default=0 (all). Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" /> + <param name="haplotype_length" type="integer" value="3" label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" + help="-E --max-complex-gap --haplotype-length; default=3." /> + <param name="min_repeat_length" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" + help="--min-repeat-size; default=5." /> + <param name="min_repeat_entropy" type="integer" value="0" label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" + help="--min-repeat-entropy; default=0 (off)." 
/> + <param name="no_partial_observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="False" + label="Exclude observations which do not fully span the dynamically-determined detection window" + help="--no-partial-observations; default=use all observations, dividing partial support across matching haplotypes when generating haplotypes." /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- indel realignment --> + <param name="O" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Turn off left-alignment of indels?" + help="-O --dont-left-align-indels; default=False (do left align)." /> + <!-- input filters --> + <conditional name="input_filters"> + <param name="input_filters_selector" type="select" label="Input filters" + help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -$, -e, -0, -F, -C, -3, -G, and -! options."> + <option value="do_not_set" selected="true">No input filters (default)</option> + <option value="set">Set input filters</option> + </param> + <when value="set"> + <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" + label="Include duplicate-marked alignments in the analysis." + help="-4 --use-duplicate-reads; default=False (exclude duplicates marked as such in alignments)." 
/> + <param name="m" type="integer" value="1" label="Exclude alignments from analysis if they have a mapping quality less than" + help="-m --min-mapping-quality; default=1" /> + <param name="q" type="integer" value="0" label="Exclude alleles from analysis if their supporting base quality is less than" + help="-q --min-base-quality; default=0" /> + <param name="R" type="integer" value="0" label="Consider any allele in which the sum of qualities of supporting observations is at least" + help="-R --min-supporting-allele-qsum; default=0" /> + <param name="Y" type="integer" value="0" label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" + help="-Y --min-supporting-mapping-qsum; default=0" /> + <conditional name="mismatch_filters"> + <param name="mismatch_filters_selector" type="select" label="Mismatch filters" + help="Sets -Q, -U, -z, and -$ options"> + <option value="do_not_set" selected="true">No mismatch filters (default)</option> + <option value="set">Set mismatch filters</option> + </param> + <when value="set"> + <param name="Q" type="integer" value="10" label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" + help="-Q --mismatch-base-quality-threshold; default=10" /> + <param name="U" type="integer" value="1000" optional="True" label="Exclude reads with more than N mismatches where each mismatch has base quality >= Q (option above)" + help="-U --read-mismatch-limit; default=~unbounded" /> + <param name="z" type="float" value="1.0" min="0.0" max="1.0" + label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= Q (second option above)" + help="-z --read-max-mismatch-fraction; default=1.0" /> + <param name="read_snp_limit" type="integer" + value="1000" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= Q (third option above)" + help="-$ --read-snp-limit N " /> + </when> + <when value="do_not_set"> + <!-- 
do nothing --> + </when> + </conditional> + <param name="e" type="integer" value="1000" label="Exclude reads with more than this number of separate gaps" + help="-e --read-indel-limit; default=~unbounded" /> + <param name="standard_filters" type="boolean" truevalue="-0" falsevalue="" checked="False" label="Use stringent input base and mapping quality filters" + help="-0 --standard-filters; default=False. Equivalent to -m 30 -q 20 -R 0 -S 0" /> + <param name="F" type="float" value="0.2" + label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" + help="-F --min-alternate-fraction; default=0.2" /> + <param name="C" type="integer" value="2" + label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" + help="-C --min-alternate-count; default=2" /> + <param name="min_alternate_qsum" type="integer" value="0" + label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" + help="-3 --min-alternate-qsum; default=0" /> + <param name="G" type="integer" value="1" + label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" + help="-G --min-alternate-total N; default=1" /> + <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" + help="-! 
--min-coverage; default=0 " /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- population and mappability priors --> + <conditional name="population_mappability_priors"> + <param name="population_mappability_priors_selector" type="select" label="Population and mappability priors" + help="Sets -k, -w, -V, and -a options."> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set population and mappability priors</option> + </param> + <when value="set"> + <param name="k" type="boolean" truevalue="-k" falsevalue="" checked="False" label="No population priors" + help="-k --no-population-priors; default=False. Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors." /> + <param name="w" type="boolean" truevalue="-w" falsevalue="" checked="False" + label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" + help="-w --hwe-priors-off; default=False" /> + <param name="V" type="boolean" truevalue="-V" falsevalue="" checked="False" label="Disable incorporation of prior expectations about observations" + help="-V --binomial-obs-priors-off; default=False. Uses read placement probability, strand balance probability, and read position (5''-3'') probability." 
/> + <param name="a" type="boolean" truevalue="-a" falsevalue="" checked="False" + label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" + help="-a --allele-balance-priors-off; default=False " /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- genotype likelihoods --> + <conditional name="genotype_likelihoods"> + <param name="genotype_likelihoods_selector" type="select" label="Genotype likelihood options" + help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options."> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set genotype likelihood options</option> + </param> + <when value="set"> + <param name="base_quality_cap" type="integer" value="0" label="Limit estimated observation quality by capping base quality at" help="--base-quality-cap" /> + <param name="experimental_gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="False" + label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" + help="--experimental-gls; Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples." /> + <param name="prob_contamination" type="float" value="10e-9" label="An estimate of contamination to use for all samples" + help="--prob-contamination; default=10e-9." 
/> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + <!-- algorithmic features --> + <conditional name="algorithmic_features"> + <param name="algorithmic_features_selector" type="select" label="Algorithmic features" + help="Sets --report-genotypes-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, S, -j, -H, -D, -= options"> + <option value="do_not_set" selected="true">Use defaults</option> + <option value="set">Set algorithmic features</option> + </param> + <when value="set"> + <param name="report_genotype_likelihood_max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="False" + label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods." + help="--report-genotype-likelihood-max; default=False" /> + <param name="B" type="integer" value="1000" label="Iterate no more than N times during genotyping step" + help="-B --genotyping-max-iterations; default=1000." /> + <param name="genotyping_max_banddepth" type="integer" value="6" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" + help="--genotyping-max-banddepth; default=6" /> + <param name="W" type="text" value="1,3" + label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" + help="-W --posterior-integration-limits; default=1,3" /> + <param name="N" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" + label="Skip sample genotypings for which the sample has no supporting reads" + help="-N --exclude-unobserved-genotypes; default=False" /> + <conditional name="genotype_variant_threshold"> + <param name="genotype_variant_threshold_selector" type="select" + label="Limit posterior integration" + help="-S --genotype-variant-threshold"> + <option value="do_not_set" selected="true">Do not limit posterior integration</option> + <option value="set">Set 
posterior integration limit</option> + </param> + <when value="do_not_set"> + <!-- do nothing --> + </when> + <when value="set"> + <param name="S" value="" type="integer" + label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample." + help="-S --genotype-variant-threshold; default=~unbounded" /> + </when> + </conditional> + <param name="j" type="boolean" truevalue="-j" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" + help="-j --use-mapping-quality; default=False" /> + <param name="H" type="boolean" truevalue="-H" falsevalue="" checked="False" + label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel" + help="-H --harmonic-indel-quality; default=use a minimum Base Quality in flanking sequence." /> + <param name="D" type="float" value="0.9" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" + help="-D --read-dependence-factor; default=0.9." /> + <param name="genotype_qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="False" + label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" + help="-= --genotype-qualities; default=False " /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + </when> + <when value="simple"> + <!-- do nothing --> + </when> + <when value="simple_w_filters"> + <!-- add standard-filters to command line --> + <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! 
--min-coverage; default=0 " /> + </when> + <when value="naive"> + <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic --> + </when> + <when value="naive_w_filters"> + <!-- do nothing build command line using haplotype-length 0 min-alternate-count 1 min-alternate-fraction 0 pooled-continuous report-monomorphic standard-filters--> + <param name="min_coverage" type="integer" value="0" label="Require at least this coverage to process a site" help="-! --min-coverage; default=0 " /> + </when> + </conditional> + </inputs> + <outputs> + <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> + <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> + <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter> + </data> + <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> + <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_trace_option'] is True</filter> + </data> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="simple"/> + <output name="output_vcf" file="freebayes-phix174-test1.vcf" compare="contains"/> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + 
<param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="naive_w_filters"/> + <param name="min_coverage" value="14"/> + <output name="output_vcf" file="freebayes-phix174-test2.vcf" compare="contains"/> + </test> + <test><!-- Case 3: options_type_selector=naive_w_filters with min_coverage=14; expected file freebayes-phix174-test3.vcf. NOTE(review): the parameters are identical to the preceding case while the expected file differs; confirm whether a different option set was intended here. --> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="naive_w_filters"/> + <param name="min_coverage" value="14"/> + <output name="output_vcf" file="freebayes-phix174-test3.vcf" compare="contains"/> + </test> + <test><!-- Case 4: options_type_selector=full with population_model_selector=set and P=1; expected file freebayes-phix174-test4.vcf. --> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/> + <param name="options_type_selector" value="full"/> + <param name="population_model_selector" value="set"/> + <param name="P" value="1"/> + <output name="output_vcf" file="freebayes-phix174-test4.vcf" compare="contains"/> + </test> + </tests> + <help> **What it does** FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment. See https://github.com/ekg/freebayes for details on FreeBayes. -This Galaxy instance of FreeBayes corresponds to release 0.9.20 - ------ **Description** @@ -813,16 +950,16 @@ For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_. The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko. 
+TNG was developed by Bjoern Gruening - </help> - - <citations> - <citation type="bibtex">@misc{1207.3907, + </help> + <citations><!-- BibTeX record for the FreeBayes arXiv preprint (1207.3907). The Author field lists both authors, matching the "Erik Garrison and Gabor Marth" citation given in the help text. --> + <citation type="bibtex">@misc{1207.3907, Author = {Erik Garrison and Gabor Marth}, Title = {Haplotype-based variant detection from short-read sequencing}, Year = {2012}, Eprint = {arXiv:1207.3907}, url = {http://arxiv.org/abs/1207.3907}, }</citation> - </citations> + </citations> </tool>