view freebayes.xml @ 36:3e954e7125bf draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit fe6bd492e3188ec78ef4bc5111c5da40eae05b0a
author iuc
date Mon, 14 Oct 2024 09:02:00 +0000
parents a7c9782130e8
children
line wrap: on
line source

<tool id="freebayes" name="FreeBayes" version="@TOOL_VERSION@+galaxy0">
    <description>bayesian genetic variant detector</description>
    <xrefs>
        <xref type="bio.tools">freebayes</xref>
    </xrefs>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="5.3.1">gawk</requirement>
        <requirement type="package" version="20240922">parallel</requirement>
    </expand>
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
    ## Set up input files.
    ## Reference: a FASTA from the history is symlinked into the working directory as
    ## localref.fa and indexed with samtools faidx; a cached reference is used in place
    ## through the path field of the selected data table entry.

    #set $reference_fasta_filename = "localref.fa"

    #if str( $reference_source.reference_source_selector ) == "history":
        ln -s -f '${reference_source.ref_file}' '${reference_fasta_filename}' &&
        ## Abort when indexing fails. In the former "faidx || echo ... &&" form the
        ## successful echo reset the exit status, so the job carried on without an index.
        { samtools faidx '${reference_fasta_filename}' 2>&1 || { echo "Error running samtools faidx for FreeBayes" >&2; exit 1; }; } &&
    #else:
        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
    #end if

    ## Collect the alignment inputs as a list: in 'merge' mode input_bams is used as-is,
    ## otherwise the single dataset is wrapped in a list so the loops below work for both cases.
    #if $reference_source.batchmode.processmode == 'merge':
        #set $input_bamfiles = $reference_source.batchmode.input_bams
    #else:
        #set $input_bamfiles = [ $reference_source.batchmode.input_bams ]
    #end if

    ## Symlink every input and its index to a predictable local name
    ## (b_<n>.bam with .bai, or b_<n>.cram with .crai), n being the position in the list.
    ## NOTE(review): an input with any other extension gets no link here; presumably the
    ## input_bam macro restricts the accepted formats to bam and cram - confirm in macros.xml.
    #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
        #if $input_bam.ext == 'bam':
            ln -s -f '${input_bam}' 'b_${bam_count}.bam' &&
            ln -s -f '${input_bam.metadata.bam_index}' 'b_${bam_count}.bam.bai' &&
        #elif $input_bam.ext == 'cram':
            ln -s -f '${input_bam}' 'b_${bam_count}.cram' &&
            ln -s -f '${input_bam.metadata.cram_index}' 'b_${bam_count}.cram.crai' &&
        #end if
    #end for

    ## Link the optional bgzipped VCF given for --variant-input, together with its tabix
    ## index (the Tabixized_input conversion declared on the parameter), under fixed local names.
    ## NOTE(review): 'cline' is not among the options_type_selector values defined in this
    ## tool; presumably a leftover - confirm before removing it from the condition.
    #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
        ln -s -f '${options_type.optional_inputs.input_variant_type.input_variant_vcf}' input_variant_vcf.vcf.gz &&
        ln -s -f '${Tabixized_input}' input_variant_vcf.vcf.gz.tbi &&
    #end if

    ## Build regions_all.bed (chromosome, start, end).
    ## If the user has specified a target file or a region, just use that instead of
    ## calculating a set of regions from the inputs.
    #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
        ln -s '${target_limit_type.input_target_bed}' regions_all.bed &&
    #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region":
        printf '${target_limit_type.region_chromosome}\t${target_limit_type.region_start}\t${target_limit_type.region_end}' > regions_all.bed &&
    #else
        ## Otherwise divide up the work by reference sequence: every @SQ header line of
        ## every input becomes one region running from 0 to the sequence length.
        ## The awk step strips the SN: prefix from the first and the LN: prefix from the
        ## second tag that follows @SQ.
        #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
            samtools view -H b_${bam_count}.${input_bam.ext}|
            grep '^@SQ' |
            cut -f 2- |
            awk '{ gsub("^SN:","",$1); gsub("^LN:","",$2); print $1"\t0\t"$2; }' >> regions_all.bed &&
        #end for
    #end if

    ## De-duplicate the regions; with several inputs the same sequence is listed more than once.
    sort -u regions_all.bed > regions_uniq.bed &&
    ## Splitting into evenly sized small chunks has some disadvantages and is not used for the moment:
    ## bedtools makewindows -b regions_uniq.bed -w 10000000 -s 9990000 > regions.bed &&

    ## Directories for the per-region outputs written by the generated freebayes commands.
    mkdir vcf_output failed_alleles trace &&

    ## Finished setting up inputs

    ## Generate one freebayes command per region (chrom:start..end, built from
    ## regions_uniq.bed) and collect them in freebayes_commands.sh. The region held in the
    ## shell variable i is substituted into each command and also names its output files.
    for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
    do
        echo "

        ## COMMAND LINE STARTS HERE

        freebayes

        --region '\$i'

        #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
            --bam 'b_${bam_count}.${input_bam.ext}'
        #end for
        --fasta-reference '${reference_fasta_filename}'

        ## Outputs
        --vcf './vcf_output/part_\$i.vcf'

        ## Coverage
        #if str($coverage_options.coverage_options_selector) == "set":
            @COVERAGE@
        #end if

        ##advanced options
        ## The first four selector values are presets with fixed flags; 'full' exposes
        ## the individual option groups handled below.
        #if str( $options_type.options_type_selector ) == "simple":
            #pass
        #elif str( $options_type.options_type_selector ) == "simple_w_filters":
            --standard-filters
        #elif str( $options_type.options_type_selector ) == "naive":
            --haplotype-length 0
            --min-alternate-count 1
            --min-alternate-fraction 0.05
            --pooled-continuous
            --report-monomorphic
        #elif str( $options_type.options_type_selector ) == "naive_w_filters":
            --haplotype-length 0
            --min-alternate-count 1
            --min-alternate-fraction 0.05
            --pooled-continuous
            --report-monomorphic
            --standard-filters
        #elif str( $options_type.options_type_selector ) == "full":
            #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set':
                ${options_type.optional_inputs.report_monomorphic}

                ## The two booleans below only switch the extra outputs on; the per-region
                ## file paths are supplied here.
                #if $options_type.optional_inputs.output_trace_option:
                    --trace ./trace/part_'\$i'.txt
                #end if
                #if $options_type.optional_inputs.output_failed_alleles_option:
                    --failed-alleles ./failed_alleles/part_'\$i'.bed
                #end if
                #if $options_type.optional_inputs.samples:
                    --samples '${options_type.optional_inputs.samples}'
                #end if
                #if $options_type.optional_inputs.populations:
                    --populations '${options_type.optional_inputs.populations}'
                #end if
                #if $options_type.optional_inputs.A:
                    --cnv-map '${options_type.optional_inputs.A}'
                #end if
                #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
                    --variant-input 'input_variant_vcf.vcf.gz'  ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in "Tabixize optional input_variant_vcf file" section of the command line above
                    ${options_type.optional_inputs.input_variant_type.only_use_input_alleles}
                #end if
                #if $options_type.optional_inputs.haplotype_basis_alleles:
                    --haplotype-basis-alleles '${options_type.optional_inputs.haplotype_basis_alleles}'
                #end if
                #if $options_type.optional_inputs.observation_bias:
                    --observation-bias '${options_type.optional_inputs.observation_bias}'
                #end if
                #if $options_type.optional_inputs.contamination_estimates:
                    --contamination-estimates '${options_type.optional_inputs.contamination_estimates}'
                #end if
                $options_type.optional_inputs.trim_complex_tail
            #end if

        ## REPORTING
            #if str( $options_type.reporting.reporting_selector ) == "set":
                --pvar ${options_type.reporting.pvar}
            #end if
        ## POPULATION MODEL
            #if str( $options_type.population_model.population_model_selector ) == "set":
                --theta ${options_type.population_model.T}
                --ploidy ${options_type.population_model.P}
                ${options_type.population_model.J}
                ${options_type.population_model.K}
            #end if

        ## REFERENCE ALLELE
            #if str( $options_type.reference_allele.reference_allele_selector ) == "set":
                ${options_type.reference_allele.Z}
                --reference-quality '${options_type.reference_allele.reference_quality}'
            #end if

        ## ALLELE SCOPE
            #if str( $options_type.allele_scope.allele_scope_selector ) == "set":
                ${options_type.allele_scope.I}
                ${options_type.allele_scope.i}
                ${options_type.allele_scope.X}
                ${options_type.allele_scope.u}
                ${options_type.allele_scope.no_partial_observations}

                -n ${options_type.allele_scope.n}

                --haplotype-length ${options_type.allele_scope.haplotype_length}
                --min-repeat-size ${options_type.allele_scope.min_repeat_length}
                --min-repeat-entropy ${options_type.allele_scope.min_repeat_entropy}
            #end if

        ## REALIGNMENT
            ${options_type.O}

        ##INPUT FILTERS
            #if str( $options_type.input_filters.input_filters_selector ) == "set":
                ${options_type.input_filters.standard_filters}
                ${options_type.input_filters.use_duplicate_reads}
                --min-mapping-quality ${options_type.input_filters.min_mapping_quality}
                --min-base-quality ${options_type.input_filters.min_base_quality}
                --min-supporting-allele-qsum ${options_type.input_filters.min_supporting_allele_qsum}
                --min-supporting-mapping-qsum ${options_type.input_filters.min_supporting_mapping_qsum}
                #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set":
                --mismatch-base-quality-threshold ${options_type.input_filters.mismatch_filters.mismatch_base_quality_threshold}
                  ## read_mismatch_limit is optional: the flag is left out when the field is empty
                  #if str($options_type.input_filters.mismatch_filters.read_mismatch_limit)
                    --read-mismatch-limit ${options_type.input_filters.mismatch_filters.read_mismatch_limit}
                  #end if
                  --read-max-mismatch-fraction ${options_type.input_filters.mismatch_filters.read_max_mismatch_fraction}
                  --read-snp-limit ${options_type.input_filters.mismatch_filters.read_snp_limit}
                #end if
                --read-indel-limit ${options_type.input_filters.read_indel_limit}
                --min-alternate-fraction ${options_type.input_filters.min_alternate_fraction}
                --min-alternate-qsum ${options_type.input_filters.min_alternate_qsum}
                --min-alternate-count ${options_type.input_filters.min_alternate_count}
                --min-alternate-total ${options_type.input_filters.min_alternate_total}
            #end if

        ## POPULATION AND MAPPABILITY PRIORS
            #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "set":
                ${options_type.population_mappability_priors.k}
                ${options_type.population_mappability_priors.w}
                ${options_type.population_mappability_priors.V}
                ${options_type.population_mappability_priors.a}
            #end if

        ## GENOTYPE LIKELIHOODS
            #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set":
              ${$options_type.genotype_likelihoods.experimental_gls}

              --base-quality-cap ${$options_type.genotype_likelihoods.base_quality_cap}
              --prob-contamination ${$options_type.genotype_likelihoods.prob_contamination}
            #end if

        ## ALGORITHMIC FEATURES
            #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set":
                -B '${options_type.algorithmic_features.B}'
                -W '${options_type.algorithmic_features.W}'
                -D '${options_type.algorithmic_features.D}'

                ## genotype_variant_threshold is only passed when a value was entered
                #if str($options_type.algorithmic_features.genotype_variant_threshold)
                    -S ${options_type.algorithmic_features.genotype_variant_threshold}
                #end if

                ${options_type.algorithmic_features.N}
                ${options_type.algorithmic_features.j}
                ${options_type.algorithmic_features.H}
                ${options_type.algorithmic_features.genotype_qualities}
                ${options_type.algorithmic_features.report_genotype_likelihood_max}

                --genotyping-max-banddepth ${options_type.algorithmic_features.genotyping_max_banddepth}
            #end if
        #end if

        ";
    done > freebayes_commands.sh &&

    ## Run the generated commands with GNU parallel, using GALAXY_SLOTS jobs at a time (1 if unset).
    cat freebayes_commands.sh |
    parallel --will-cite -j \${GALAXY_SLOTS:-1} &&

    ## make VCF header
    ## NOTE(review): the shell variable i is not set again before this line, so it still
    ## holds the last region of the generating loop; the header comes from that region's VCF.
    grep "^#" "./vcf_output/part_\$i.vcf" > header.txt &&

    ## Concatenate the records of all per-region VCFs, sort them by chromosome, position
    ## and ALT while dropping duplicates, then prepend the header.
    for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
    do
        ## grep -v exits non-zero for a part without any records; the true fallback
        ## keeps that from counting as a failure
        cat "./vcf_output/part_\$i.vcf" | grep -v "^#" || true
        ;
    done | sort -k1,1 -k2,2n -k5,5 -u | cat header.txt - > '${output_vcf}'

    ## Optional outputs (full option set only): concatenate the per-region failed-alleles
    ## BED files and trace files, in the order of regions_uniq.bed, into the Galaxy outputs.
    #if str( $options_type.options_type_selector ) == "full":
        #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set':
            #if $options_type.optional_inputs.output_failed_alleles_option:
                &&
                ## Iterate over regions_uniq.bed, the list the per-region files were generated
                ## from. regions.bed is never created, which left this output empty.
                for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
                do
                    cat "./failed_alleles/part_\$i.bed"
                    ;
                done > '${output_failed_alleles_bed}'
            #end if

            #if $options_type.optional_inputs.output_trace_option:
                &&
                for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
                do
                    ## Double quotes, so that the shell expands the loop variable in the file name.
                    cat "./trace/part_\$i.txt"
                    ;
                done > '${output_trace}'
            #end if
        #end if
    #end if
    ]]></command>

    <inputs>
        <!-- Source of the reference genome: a locally cached genome chosen from a data table,
             or a FASTA dataset from the history. Both branches expand the input_bam macro;
             presumably that macro defines the batchmode inputs read by the command section. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <!-- Inputs are expanded together with the validation macro in this branch. -->
                <expand macro="input_bam">
                    <expand macro="validation" />
                </expand>
                <!-- The command section uses the path field of the selected fasta_indexes entry. -->
                <param name="ref_file" type="select" label="Using reference genome">
                  <options from_data_table="fasta_indexes" />
                  <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input dataset"/>
                </param>
            </when>
            <when value="history"> <!-- FIX ME!!!! -->
                <!-- NOTE(review): the FIX ME marker above gives no details. Unlike the cached
                     branch, input_bam is expanded here without the validation macro. -->
                <expand macro="input_bam" />
                <!-- The command section symlinks this dataset as localref.fa and indexes it with samtools faidx. -->
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence"
                       help="You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>
        <!-- Optional restriction of variant calling: whole inputs, the regions of a BED
             dataset, or a single chromosome/start/end region typed in by the user. -->
        <conditional name="target_limit_type">
            <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options">
                <option value="do_not_limit" selected="true">Do not limit</option>
                <option value="limit_by_target_file">Limit by target file</option>
                <option value="limit_by_region">Limit to region</option>
            </param>
            <when value="do_not_limit" />
            <when value="limit_by_target_file">
                <param name="input_target_bed" argument="--targets" type="data" format="bed" label="Limit analysis to regions in this BED dataset" />
            </when>
            <when value="limit_by_region">
                <!-- The command section writes these three values as one tab-separated region
                     line, so an empty chromosome or a negative coordinate would yield an
                     unusable region; both are rejected at form level. -->
                <param name="region_chromosome" argument="--region" type="text" label="Region Chromosome" value=""> <!--only once? -->
                    <validator type="empty_field" message="Please enter a chromosome name" />
                </param>
                <param name="region_start" type="integer" min="0" label="Region Start" value="" />
                <param name="region_end" type="integer" min="0" label="Region End" value="" />
            </when>
        </conditional>
        <!-- Read-coverage limits. The parameters themselves come from the par_min_cov macro
             and are only shown when the selector is switched to "set". -->
        <conditional name="coverage_options">
            <param name="coverage_options_selector"
                   type="select"
                   label="Read coverage"
                   help="Sets --min-coverage, --limit-coverage, and --skip-coverage">
                <option value="do_not_set" selected="true">Use defaults</option>
                <option value="set">Specify coverage options</option>
            </param>
            <when value="set">
                <expand macro="par_min_cov" />
            </when>
            <when value="do_not_set" />
        </conditional>
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Choose parameter selection level"
                   help="Select how much control over the freebayes run you need">
                <option value="simple" selected="true">1. Simple diploid calling</option>
                <option value="simple_w_filters">2. Simple diploid calling with filtering and coverage</option>
                <option value="naive">3. Frequency-based pooled calling</option>
                <option value="naive_w_filters">4. Frequency-based pooled calling with filtering and coverage</option>
                <option value="full">5. Full list of options</option>
            </param>
            <when value="full">

                <!-- Additional input datasets and extra outputs. The help text lists exactly the
                     freebayes options that the parameters of this conditional can set. -->
                <conditional name="optional_inputs">
                    <param name="optional_inputs_selector" type="select" label="Additional inputs"
                           help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-monomorphic, --observation-bias, --contamination-estimates and --trim-complex-tail options">
                        <option value="do_not_set" selected="true">Do not provide additional inputs</option>
                        <option value="set">Provide additional inputs</option>
                    </param>
                    <when value="set">
                        <!-- The two switches below only request the extra outputs; the command
                             section supplies the per-region file paths itself. -->
                        <param name="output_failed_alleles_option" argument="--failed-alleles" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="false"
                               label="Write out failed alleles file" />
                        <param name="output_trace_option" argument="--trace" type="boolean" truevalue="--trace" falsevalue="" checked="false"
                               label="Write out algorithm trace file" />
                        <param argument="--samples" type="data" format="txt"
                               label="Limit analysis to samples listed (one per line) in this dataset" optional="true"
                               help="By default FreeBayes will analyze all samples in its input BAM datasets" />
                        <param argument="--populations" type="data" format="txt" optional="true"
                               label="Populations dataset"
                               help="Each line of this dataset should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" />
                        <param name="A" argument="--cnv-map" type="data" format="bed" optional="true"
                               label="Read a copy number map from a BED dataset"
                               help="The BED dataset should have the format: 'reference sequence, start, end, sample name, copy number' for each region in each sample which does not have the default copy number as set by --ploidy. If not specified, copy number is set to as specified by --ploidy" />
                        <conditional name="input_variant_type">
                            <param name="input_variant_type_selector" type="select" label="Provide variants dataset">
                                <option value="do_not_provide" selected="true">Do not provide</option>
                                <option value="provide_vcf">Provide VCF dataset</option>
                            </param>
                            <when value="do_not_provide" />
                            <when value="provide_vcf">
                                <!-- The command section links the Tabixized_input conversion next to the VCF as its tabix index. -->
                                <param name="input_variant_vcf" argument="--variant-input" type="data" format="vcf_bgzip"
                                       label="Use variants reported in this VCF dataset as input to the algorithm">
                                    <conversion name="Tabixized_input" type="tabix" />
                                </param>
                                <param name="only_use_input_alleles" argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false"
                                       label="Only provide variant calls and genotype likelihoods for sites in VCF" />
                            </when>
                        </conditional>
                        <param name="haplotype_basis_alleles" argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true"
                               label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" />
                        <param name="report_monomorphic" argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false"
                               label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes" />
                        <param name="observation_bias" argument="--observation-bias" type="data" format="tabular" optional="true"
                               label="Load read length-dependent allele observation biases from"
                               help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" />
                        <param name="contamination_estimates" argument="--contamination-estimates" type="data" format="tabular" optional="true"
                               label="Upload per-sample estimates of contamination from"
                               help="The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates" />
                        <param name="trim_complex_tail" argument="--trim-complex-tail" type="boolean" truevalue="--trim-complex-tail" falsevalue=""
                               label="Trim trailing reference matches" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- Reporting: exposes the pvar probability threshold when switched to "set". -->
                <conditional name="reporting">
                    <param name="reporting_selector"
                           type="select"
                           label="Reporting options"
                           help="Sets -P --pvar option">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set reporting options</option>
                    </param>
                    <when value="set">
                        <param argument="--pvar"
                               type="float"
                               value="0.0"
                               label="Report sites if the probability that there is a polymorphism at the site is greater than"
                               help="Note that post-filtering is generally recommended over the use of this parameter" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- Population model: theta, ploidy and the two pooled-sequencing switches.
                     The short names T, P, J and K are the ones the command section refers to. -->
                <conditional name="population_model">
                    <param name="population_model_selector" type="select" label="Population model options"
                           help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set population model options</option>
                    </param>
                    <when value="set">
                        <param name="T" argument="--theta" type="float" value="0.001"
                               label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis"
                               help="This serves as the single parameter to the Ewens Sampling Formula prior model" />
                        <param name="P" argument="--ploidy" type="integer" value="2"
                               label="Set ploidy for the analysis" />
                        <param name="J" argument="--pooled-discrete" type="boolean" truevalue="-J" falsevalue="" checked="false"
                               label="Assume that samples result from pooled sequencing"
                               help="Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy" />
                        <!-- The argument spelling now matches the option named in the selector help above. -->
                        <param name="K" argument="--pooled-continuous" type="boolean" truevalue="-K" falsevalue="" checked="false"
                               label="Output all alleles which pass input filters, regardless of genotyping outcome or model" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- Reference allele: optionally treat the reference as one more sample and set
                     the mapping and base quality assigned to it. -->
                <conditional name="reference_allele">
                    <param name="reference_allele_selector"
                           type="select"
                           label="Reference allele options"
                           help="Sets --use-reference-allele and --reference-quality options">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set reference allele options</option>
                    </param>
                    <when value="set">
                        <param name="Z"
                               argument="--use-reference-allele"
                               type="boolean"
                               truevalue="-Z"
                               falsevalue=""
                               checked="false"
                               label="Include the reference allele in the analysis as if it is another sample from the same population" />
                        <param name="reference_quality"
                               argument="--reference-quality"
                               type="text"
                               value="100,60"
                               label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- Allelic scope: which allele classes are considered and how haplotypes and
                     repeats are assembled. The short names I, i, X, u and n are the ones the
                     command section refers to. -->
                <conditional name="allele_scope">
                    <param name="allele_scope_selector" type="select" label="Allelic scope options"
                           help="Sets -I, -i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set allelic scope options</option>
                    </param>
                    <when value="set">
                        <param name="I" argument="--no-snps" type="boolean" truevalue="-I" falsevalue="" checked="false"
                               label="Ignore SNP alleles" />
                        <param name="i" argument="--no-indels" type="boolean" truevalue="-i" falsevalue="" checked="false"
                               label="Ignore indels alleles" />
                        <param name="X" argument="--no-mnps" type="boolean" truevalue="-X" falsevalue="" checked="false"
                               label="Ignore multi-nucleotide polymorphisms, MNPs" />
                        <param name="u" argument="--no-complex" type="boolean" truevalue="-u" falsevalue="" checked="false"
                               label="Ignore complex events (composites of other classes)" />
                        <param name="n" argument="--use-best-n-alleles" type="integer" value="0"
                               label="How many best SNP alleles to evaluate"
                               help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" />
                        <param name="haplotype_length" argument="--haplotype-length" type="integer" value="3"
                               label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" />
                        <!-- Named min_repeat_length here although the option it sets is min-repeat-size. -->
                        <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5"
                               label="When assembling observations across repeats, require the total repeat length at least this many bp" />
                        <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1"
                               label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" />
                        <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false"
                               label="Exclude observations which do not fully span the dynamically-determined detection window"
                               help="By default, FreeBayes uses all observations, dividing partial support across matching haplotypes when generating haplotypes" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- Indel realignment: a single switch that adds -O to the freebayes command when checked. -->
                <param name="O"
                       argument="--dont-left-align-indels"
                       type="boolean"
                       truevalue="-O"
                       falsevalue=""
                       checked="false"
                       label="Turn off left-alignment of indels" />

                <!-- input filters -->
                <conditional name="input_filters">
                    <param name="input_filters_selector" type="select" label="Input filters">
                        <option value="do_not_set" selected="true">No input filters (default)</option>
                        <option value="set">Set input filters</option>
                    </param>
                    <when value="set">
                        <!-- Switches and quality thresholds emitted by the command section when
                             input filters are set. No name attribute is given, so the parameter
                             names are taken from the argument attribute. -->
                        <param argument="--standard-filters" type="boolean" truevalue="--standard-filters" falsevalue="" checked="false"
                               label="Use stringent input base and mapping quality filters"
                               help="--min-mapping-quality 30 --min-base-quality 20, --min-supporting-allele-qsum 0 --genotype-variant-threshold 0"/>
                        <param argument="--use-duplicate-reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="false"
                               label="Include duplicate-marked alignments in the analysis" />
                        <param argument="--min-mapping-quality" type="integer" value="1"
                               label="Exclude alignments from analysis if they have a mapping quality less than" />
                        <param argument="--min-base-quality" type="integer" value="0"
                               label="Exclude alleles from analysis if their supporting base quality less than" />
                        <param argument="--min-supporting-allele-qsum" type="integer" value="0"
                               label="Consider any allele in which the sum of qualities of supporting observations is at least" />
                        <param argument="--min-supporting-mapping-qsum" type="integer" value="0"
                               label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" />
                        <!-- Mismatch-based read filters; the command section emits their options
                             only when this selector is "set". -->
                        <conditional name="mismatch_filters">
                            <param name="mismatch_filters_selector" type="select" label="Mismatch filters"
                                   help="Sets -Q, -U, -z, and &#36; options">
                                <option value="do_not_set" selected="true">No mismatch filters (default)</option>
                                <option value="set">Set mismatch filters</option>
                            </param>
                            <when value="set">
                                <param argument="--mismatch-base-quality-threshold" type="integer" value="10"
                                       label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" />
                                <!-- Optional: the command section leaves the read mismatch limit
                                     flag out when this field is empty. -->
                                <param argument="--read-mismatch-limit" type="integer" value="1000" optional="true"
                                       label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (option above)"
                                       help="default=~unbounded" />
                                <!-- Restricted to the closed interval from 0.0 to 1.0 by min and max. -->
                                <param argument="--read-max-mismatch-fraction" type="float" value="1.0" min="0.0" max="1.0"
                                       label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (second option above)" />
                                <param name="read_snp_limit" argument="--read-snp-limit" type="integer" value="1000"
                                       label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold (third option above)"
                                       help="default=~unbounded" />
                            </when>
                            <when value="do_not_set" />
                        </conditional>
                        <param argument="--read-indel-limit" type="integer" value="1000"
                               label="Exclude reads with more than this number of separate gaps"
                               help="default=~unbounded" />
                        <param argument="--min-alternate-fraction" type="float" value="0.05"
                               label="Require at least this fraction of observations supporting an alternate allele within a single individual in the in order to evaluate the position" />
                        <param argument="--min-alternate-qsum" type="integer" value="0"
                               label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" />
                        <param argument="--min-alternate-count" type="integer" value="2"
                               label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" />
                        <param argument="--min-alternate-total" type="integer" value="1"
                               label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- population and mappability priors: four boolean switches, each emitting its short flag
                     (-k, -w, -V, -a) when checked and nothing when unchecked -->
                <conditional name="population_mappability_priors">
                    <param name="population_mappability_priors_selector" type="select" label="Population and mappability priors"
                           help="Sets -k, -w, -V, and -a options">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set population and mappability priors</option>
                    </param>
                    <when value="set">
                        <param name="k" argument="--no-population-priors" type="boolean" truevalue="-k" falsevalue="" checked="false"
                               label="No population priors"
                               help="Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors" />
                        <param name="w" argument="--hwe-priors-off" type="boolean" truevalue="-w" falsevalue="" checked="false"
                               label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
                        <param name="V" argument="--binomial-obs-priors-off" type="boolean" truevalue="-V" falsevalue="" checked="false"
                               label="Disable incorporation of prior expectations about observations"
                               help="Uses read placement probability, strand balance probability, and read position (5&#39;-3&#39;) probability" />
                        <param name="a" argument="--allele-balance-priors-off" type="boolean" truevalue="-a" falsevalue="" checked="false"
                               label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- genotype likelihoods: optional overrides for the base quality cap, the experimental-gls
                     switch and a single contamination estimate that is applied to all samples -->
                <conditional name="genotype_likelihoods">
                    <param name="genotype_likelihoods_selector" type="select" label="Genotype likelihood options"
                           help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set genotype likelihood options</option>
                    </param>
                    <when value="set">
                        <param name="base_quality_cap" argument="--base-quality-cap" type="integer" value="0"
                               label="Limit estimated observation quality by capping base quality at" />
                        <param name="experimental_gls" argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false"
                               label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual"
                               help="Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples" />
                        <!-- 10e-9 is the same value the help section of this tool quotes as the prob-contamination default -->
                        <param name="prob_contamination" argument="--prob-contamination" type="float" value="10e-9"
                               label="An estimate of contamination to use for all samples" />
                    </when>
                    <when value="do_not_set" />
                </conditional>

                <!-- algorithmic features: genotyping iteration/integration limits, likelihood-related switches
                     and GQ reporting. W is free text in "N,M" form (pre-filled with 1,3). -->
                <conditional name="algorithmic_features">
                    <param name="algorithmic_features_selector" type="select" label="Algorithmic features"
                           help="Sets --report-genotype-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, -S, -j, -H, -D, -= options">
                        <option value="do_not_set" selected="true">Use defaults</option>
                        <option value="set">Set algorithmic features</option>
                    </param>
                    <when value="set">
                        <param name="report_genotype_likelihood_max" argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false"
                               label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods" />
                        <param name="B" argument="--genotyping-max-iterations" type="integer" value="1000"
                               label="Iterate no more than N times during genotyping step" />
                        <param name="genotyping_max_banddepth" argument="--genotyping-max-banddepth" type="integer" value="6"
                               label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" />
                        <param name="W" argument="--posterior-integration-limits" type="text" value="1,3"
                               label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" />
                        <param name="N" argument="--exclude-unobserved-genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="false"
                               label="Skip sample genotypings for which the sample has no supporting reads" />
                        <!-- optional with an empty value: leaving the field blank means no threshold is supplied -->
                        <param name="genotype_variant_threshold" argument="--genotype-variant-threshold" type="integer" value="" optional="true"
                               label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample"
                               help="default=~unbounded" />
                        <param name="j" argument="--use-mapping-quality" type="boolean" truevalue="-j" falsevalue="" checked="false"
                               label="Use mapping quality of alleles when calculating data likelihoods" />
                        <param name="H" argument="--harmonic-indel-quality" type="boolean" truevalue="-H" falsevalue="" checked="false"
                               label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel"
                               help="By default, FreeBayes uses a minimum Base Quality in flanking sequence" />
                        <param name="D" argument="--read-dependence-factor" type="float" value="0.9"
                               label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" />
                        <param name="genotype_qualities" argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false"
                               label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" />
                    </when>
                    <when value="do_not_set" />
                </conditional>
            </when>
            <when value="simple" />
            <when value="simple_w_filters" />
            <when value="naive" />
            <when value="naive_w_filters" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Primary VCF result; it carries no filter, so it does not depend on any parameter -->
        <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" />
        <!-- The two datasets below use the same gating expression: options_type_selector must be 'cline' or 'full',
             optional_inputs_selector must be 'set', and the dataset's own *_option flag must be True.
             NOTE(review): the when-branches of options_type visible in this file section are full, simple,
             simple_w_filters, naive and naive_w_filters; confirm whether 'cline' is still a selectable value. -->
        <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)">
            <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter>
        </data>
        <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)">
            <filter>( options_type['options_type_selector'] == 'cline' or options_type['options_type_selector'] == 'full' ) and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_trace_option'] is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- All tests take the reference from the history and run in "individual" process mode;
             each compares output_vcf against a stored VCF with a small lines_diff tolerance. -->
        <!-- Test 1: phiX174 BAM, "simple" option level, no extra parameters -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
            <param name="options_type_selector" value="simple"/>
            <output name="output_vcf" file="freebayes-phix174-test1.vcf" lines_diff="4" />
        </test>
        <!-- Test 2: phiX174 BAM, "naive_w_filters" option level with min_coverage set to 14 -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
            <param name="options_type_selector" value="naive_w_filters"/>
            <param name="coverage_options_selector" value="set" />
            <param name="min_coverage" value="14"/>
            <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" />
        </test>
        <!-- Test 3: NOTE(review): the input parameters are identical to test 2 and only the expected file
             differs (test3.vcf instead of test2.vcf); confirm whether a distinguishing parameter is missing. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
            <param name="options_type_selector" value="naive_w_filters"/>
            <param name="coverage_options_selector" value="set" />
            <param name="min_coverage" value="14"/>
            <output name="output_vcf" file="freebayes-phix174-test3.vcf" lines_diff="4" />
        </test>
        <!-- Test 4: phiX174 BAM, "full" option level with the population model section set,
             P set to 1 and trim_complex_tail passed -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
            <param name="options_type_selector" value="full"/>
            <param name="population_model_selector" value="set"/>
            <param name="P" value="1"/>
            <param name="trim_complex_tail" value="--trim-complex-tail"/>
            <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" />
        </test>
        <!-- Tests 5 to 7: hxb2 BAM, "simple" option level, each exercising one coverage option -->
        <!-- Test 5: min_coverage set to 250 -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/>
            <param name="options_type_selector" value="simple"/>
            <param name="coverage_options_selector" value="set" />
            <param name="min_coverage" value="250" />
            <output name="output_vcf" file="freebayes-hxb2-test5.vcf" lines_diff="4" />
        </test>
        <!-- Test 6: limit_coverage set to 400 -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/>
            <param name="options_type_selector" value="simple"/>
            <param name="coverage_options_selector" value="set" />
            <param name="limit_coverage" value="400" />
            <output name="output_vcf" file="freebayes-hxb2-test6.vcf" lines_diff="4" />
        </test>
        <!-- Test 7: skip_coverage set to 100 -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
            <param name="input_bams" ftype="bam" value="freebayes-hxb2.bam"/>
            <param name="options_type_selector" value="simple"/>
            <param name="coverage_options_selector" value="set" />
            <param name="skip_coverage" value="100" />
            <output name="output_vcf" file="freebayes-hxb2-test7.vcf" lines_diff="4" />
        </test>
        <test> <!-- Test with CRAM: same reference and expected VCF as test 1, but lines_diff is 6 instead of 4 -->
            <param name="reference_source_selector" value="history" />
            <param name="processmode" value="individual" />
            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
            <param name="input_bams" ftype="cram" value="freebayes-phix174.cram"/>
            <param name="options_type_selector" value="simple"/>
            <output name="output_vcf" file="freebayes-phix174-test1.vcf" lines_diff="6" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment.

See https://github.com/ekg/freebayes for details on FreeBayes.

------

**Description**

Provided some BAM dataset(s) and a reference sequence, FreeBayes will produce a VCF dataset describing SNPs, indels, and complex variants in samples in the input alignments.

By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 5% of the reads from a single sample (-F).  These settings are suitable for low to high depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data.

FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read.  The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp.  In practice, this can comfortably be set to half the read length.

Ploidy may be set to any level (-p), but by default all samples are assumed to be diploid.  FreeBayes can model per-sample and per-region variation in copy-number (-A) using a copy-number variation map.

FreeBayes can act as a frequency-based pooled caller and describe variants and haplotypes in terms of observation frequency rather than called genotypes. To do so, use --pooled-continuous and set input filters to a suitable level. Allele observation counts will be described by AO and RO fields in the VCF output.

-------

**Galaxy-specific options**

Galaxy allows five levels of control over FreeBayes options, provided by the **Choose parameter selection level** menu option. These are:

 1. *Simple diploid calling*: The simplest possible FreeBayes application. Equivalent to using FreeBayes with only a BAM input and no other parameter options.
 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-variant-threshold 0) and --min-coverage.
 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling variants in mixtures such as viral, bacterial, or organellar genomes.
 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2.
 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy parameters.

------

**Command-line parameters**

**Input**::

    --bam FILE                          The file or set of BAM files to be analyzed.
    --bam-list FILE                     A file containing a list of BAM files to be analyzed.

    --stdin                             Read BAM input on stdin.
    --fasta-reference FILE              Use FILE as the reference sequence for analysis.
                                        An index file (FILE.fai) will be created if none exists.
                                        If neither --targets nor --region are specified, FreeBayes
                                        will analyze every position in this reference.
    --targets FILE                      Limit analysis to targets listed in the BED-format FILE.
    --region <chrom>:<start>-<end>      Limit analysis to the specified region, 0-base coordinates,
                                        end_position not included (same as BED format).
                                        Either '-' or '..' may be used as a separator.
    --samples FILE                      Limit analysis to samples listed (one per line) in the FILE.
                                        By default FreeBayes will analyze all samples in its input
                                        BAM files.
    --populations FILE                  Each line of FILE should list a sample and a population which
                                        it is part of.  The population-based bayesian inference model
                                        will then be partitioned on the basis of the populations.
    --cnv-map FILE                      Read a copy number map from the BED file FILE, which has
                                        either a sample-level ploidy:
                                        sample_name copy_number
                                        or a region-specific format:
                                        seq_name start end sample_name copy_number
                                        ... for each region in each sample which does not have the
                                        default copy number as set by --ploidy. These fields can be delimited
                                        by space or tab.

**Output**::

    --vcf FILE                          Output VCF-format results to FILE. (default: stdout)
    --gvcf                              Write gVCF output, which indicates coverage in uncalled regions.
    --gvcf-chunk NUM                    When writing gVCF output emit a record for every NUM bases.
    --gvcf-dont-use-chunk               When writing the gVCF output emit a record for all bases if
                                        set to "true" , will also route an int to --gvcf-chunk
                                        similar to --output-mode EMIT_ALL_SITES from GATK
    --variant-input VCF                 Use variants reported in VCF file as input to the algorithm.
                                        Variants in this file will be included in the output even if
                                        there is not enough support in the data to pass input filters.
    --only-use-input-alleles            Only provide variant calls and genotype likelihoods for sites
                                        and alleles which are provided in the VCF input, and provide
                                        output in the VCF for all input alleles, not just those which
                                        have support in the data.
    --haplotype-basis-alleles VCF       When specified, only variant alleles provided in this input
                                        VCF will be used for the construction of complex or haplotype
                                        alleles.
    --report-all-haplotype-alleles      At sites where genotypes are made over haplotype alleles,
                                        provide information about all alleles in output, not only
                                        those which are called.
    --report-monomorphic                Report even loci which appear to be monomorphic, and report all
                                        considered alleles, even those which are not in called genotypes.
                                        Loci which do not have any potential alternates have '.' for ALT.
    --pvar N                            Report sites if the probability that there is a polymorphism
                                        at the site is greater than N.  default: 0.0.  Note that post-
                                        filtering is generally recommended over the use of this parameter.
    --strict-vcf                        Generate strict VCF format (FORMAT/GQ will be an int)

**Population model**::

    --theta N                           The expected mutation rate or pairwise nucleotide diversity
                                        among the population under analysis.  This serves as the
                                        single parameter to the Ewens Sampling Formula prior model
                                        default: 0.001
    --ploidy N                          Sets the default ploidy for the analysis to N.  default: 2
    --pooled-discrete                   Assume that samples result from pooled sequencing.
                                        Model pooled samples using discrete genotypes across pools.
                                        When using this flag, set --ploidy to the number of
                                        alleles in each sample or use the --cnv-map to define
                                        per-sample ploidy.
    --pooled-continuous                 Output all alleles which pass input filters, regardless of
                                        genotyping outcome or model.

**Reference allele**::

    --use-reference-allele              This flag includes the reference allele in the analysis as
                                        if it is another sample from the same population.
    --reference-quality MQ,BQ           Assign mapping quality of MQ to the reference allele at each
                                        site and base quality of BQ.  default: 100,60

**Allele scope**::

    --use-best-n-alleles N              Evaluate only the best N SNP alleles, ranked by sum of
                                        supporting quality scores.  (Set to 0 to use all; default: all)
    --max-complex-gap N
    --haplotype-length N                Allow haplotype calls with contiguous embedded matches of up
                                        to this length. Set N=-1 to disable clumping. (default: 3)
    --min-repeat-size                   When assembling observations across repeats, require the total repeat
                                        length at least this many bp.  (default: 5)
    --min-repeat-entropy N              To detect interrupted repeats, build across sequence until it has
                                        entropy > N bits per bp. Set to 0 to turn off. (default: 1)
    --no-partial-observations           Exclude observations which do not fully span the dynamically-determined
                                        detection window.  (default, use all observations, dividing partial
                                        support across matching haplotypes when generating haplotypes.)

**Indel realignment**::

    --dont-left-align-indels            Turn off left-alignment of indels, which is enabled by default.

**Input filters**::

    --use-duplicate-reads               Include duplicate-marked alignments in the analysis.
                                        default: exclude duplicates marked as such in alignments
    --min-mapping-quality Q             Exclude alignments from analysis if they have a mapping
                                        quality less than Q.  default: 1
    --min-base-quality Q                Exclude alleles from analysis if their supporting base
                                        quality is less than Q.  default: 0
    --min-supporting-allele-qsum Q      Consider any allele in which the sum of qualities of supporting
                                        observations is at least Q.  default: 0
    --min-supporting-mapping-qsum Q     Consider any allele in which the sum of mapping qualities of
                                        supporting reads is at least Q.  default: 0
    --mismatch-base-quality-threshold Q Count mismatches toward --read-mismatch-limit if the base
                                        quality of the mismatch is >= Q.  default: 10
    --read-mismatch-limit N             Exclude reads with more than N mismatches where each mismatch
                                        has base quality >= mismatch-base-quality-threshold.
                                        default: ~unbounded
    --read-max-mismatch-fraction N      Exclude reads with more than N [0,1] fraction of mismatches where
                                        each mismatch has base quality >= mismatch-base-quality-threshold
                                        default: 1.0
    --read-snp-limit N                  Exclude reads with more than N base mismatches, ignoring gaps
                                        with quality >= mismatch-base-quality-threshold.
                                        default: ~unbounded
    --read-indel-limit N                Exclude reads with more than N separate gaps.
                                        default: ~unbounded
    --standard-filters                  Use stringent input base and mapping quality filters
                                        Equivalent to -m 30 -q 20 -R 0 -S 0
    --min-alternate-fraction N          Require at least this fraction of observations supporting
                                        an alternate allele within a single individual in order
                                        to evaluate the position.  default: 0.05
    --min-alternate-count N             Require at least this count of observations supporting
                                        an alternate allele within a single individual in order
                                        to evaluate the position.  default: 2
    --min-alternate-qsum N              Require at least this sum of quality of observations supporting
                                        an alternate allele within a single individual in order
                                        to evaluate the position.  default: 0
    --min-alternate-total N             Require at least this count of observations supporting
                                        an alternate allele within the total population in order
                                        to use the allele in analysis.  default: 1
    --min-coverage N                    Require at least this coverage to process a site. default: 0
    --limit-coverage N                  Downsample per-sample coverage to this level if greater than this coverage.
                                        default: no limit
    --skip-coverage N                   Skip processing of alignments overlapping positions with coverage >N.
                                        This filters sites above this coverage, but will also reduce data nearby.
                                        default: no limit

**Population priors**::

    --no-population-priors              Equivalent to --pooled-discrete --hwe-priors-off and removal of
                                        Ewens Sampling Formula component of priors.

**Mappability priors**::

    --hwe-priors-off                    Disable estimation of the probability of the combination
                                        arising under HWE given the allele frequency as estimated
                                        by observation frequency.
    --binomial-obs-priors-off           Disable incorporation of prior expectations about observations.
                                        Uses read placement probability, strand balance probability,
                                        and read position (5'-3') probability.
    --allele-balance-priors-off         Disable use of aggregate probability of observation balance between alleles
                                        as a component of the priors.

**Genotype likelihoods**::

    --observation-bias FILE             Read length-dependent allele observation biases from FILE.
                                        The format is [length] [alignment efficiency relative to reference]
                                        where the efficiency is 1 if there is no relative observation bias.
    --base-quality-cap Q                Limit estimated observation quality by capping base quality at Q.
    --prob-contamination F              An estimate of contamination to use for all samples.  default: 10e-9
    --legacy-gls                        Use legacy (polybayes equivalent) genotype likelihood calculations
    --contamination-estimates FILE      A file containing per-sample estimates of contamination, such as
                                        those generated by VerifyBamID.  The format should be:
                                        sample p(read=R|genotype=AR) p(read=A|genotype=AA)
                                        Sample '*' can be used to set default contamination estimates.

**Algorithmic features**::

    --report-genotype-likelihood-max    Report genotypes using the maximum-likelihood estimate provided
                                        from genotype likelihoods.
    --genotyping-max-iterations N       Iterate no more than N times during genotyping step. default: 1000.
    --genotyping-max-banddepth N        Integrate no deeper than the Nth best genotype by likelihood when
                                        genotyping. default: 6.
    --posterior-integration-limits N,M  Integrate all genotype combinations in our posterior space
                                        which include no more than N samples with their Mth best
                                        data likelihood. default: 1,3.
    --exclude-unobserved-genotypes      Skip sample genotypings for which the sample has no supporting reads.
    --genotype-variant-threshold N      Limit posterior integration to samples where the second-best
                                        genotype likelihood is no more than log(N) from the highest
                                        genotype likelihood for the sample.  default: ~unbounded
    --use-mapping-quality               Use mapping quality of alleles when calculating data likelihoods.
    --harmonic-indel-quality            Use a weighted sum of base qualities around an indel, scaled by the
                                        distance from the indel.  By default, the minimum base quality (BQ)
                                        in the flanking sequence is used.
    --read-dependence-factor N          Incorporate non-independence of reads by scaling successive
                                        observations by this factor during data likelihood
                                        calculations.  default: 0.9
    --genotype-qualities                Calculate the marginal probability of genotypes and report as GQ in
                                        each sample field in the VCF output.

------

**Acknowledgments**

The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko.
TNG was developed by Bjoern Gruening.
]]>
    </help>
    <!-- Citations: expands the "citations" macro (presumably defined in the imported
         macros.xml; confirm there) and passes it one additional BibTeX entry for
         GNU Parallel, which this tool declares as a package requirement. -->
    <expand macro="citations">
        <citation type="bibtex">
            @article{Tange2011a,
                title = {GNU Parallel - The Command-Line Power Tool},
                author = {O. Tange},
                address = {Frederiksberg, Denmark},
                journal = {;login: The USENIX Magazine},
                month = {Feb},
                number = {1},
                volume = {36},
                url = {http://www.gnu.org/s/parallel},
                year = {2011},
                pages = {42-47}
            }
        </citation>
    </expand>
</tool>