view bcftools_mpileup.xml @ 20:0a5dcc9a8ec2 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 784611c9caf2680d41414ca2880b93a69d719701
author iuc
date Sun, 18 Aug 2024 10:02:02 +0000
parents bf533509c5b6
children
line wrap: on
line source

<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@">
    <description>Generate VCF or BCF containing genotype likelihoods for one or multiple alignment (BAM or CRAM) files</description>
    <macros>
        <!-- Subcommand name; stands in for @EXECUTABLE@ in the tool name/id, the command template and the help title. -->
        <token name="@EXECUTABLE@">mpileup</token>
        <import>macros.xml</import>
        <!-- Reusable option list for the four FLAG filter selects in the "Input Filtering Options" section.
             Every value is one FLAG bit; the command template adds the ticked values up into a single integer mask. -->
        <xml name="bam_flag_options">
            <option value="1">Read is paired</option>
            <option value="2">Read is mapped in a proper pair</option>
            <option value="4">The read is unmapped</option>
            <option value="8">The mate is unmapped</option>
            <option value="16">Read strand</option>
            <option value="32">Mate strand</option>
            <option value="64">Read is the first in a pair</option>
            <option value="128">Read is the second in a pair</option>
            <option value="256">The alignment or this read is not primary</option>
            <option value="512">The read fails platform/vendor quality checks</option>
            <option value="1024">The read is a PCR or optical duplicate</option>
        </xml>
    </macros>
    <!-- Shared macro expansions; the macro bodies are presumably defined in the imported macros.xml (not visible here; confirm).
         The samtools requirement is nested into the requirements macro: the command template runs
         "samtools faidx" when the reference FASTA comes from the history. -->
    <expand macro="bio_tools" />
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
    </expand>
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
## Command template (Cheetah). Steps:
##   1. symlink every alignment and its index into the working directory under a sanitised name
##   2. stage the reference FASTA (history source only) and index it with samtools faidx
##   3. assemble the bcftools call from the option sections
##   4. when read groups were pasted, print the generated read-group file after the run
#import re
#set bam_list = []
#if $input.input_number == 'single':
    ## Every non-word character of the display name becomes "_" so the link name is shell-safe
    #set $input_base = $re.sub('\W','_',$input.input_bam.display_name.replace('.bam','').replace('.cram',''))
    #set $ext = 'bam'
    #set $idx_ext = 'bai'
    #if $input.input_bam.ext == 'cram':
        #set $ext = 'cram'
        #set $idx_ext = 'crai'
    #end if
    #set $input_name = $input_base + '.' + $ext
    #silent $bam_list.append($input_name)
    ln -s '${input.input_bam}' ${input_name} &&
    #if $input.input_bam.ext == 'bam':
        ln -s '${input.input_bam.metadata.bam_index}' ${input_name}.${idx_ext} &&
    #else:
        ln -s '${input.input_bam.metadata.cram_index}' ${input_name}.${idx_ext} &&
    #end if
#else:
    #for $bam_count, $input_bam in enumerate( $input.input_bams ):
        #set $input_base = $re.sub('\W','_',$input_bam.display_name.replace('.bam','').replace('.cram',''))
        #set $ext = 'bam'
        #set $idx_ext = 'bai'
        #if $input_bam.ext == 'cram':
            #set $ext = 'cram'
            #set $idx_ext = 'crai'
        #end if
        #set $input_name = $input_base + '.' + $ext
        #silent $bam_list.append($input_name)
        ln -s '${input_bam}' ${input_name} &&
        ## Pick the index metadata element that matches the datatype, exactly as the single-input
        ## branch does; CRAM datasets expose cram_index rather than bam_index.
        #if $input_bam.ext == 'bam':
            ln -s '${input_bam.metadata.bam_index}' ${input_name}.${idx_ext} &&
        #else:
            ln -s '${input_bam.metadata.cram_index}' ${input_name}.${idx_ext} &&
        #end if
    #end for
#end if

## Reference: a history FASTA is linked as ref.fa and indexed here, a cached one is used in place,
## and with no reference at all bcftools is started with --non-reference.
#set $input_fa_ref = None
#if $reference_source.reference_source_selector == "history":
    #set $input_fa_ref = 'ref.fa'
    ln -s '${reference_source.ref_file}' $input_fa_ref &&
    samtools faidx $input_fa_ref &&
#elif $reference_source.reference_source_selector == "cached":
    #set $input_fa_ref = $reference_source.ref_file.fields.path
#end if

#set $section = $sec_restrict
@PREPARE_REGIONS_FILE@
@PREPARE_TARGETS_FILE@

bcftools @EXECUTABLE@

#if $input_fa_ref is not None:
    --fasta-ref '$input_fa_ref'
#else:
    --non-reference
#end if

## Indel Calling section
#set $section = $sec_indel
#if $section.perform_indel_calling.perform_indel_calling_selector == 'do_not_perform_indel_calling':
    --skip-indels
#elif $section.perform_indel_calling.perform_indel_calling_selector == 'perform_indel_calling':
    ## NOTE(review): the help text of gap_open_sequencing_error_probability and of
    ## open_seq_error_probability both name --open-prob, so -o and --open-prob below look like
    ## the same option given twice; confirm against the bcftools manual before removing either.
    -o "${section.perform_indel_calling.gap_open_sequencing_error_probability}"
    -e "${section.perform_indel_calling.gap_extension_sequencing_error_probability}"
    -h "${section.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}"
    -L "${section.perform_indel_calling.skip_indel_calling_above_sample_depth}"
    -m "${section.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}"
    --open-prob "${section.perform_indel_calling.open_seq_error_probability}"
    -F "${section.perform_indel_calling.minimum_gapped_read_fraction}"
    ${section.perform_indel_calling.gapped_read_per_sample}
    #if len( $section.perform_indel_calling.platform_list_repeat ):
        -P "${ ",".join( [ str( platform['platform_entry'] ) for platform in $section.perform_indel_calling.platform_list_repeat ] ) }"
    #end if
#end if

#if $section.ambig_reads
    --ambig-reads $section.ambig_reads
#end if

#if $section.indel_bias
    --indel-bias $section.indel_bias
#end if

#if $section.indel_size
    --indel-size $section.indel_size
#end if

## Filter section
#set $section = $sec_filtering
#if str( $section.filter_by_flags.filter_flags ) == "filter":
    ## Each select yields a comma-separated list of single FLAG bits; bcftools receives their sum
    #if $section.filter_by_flags.skip_all_set:
        --skip-all-set ${sum([int(flag) for flag in str($section.filter_by_flags.skip_all_set).split(',')])}
    #end if
    #if $section.filter_by_flags.skip_any_set:
        --skip-any-set ${sum([int(flag) for flag in str($section.filter_by_flags.skip_any_set).split(',')])}
    #end if
    #if $section.filter_by_flags.skip_all_unset:
        --skip-all-unset ${sum([int(flag) for flag in str($section.filter_by_flags.skip_all_unset).split(',')])}
    #end if
    #if $section.filter_by_flags.skip_any_unset:
        --skip-any-unset ${sum([int(flag) for flag in str($section.filter_by_flags.skip_any_unset).split(',')])}
    #end if
#end if
-d "${section.max_reads_per_bam}"
$section.ignore_overlaps
${section.skip_anomalous_read_pairs}
#if str( $section.quality.quality_settings ) == "adjust":
    $section.quality.baq
    -q "${section.quality.minimum_mapping_quality}"
    -Q "${section.quality.minimum_base_quality}"
    -C "${section.quality.coefficient_for_downgrading}"
#end if
## rg_action is either empty (include) or "^" (exclude) and is prefixed to the read-group file path
#if str( $section.read_groups.read_groups_selector ) == "ignore_rg":
    --ignore-RG
#elif str( $section.read_groups.read_groups_selector ) == "paste":
    -G "${section.read_groups.rg_action}${read_groups_file}"
#elif str( $section.read_groups.read_groups_selector ) == "history"
    -G "${section.read_groups.rg_action}${section.read_groups.read_groups}"
#end if

#set $section = $sec_output_options
#if $section.output_tags:
    --annotate "$section.output_tags"
#end if
#if $section.gvcf:
    --gvcf "$section.gvcf"
#end if

## Subset section
#set $section = $sec_subset
@SAMPLES@

## Restrict section
#set $section = $sec_restrict
@REGIONS@
@TARGETS@
@THREADS@
@OUTPUT_TYPE@

## Primary Input/Outputs
#echo ' '.join($bam_list)#
> '$output_file'
#if str( $sec_filtering.read_groups.read_groups_selector ) == "paste":
    && echo 'read-groups:'
    && cat '${read_groups_file}'
#end if
    ]]></command>
    <configfiles>
        <!-- Read-group file handed to -G when the "paste" source is selected: the pasted text is split on
             whitespace and the pieces are joined again with tab characters. For every other source
             pasted_data stays an empty string.
             NOTE(review): all pasted entries end up on one tab-separated line; confirm that this is the
             layout bcftools expects when several read groups are pasted. -->
        <configfile name="read_groups_file">
<![CDATA[#slurp
#set pasted_data = ''
#set $section = $sec_filtering
#if str( $section.read_groups.read_groups_selector ) == "paste":
    #set pasted_data = '\t'.join( str( $section.read_groups.group_paste).split() )
#end if
#slurp
${pasted_data}
]]>
        </configfile>
    </configfiles>
    <inputs>

        <!-- Alignment input: one BAM/CRAM dataset ("single") or several ("multiple").
             The command template symlinks each dataset and its index under a name derived from the display name. -->
        <conditional name="input">
            <param name="input_number" type="select" label="Alignment Inputs">
                 <option value="single">Single BAM/CRAM</option>
                 <option value="multiple">Multiple BAM/CRAMs</option>
            </param>
            <when value="single">
                <param name="input_bam" type="data" format="bam,cram" label="Input BAM/CRAM" />
            </when>
            <when value="multiple">
                <param name="input_bams" type="data" format="bam,cram" multiple="true" label="Input BAM/CRAMs" />
            </when>
        </conditional>

        <!-- Reference genome source. "cached" reads the path from the fasta_indexes data table,
             "history" takes a FASTA dataset (linked as ref.fa and indexed by the command template),
             and "none" makes the command template emit the non-reference flag instead of a FASTA path. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
                <option value="none">No Reference</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Select reference genome">
                    <options from_data_table="fasta_indexes"/>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Genome Reference" />
            </when>
            <when value="none"/>
        </conditional>

        <!-- Indel calling options. The advanced branch of the conditional maps onto the flags
             -o, -e, -h, -L, -m, -F, -p and -P in the command template; the three optional parameters
             after the conditional are only emitted when they are set.
             NOTE(review): gap_open_sequencing_error_probability and open_seq_error_probability carry the
             same label and help text (both name open-prob); confirm whether one of them is redundant. -->
        <section name="sec_indel" expanded="false" title="Indel Calling">
            <conditional name="perform_indel_calling">
                <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
                    <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option>
                    <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option>
                    <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
                </param>
                <when value="perform_indel_calling_def" />
                <when value="perform_indel_calling">
                    <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
                    <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" help="--ext-prob;  Reducing this value leads to longer indels. default=20"/>
                    <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." help="--tandem-qual; default=100"/>
                    <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" help="--max-idepth; default=250"/>
                    <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" help="--min-ireads; default=1"/>
                    <param name="open_seq_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
                    <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads" help="--gap-frac; default=0.002"/>
                    <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply --min-ireads and --gap-frac values on a per-sample basis" help="--per-sample-mF;  by default both options are applied to reads pooled from all samples"/>
                    <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
                        <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/>
                    </repeat>
                </when>
                <when value="do_not_perform_indel_calling" />
            </conditional>
            <param argument="--ambig-reads" type="select" optional="true" label="Ambiguous indel reads" help="What to do with ambiguous indel reads that do not span an entire short tandem repeat region: discard ambiguous reads from calling and do not increment high-quality AD depth counters (drop), exclude from calling but increment AD counters proportionally (incAD), exclude from calling and increment the first value of the AD counter (incAD0) ">
                <option value="drop">Drop</option>
                <option value="incAD">IncAD</option>
                <option value="incAD0">IncAD0</option>
            </param>
            <param argument="--indel-bias" type="float" min="0" value="" optional="true" label="Indel bias" help="Skews the indel scores up or down, trading recall (low false-negative) vs precision (low false-positive) [1.0]. In Bcftools 1.12 and earlier this parameter didn't exist, but had an implied value of 1.0. If you are planning to do heavy filtering of variants, selecting the best quality ones only (favouring precision over recall), it is advisable to set this lower (such as 0.75) while higher depth samples or where you favour recall rates over precision may work better with a higher value such as 2.0" />
            <param argument="--indel-size" type="integer" min="0" value="" optional="true" label="Indel size" help="Indel window size to use when assessing the quality of candidate indels. Note that although the window size approximately corresponds to the maximum indel size considered, it is not an exact threshold. Default: 110" />
        </section>

        <!-- Read-level filters: depth cap (-d, always emitted by the command template), overlap detection,
             orphan handling, FLAG masks, base/mapping quality and read-group selection. -->
        <section name="sec_filtering" expanded="false" title="Input Filtering Options">
            <param name="max_reads_per_bam" type="integer" value="250" min="1" label="Max reads per BAM" help="--max-depth; default=250"/>
            <param name="ignore_overlaps" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Disable read-pair overlap detection" help="--ignore-overlaps"/>
            <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" help="--count-orphans"/>
            <!-- Each checkbox group lists single FLAG bits; the command template sums the ticked values into one mask. -->
            <conditional name="filter_by_flags">
                <param name="filter_flags" type="select" label="Set filter by flags">
                    <option selected="True" value="nofilter">Do not filter</option>
                    <option value="filter">Filter by flags to exclude or require</option>
                </param>
                <when value="filter">
                    <param argument="--skip-all-set" type="select" display="checkboxes" label="Skip reads with all of the FLAG bits set" multiple="True" help="--skip-all-set">
                        <expand macro="bam_flag_options" />
                    </param>
                    <param argument="--skip-any-set" type="select" display="checkboxes" label="Skip reads with any of the FLAG bits set" multiple="True" help="--skip-any-set">
                        <expand macro="bam_flag_options" />
                    </param>
                    <param argument="--skip-all-unset" type="select" display="checkboxes" label="Skip reads with all of the FLAG bits unset" multiple="True" help="--skip-all-unset">
                        <expand macro="bam_flag_options" />
                    </param>
                    <param argument="--skip-any-unset" type="select" display="checkboxes" label="Skip reads with any of the FLAG bits unset" multiple="True" help="--skip-any-unset">
                        <expand macro="bam_flag_options" />
                    </param>
                </when>
                <when value="nofilter" />
            </conditional>
            <!-- With "adjust" the command template emits the BAQ choice plus -q, -Q and -C. -->
            <conditional name="quality">
                <param label="Quality Options" name="quality_settings" type="select">
                    <option value="none" selected="True">defaults</option>
                    <option value="adjust">Set base and mapping quality options</option>
                </param>
                <when value="adjust">
                    <param name="baq" type="select" label="per-Base Alignment Quality">
                        <help>
--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
--redo-BAQ; ignore existing BQ tags
                        </help>
                        <option value="">Default</option>
                        <option value="--no-BAQ">disable BAQ (per-Base Alignment Quality) (no-BAQ)</option>
                        <option value="--redo-BAQ">recalculate BAQ on the fly, ignore existing BQs (redo-BAQ)</option>
                    </param>
                    <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/>
                    <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="--min-MQ; default=0"/>
                    <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" help="--min-BQ; default=13"/>
                </when>
                <when value="none"/>
            </conditional>

            <!-- rg_action is empty for "Include" and "^" for "Exclude"; the command template prefixes it to the read-group file path. -->
            <conditional name="read_groups">
                <param name="read_groups_selector" type="select" label="Select read groups to include or exclude" help="--read-groups">
                    <option value="no_limit" selected="True">use defaults</option>
                    <option value="history">From an uploaded text file</option>
                    <option value="paste">Paste a list of read groups</option>
                    <option value="ignore_rg">Ignore RG tags. Treat all reads in one alignment file as one sample. </option>
                </param>
                <when value="history">
                    <param name="read_groups" format="txt" label="Text file" type="data">
                        <validator type="dataset_ok_validator" />
                    </param>
                    <param name="rg_action" type="select" label="Include or Exclude these Read Groups">
                        <option value="" selected="true">Include</option>
                        <option value="^">Exclude</option>
                    </param>
                </when>
                <when value="paste">
                    <param name="group_paste" type="text" size="10x35" area="true" label="Read groups" help="Paste a list of read groups" />
                    <param name="rg_action" type="select" label="Include or Exclude these Read Groups">
                        <option value="" selected="true">Include</option>
                        <option value="^">Exclude</option>
                    </param>
                </when>
                <when value="ignore_rg" />
                <when value="no_limit" />
            </conditional>
        </section>

        <!-- Region and target restrictions. The parameters come from the macro_restrict macro (body not
             visible here); the tests address them through the "regions" and "targets" conditionals.
             The second expansion passes type="target" and nests the invert-targets macro. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_restrict" />
            <expand macro="macro_restrict" type="target" label_type="Target">
                <expand macro="macro_invert_targets" />
            </expand>
        </section>
        <!-- Sample subsetting; parameters come from the macro_samples macro (body not visible here) and
             are presumably consumed by the @SAMPLES@ token in the command template (confirm). -->
        <section name="sec_subset" expanded="false" title="Subset Options">
            <expand macro="macro_samples" />
        </section>

        <!-- Output annotations: the selected tags are passed to the annotate option and the gvcf text to the
             gvcf option, each only when non-empty. The regex validator accepts an empty value or
             comma-separated unsigned integers. -->
        <section name="sec_output_options" expanded="false" title="Output options">
            <expand macro="macro_output_tags">
                <option value="DP">DP (Number of high-quality bases)</option>
                <option value="AD">AD (Allelic depth)</option>
                <option value="ADF">ADF (Allelic depth on the forward strand)</option>
                <option value="ADR">ADR (Allelic depth on the reverse strand)</option>
                <option value="INFO/AD">INFO/AD (Allelic depth)</option>
                <option value="INFO/ADF">INFO/ADF (Allelic depth on the forward strand)</option>
                <option value="INFO/ADR">INFO/ADR (Allelic depth on the reverse strand)</option>
                <option value="SP">SP (Phred-scaled strand bias P-value)</option>
                <option value="DV">DV (Number of high-quality non-reference bases)</option>
                <option value="QS">QS (Allele phred-score quality sum)</option>
                <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option>
                <option value="DPR">DPR (Number of high-quality bases for each observed allele)</option>
                <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option>
            </expand>
            <param name="gvcf" type="text" value="" label="gVCF blocks of homozygous REF calls">
                <help>
output gVCF blocks of homozygous REF calls, with depth (DP) ranges specified by the list of integers. For example, passing 5,15 will group sites into two types of gVCF blocks, the first with minimum per-sample DP from the interval [5,15) and the latter with minimum depth 15 or more. In this example, sites with minimum per-sample depth less than 5 will be printed as separate records, outside of gVCF blocks.
                </help>
                <validator type="regex" message="integers separated by commas">^(\d+(,\d+)*)?$</validator>
            </param>
        </section>

        <!-- Output format selector; presumably supplies the output_type parameter that the tests set and
             that the @OUTPUT_TYPE@ token consumes (macro body not visible here; confirm). -->
        <expand macro="macro_select_output_type" />

    </inputs>
    <!-- Output datasets come from the macro_vcf_output macro (body not visible here). The command template
         redirects bcftools stdout to $output_file and the tests check an output named output_file. -->
    <outputs>
        <expand macro="macro_vcf_output" />
    </outputs>
    <tests>
        <!-- Single BAM with a reference FASTA from the history; output_type "v".
             Checks the sample name and the records at 17:1 and 17:100. -->
        <test>
            <param name="input_number" value="single" />
            <param name="input_bam" ftype="bam" value="mpileup.1.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00100" />
                    <has_text_matching expression="17\t1\t.\tA\t...\t0\t.\tDP=5;" />
                    <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=9;" />
                </assert_contents>
            </output>
        </test>
        <!-- Single BAM with the cached reference entry "mpileup"; quality_settings "adjust" makes the command
             template emit the quality options with their default values. Same expected records as the first test. -->
        <test>
            <param name="input_number" value="single" />
            <param name="input_bam" ftype="bam" value="mpileup.1.bam" />
            <param name="reference_source_selector" value="cached" />
            <param name="ref_file" value="mpileup" />
            <param name="quality_settings" value="adjust" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00100" />
                    <has_text_matching expression="17\t1\t.\tA\t...\t0\t.\tDP=5;" />
                    <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=9;" />
                </assert_contents>
            </output>
        </test>
        <!-- Single CRAM input with a history reference; exercises the cram/crai branch of the single-input staging. -->
        <test>
            <param name="input_number" value="single" />
            <param name="input_bam" ftype="cram" value="mpileup.3.cram" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00102" />
                    <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=5;" />
                </assert_contents>
            </output>
        </test>
        <!-- Three BAMs restricted to region 17:100-110 with extra annotation tags (DP, INFO/AD, DV).
             Checks all three sample names, the DP header line, the record at 17:100 and that no record
             at position 111 is written. -->
        <test>
            <param name="input_number" value="multiple" />
            <param name="input_bams" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions" />
                    <repeat name="regions">
                        <param name="chrom" value="17" />
                        <param name="start" value="100" />
                        <param name="stop" value="110" />
                    </repeat>
                </conditional>
            </section>
            <param name="output_tags" value="DP,INFO/AD,DV" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00100" />
                    <has_text text="HG00101" />
                    <has_text text="HG00102" />
                    <has_text text="ID=DP," />
                    <!-- not_has_text compares literally, so the tab must be a real tab character
                         (written as a character reference); a backslash followed by t never matches. -->
                    <not_has_text text="17&#9;111" />
                    <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=18;AD=17,0;" />
                </assert_contents>
            </output>
        </test>
        <!-- Three BAMs restricted to target 17:100-104 with extra annotation tags (DP, INFO/AD, DV).
             Checks all three sample names, the DP header line, the record at 17:100 and that no record
             at position 105 is written. -->
        <test>
            <param name="input_number" value="multiple" />
            <param name="input_bams" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
            <section name="sec_restrict">
                <conditional name="targets">
                    <param name="targets_src" value="targets" />
                    <repeat name="targets">
                        <param name="chrom" value="17" />
                        <param name="start" value="100" />
                        <param name="stop" value="104" />
                    </repeat>
                </conditional>
            </section>
            <param name="output_tags" value="DP,INFO/AD,DV" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00100" />
                    <has_text text="HG00101" />
                    <has_text text="HG00102" />
                    <has_text text="ID=DP," />
                    <!-- not_has_text compares literally, so the tab must be a real tab character
                         (written as a character reference); a backslash followed by t never matches. -->
                    <not_has_text text="17&#9;105" />
                    <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=18;AD=17,0;" />
                </assert_contents>
            </output>
        </test>
        <!-- Three BAMs restricted to region 17:1050-1060 with FLAG filtering enabled; skip_all_set selects
             bits 4 and 16, which the command template sums into one mask. The command line itself is
             checked for the skip-all-set option. -->
        <test>
            <param name="input_number" value="multiple" />
            <param name="input_bams" ftype="bam" value="mpileup.1.bam,mpileup.2.bam,mpileup.3.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions" />
                    <repeat name="regions">
                        <param name="chrom" value="17" />
                        <param name="start" value="1050" />
                        <param name="stop" value="1060" />
                    </repeat>
                </conditional>
            </section>
            <param name="filter_flags" value="filter" />
            <param name="skip_all_set" value="4,16" /> 
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00100" />
                    <has_text_matching expression="17\t1050\t.\tA\t" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--skip-all-set" />
            </assert_command>
        </test>
        <!-- Indel options: sets ambig_reads, indel_bias and indel_size in the sec_indel section and checks
             that each option reaches the command line; expected records match the first test. -->
        <test>
            <param name="input_number" value="single" />
            <param name="input_bam" ftype="bam" value="mpileup.1.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="mpileup.ref.fa" />
            <section name="sec_indel">
                <param name="ambig_reads" value="drop"/>
                <param name="indel_bias" value="1"/>
                <param name="indel_size" value="110"/>
            </section>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="mpileup" />
                    <has_text text="HG00100" />
                    <has_text_matching expression="17\t1\t.\tA\t...\t0\t.\tDP=5;" />
                    <has_text_matching expression="17\t100\t.\tC\t...\t0\t.\tDP=9;" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--ambig-reads" />
                <has_text text="--indel-bias" />
                <has_text text="--indel-size" />
            </assert_command>
            
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

Generate VCF or BCF containing genotype likelihoods for one or multiple alignment (BAM or CRAM) files. This is based on the original samtools mpileup command (with the -v or -g options) producing genotype likelihoods in VCF or BCF format, but not the textual pileup output. The mpileup command was transferred to bcftools in order to avoid errors resulting from use of incompatible versions of samtools and bcftools when using the mpileup + bcftools call pipeline.

Individuals are identified from the SM tags in the read group (RG) header lines. Multiple individuals can be pooled in one alignment file, and one individual can also be split across multiple files. If sample identifiers are absent, each input file is regarded as one sample.

A reference genome in FASTA format (--fasta-ref) is used unless "No Reference" is selected, in which case the tool runs with --non-reference.

The output holds genotype likelihoods only; it is normally passed on to bcftools call for the actual variant calling.


@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
    ]]></help>
    <!-- Citation entries; presumably defined in the imported macros.xml (not visible here; confirm). -->
    <expand macro="citations" />
</tool>