view bcftools_annotate.xml @ 22:0f087051441e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 868f4386992de55d25da197660a43a913a09c8df
author iuc
date Wed, 14 Aug 2024 16:26:20 +0000
parents 8a2e296c07ef
children
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Annotate and edit VCF/BCF files</description>
    <!-- The EXECUTABLE token names the bcftools sub-command wrapped here; it is
         substituted into the tool name and id, the command line and the help text.
         Every other token written in the same syntax is not defined in this file
         and presumably comes from the imported macros.xml. -->
    <macros>
        <token name="@EXECUTABLE@">annotate</token>
        <import>macros.xml</import>
    </macros>
    <!-- The bio_tools, requirements and version_command macros are expanded here;
         their definitions are not part of this file (presumably in macros.xml). -->
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
## Environment setup and staging of the main input come from shared macro tokens.
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
## Stage the selected annotation source under a fixed local name and index it.
## Both variables stay None when no annotation file is selected.
#set $annotation_file = None
#set $annotation_hdr = None
#set $section = $sec_annofile
#if $section.annofile.anno_fmt == 'vcf':
  ## A plain VCF is bgzip-compressed and indexed. A BCF is symlinked and reuses the
  ## index stored in the dataset metadata when there is one, else it is indexed here.
  #if $section.annofile.annotations.is_of_type('vcf')
    #set $annotation_file = 'annotations.vcf.gz'
    bgzip -c '$section.annofile.annotations' > $annotation_file &&
    bcftools index $annotation_file &&
  #elif $section.annofile.annotations.is_of_type('bcf')
    #set $annotation_file = 'annotations.bcf'
    ln -s '$section.annofile.annotations' $annotation_file &&
    #if $section.annofile.annotations.metadata.bcf_index:
      ln -s '${section.annofile.annotations.metadata.bcf_index}' ${annotation_file}.csi &&
    #else
      bcftools index $annotation_file &&
    #end if
  #end if
#elif $section.annofile.anno_fmt == 'tab':
  ## BED input is indexed with begin and end in columns 2 and 3, any other tabular
  ## input with column 2 used for both begin and end.
  #if $section.annofile.annotations.is_of_type('bed')
    #set $annotation_file = 'annotations.bed.gz'
    bgzip -c '$section.annofile.annotations' > $annotation_file &&
    tabix -s 1 -b 2 -e 3 $annotation_file &&
  #else:
    #set $annotation_file = 'annotations.tab.gz'
    bgzip -c '$section.annofile.annotations' > $annotation_file &&
    tabix -s 1 -b 2 -e 2 $annotation_file &&
  #end if
  ## A header file dataset takes precedence over the free-text header lines.
  ## Of the free-text lines only the INFO definitions are kept.
  #if $section.annofile.header_file:
    #set $annotation_hdr = $section.annofile.header_file
  #elif $section.annofile.header_lines:
    #set $annotation_hdr = 'annotation.hdr'
    grep '^\#\#INFO' '${hdr_file}' > $annotation_hdr &&
  #end if
#end if

## The regions-file preparation is expanded before the bcftools call, like the other
## preparation tokens above, so that anything it emits cannot end up among the
## bcftools arguments. It reads its parameters through the section variable.
#set $section = $sec_restrict
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## Options of the Add Annotations section
#set $section = $sec_annofile
@COLUMNS@
#if $annotation_file:
  --annotations '${annotation_file}'
#end if
#if $annotation_hdr:
    --header-lines '${annotation_hdr}'
#end if

#if $section.set_id:
  --set-id '${section.set_id}'
#end if
#if $section.mark_sites:
  --mark-sites '${section.mark_sites}'
#end if
#if $section.min_overlap
    --min-overlap '$section.min_overlap'
#end if

## Options of the Change Annotations section
#set $section = $sec_annotate
#if $section.rename_chrs:
  --rename-chrs '${section.rename_chrs}'
#end if
#if $section.remove:
  --remove '${section.remove}'
#end if
#if $section.rename_annots
    --rename-annots '$section.rename_annots'
#end if


## Default section
#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@
@COLLAPSE@
@REGIONS@
@SAMPLES@

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
]]>
    </command>
    <configfiles>
        <!-- hdr_file receives the free-text header lines, stripped of surrounding
             whitespace, but only when tab-delimited annotations are selected and the
             text box is not empty; otherwise nothing is written. The command
             template greps the INFO definitions out of this file. -->
        <configfile name="hdr_file"><![CDATA[#slurp
#if $sec_annofile.annofile.anno_fmt == 'tab' and str($sec_annofile.annofile.header_lines) != '':
$sec_annofile.annofile.header_lines.__str__.strip()#slurp
#end if]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="macro_input" />
        <!-- Add Annotations: where the annotations come from (none, VCF/BCF, or
             BED/tab-delimited with optional header lines) and how they are applied
             (column mapping from a shared macro, site marking, minimum overlap, ID
             format). The command template reads these through sec_annofile. -->
        <section name="sec_annofile" expanded="false" title="Add Annotations">
            <expand macro="macro_columns" />
            <conditional name="annofile">
                <param name="anno_fmt" type="select" label="Annotations File">
                    <option value="none">None</option>
                    <option value="vcf">From a VCF/BCF file</option>
                    <option value="tab">From a BED or tab-delimited file</option>
                </param>
                <when value="none"/>
                <when value="vcf">
                    <param name="annotations" type="data" format="vcf,bcf" label="Annotations VCF"/>
                </when>
                <when value="tab">
                    <param name="annotations" type="data" format="tabular,bed" label="Annotations">
                        <help><![CDATA[
                        BED, or a tab-delimited file with mandatory columns CHROM, POS (or, alternatively, FROM and TO),
                        optional columns REF and ALT, and arbitrary number of annotation columns.
                        Note that in case of tab-delimited file, the coordinates POS, FROM and TO are one-based and inclusive.
                        ]]></help>
                    </param>
                    <param name="header_file" type="data" format="txt" label="Header Lines File" optional="true" help="lines which should be appended to the VCF header" />
                    <!-- Help is given once, as a child element: a param carrying both a
                         help attribute and a help element shows only one of the two. -->
                    <param name="header_lines" type="text" area="true" label="Header Lines" optional="true">
                        <help><![CDATA[
                        Lines which should be appended to the VCF header.
                        Ignored when a Header Lines File is selected; only lines starting with ##INFO are used.
                        For example:
                        ##INFO=<ID=NUMERIC_TAG,Number=1,Type=Integer,Description="Example header line">
                        ##INFO=<ID=STRING_TAG,Number=1,Type=String,Description="Yet another header line">
                        ]]></help>
                        <sanitizer sanitize="False"/>
                    </param>
                </when>
            </conditional>
            <param argument="--mark-sites" type="text" value="" label="Mark Sites TAG"
                help="add INFO/TAG flag to sites which are (&quot;+&quot;) or are not (&quot;-&quot;) listed in the annotations file" />
            <param argument="--min-overlap" type="text" value="" label="Minimum required overlap of intersecting regions" optional="true" help="Minimum overlap required as a fraction of the variant in the annotation -a file (ANN), in the target VCF file (:VCF), or both for reciprocal overlap (ANN:VCF). By default overlaps of arbitrary length are sufficient.">
                <!-- The sanitizer drops every character other than digits, ':' and '.' -->
                <sanitizer invalid_char="">
                    <valid initial="string.digits">
                        <add value=":" />
                        <add value="." />
                    </valid>
                </sanitizer>
                <validator type="regex" message="Only digits, '.' and ':' are allowed, for example 0.5, :0.5 or 0.5:0.3">[0-9.:]+</validator>
            </param>
            <param argument="--set-id" type="text" value="" optional="true" label="Set Id">
                <help>Assign ID on the fly using the given format.
                By default all existing IDs are replaced.
                If the format string is preceded by "+", only missing IDs will be set.
                For example: '%CHROM\_%POS\_%REF\_%FIRST_ALT'
                </help>
                <!-- Sanitizing is off so the percent signs and backslashes survive; the
                     regex below is what restricts the value that reaches the shell. -->
                <sanitizer sanitize="False"/>
                <validator type="regex" message="Invalid format: use upper-case fields, each prefixed with a percent sign and joined by backslash-underscore, optionally preceded by '+' (see the example in the help)">^([+]?(%[A-Z_]+)(\\_%[A-Z_]+)*)?$</validator>
            </param>
        </section>
        <!-- Change Annotations: edits applied to the input itself (removing
             annotations, renaming chromosomes, renaming annotations). The command
             template reads these through sec_annotate. -->
        <section name="sec_annotate" expanded="false" title="Change Annotations">
            <param argument="--remove" type="text" value="" label="Remove annotations" optional="true">
                <help><![CDATA[
                 List of annotations to remove.
                 Use "FILTER" to remove all filters or "FILTER/SomeFilter" to remove a specific filter.
                 Similarly, "INFO" can be used to remove all INFO tags and "FORMAT" to remove all FORMAT tags except GT.
                 To remove all INFO tags except "FOO" and "BAR", use "^INFO/FOO,INFO/BAR" (and similarly for FORMAT and FILTER).
                 "INFO" can be abbreviated to "INF" and "FORMAT" to "FMT".
                ]]></help>
                <validator type="regex" message="Expected a comma-separated list such as ID,INFO/DP,^FORMAT/GT (upper-case type, optional /tag, optional leading ^)">^(\^?[A-Z]+(/\w+)?(,\^?[A-Z]+(/\w+)?)*)?$</validator>
            </param>
            <param argument="--rename-chrs" type="data" format="tabular" label="Rename CHROM" optional="true"
                   help="rename chromosomes according to the map in file, with old_name new_name pairs separated by whitespaces, each on a separate line." />
            <param argument="--rename-annots" type="data" format="tabular" optional="true" label="Rename annotations" help="rename annotations according to the map in file, with one TYPE/old_name new_name pair per line separated by whitespace, where TYPE is one of FILTER, INFO, FORMAT." />
        </section>
        <!-- Restrict to: every input in this section comes from a macro expansion
             (include, exclude, collapse, restrict, samples), so the parameter
             definitions are not visible in this file. The command template points
             its section variable at sec_restrict before expanding the matching
             option tokens. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
            <expand macro="macro_collapse" />
            <expand macro="macro_restrict" />
            <expand macro="macro_samples" />
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <!-- The output definition comes from a macro expansion. The command template
         redirects bcftools stdout to output_file and the tests refer to an output
         of that name, so the macro presumably declares it; confirm in macros.xml. -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <tests>
        <!-- Tab-delimited annotations (annotate.tab) with a header file; the column
             list maps positional, ID, QUAL and INFO columns. Expects the text
             snp_3000150 in the VCF output. Parameters are addressed by flat name. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="tab" />
            <param name="annotations" value="annotate.tab" />
            <param name="header_file" value="annotate.hdr" />
            <param name="columns" value="CHROM,POS,REF,ALT,ID,QUAL,INFO/T_INT,INFO/T_FLOAT,INDEL" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="snp_3000150" />
                </assert_contents>
            </output>
        </test>
        <!-- Region-style tab annotations (columns CHROM,FROM,TO,T_STR from
             annotate2.tab) combined with an ID format string. Parameters are addressed
             through their section and conditional. Expects both the transferred T_STR
             value and a generated ID in the output. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <section name="sec_annofile">
                <param name="columns" value="CHROM,FROM,TO,T_STR" />
                <conditional name="annofile">
                    <param name="anno_fmt" value="tab" />
                    <param name="annotations" value="annotate2.tab" />
                    <param name="header_file" value="annotate.hdr" />
                </conditional>
                <param name="set_id" value="%CHROM\_%POS\_%REF\_%FIRST_ALT" />
            </section>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="T_STR=region_3000150_3106154" />
                    <has_text text="1_3062915_GTTT_G" />
                </assert_contents>
            </output>
        </test>
        <!-- Annotations taken from a VCF file (annots.vcf) with columns
             STR,ID,QUAL,FILTER. Expects the texts idIndel and STR=testSNP in the
             output. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots.vcf" />
            <param name="columns" value="STR,ID,QUAL,FILTER" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="idIndel" />
                    <has_text text="STR=testSNP" />
                </assert_contents>
            </output>
        </test>
        <!-- Same columns and assertions as the VCF annotation test, but the
             annotation source is annots.bcf. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots.bcf" />
            <param name="columns" value="STR,ID,QUAL,FILTER" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="idIndel" />
                    <has_text text="STR=testSNP" />
                </assert_contents>
            </output>
        </test>
        <!-- VCF annotations (annots2.vcf) applied to annotate2.vcf with the column
             list ID,QUAL,FILTER,INFO,FMT. Expects the text q99 and the listed INFO
             string in the output. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate2.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots2.vcf" />
            <param name="columns" value="ID,QUAL,FILTER,INFO,FMT" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="q99" />
                    <has_text text="FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999" />
                </assert_contents>
            </output>
        </test>
        <!-- Same inputs and columns as the previous test with the samples parameter
             set to A. Expects the text q11 and the same INFO string in the output. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate2.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots2.vcf" />
            <param name="columns" value="ID,QUAL,FILTER,INFO,FMT" />
            <param name="samples" value="A" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="q11" />
                    <has_text text="FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999" />
                </assert_contents>
            </output>
        </test>
        <!-- No annotation file; only the remove list is set, mixing plain entries
             with entries prefixed by ^. Expects that the text fltY no longer occurs
             in the output. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="ID,QUAL,^FILTER/fltA,FILTER/fltB,^INFO/AA,INFO/BB,^FMT/GT,FMT/PL" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="fltY" />
                </assert_contents>
            </output>
        </test>
        <!-- No annotation file; remove is set to FORMAT. Expects that the FORMAT
             key string GT:X:PL:Y:AA no longer occurs in the output. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
        </test>
        <!-- Test region overlap option: repeats the FORMAT removal test with
             regions_overlap set to 1 in the restrict section, and additionally
             asserts that the regions-overlap flag appears on the generated
             command line. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
        <!-- Test min overlap option: repeats the FORMAT removal test with
             min_overlap set to 0.5 in the annotation section, and additionally
             asserts that the min-overlap flag appears on the generated
             command line. -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <section name="sec_annofile">
                <param name="min_overlap" value="0.5"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--min-overlap" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
==================================
 bcftools @EXECUTABLE@
==================================

Annotate and edit VCF/BCF files.

Examples::

    # Remove three fields
    bcftools annotate -x ID,INFO/DP,FORMAT/DP file.vcf.gz

    # Remove all INFO fields and all FORMAT fields except for GT and PL
    bcftools annotate -x INFO,^FORMAT/GT,FORMAT/PL file.vcf

    # Add ID, QUAL and INFO/TAG, not replacing TAG if already present
    bcftools annotate -a src.bcf -c ID,QUAL,+TAG dst.bcf

    # Carry over all INFO and FORMAT annotations except FORMAT/GT
    bcftools annotate -a src.bcf -c INFO,^FORMAT/GT dst.bcf

    # Annotate from a tab-delimited file with six columns (the fifth is ignored),
    # first indexing with tabix. The coordinates are 1-based.
    tabix -s1 -b2 -e2 annots.tab.gz
    bcftools annotate -a annots.tab.gz -h annots.hdr -c CHROM,POS,REF,ALT,-,TAG file.vcf

    # Annotate from a tab-delimited file with regions (1-based coordinates, inclusive)
    tabix -s1 -b2 -e3 annots.tab.gz
    bcftools annotate -a annots.tab.gz -h annots.hdr -c CHROM,FROM,TO,TAG input.vcf

    # Annotate from a bed file (0-based coordinates, half-closed, half-open intervals)
    bcftools annotate -a annots.bed.gz -h annots.hdr -c CHROM,FROM,TO,TAG input.vcf

@REGIONS_HELP@
@EXPRESSIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>