Mercurial > repos > iuc > bcftools_annotate
view bcftools_annotate.xml @ 23:2c372041ed8c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 784611c9caf2680d41414ca2880b93a69d719701
author | iuc |
---|---|
date | Sun, 18 Aug 2024 09:59:56 +0000 |
parents | 8a2e296c07ef |
children |
line wrap: on
line source
<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper for the bcftools "annotate" sub-command.
        Every macro and every token that is not defined below is imported from macros.xml.
    -->
    <description>Annotate and edit VCF/BCF files</description>
    <macros>
        <token name="@EXECUTABLE@">annotate</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILE@

## Stage the annotation source in the working directory under a fixed name
## and index it. Both variables stay None when no annotation file is used.
#set $annotation_file = None
#set $annotation_hdr = None
#set $section = $sec_annofile
#if $section.annofile.anno_fmt == 'vcf':
    #if $section.annofile.annotations.is_of_type('vcf')
        #set $annotation_file = 'annotations.vcf.gz'
        bgzip -c '$section.annofile.annotations' > $annotation_file &&
        bcftools index $annotation_file &&
    #elif $section.annofile.annotations.is_of_type('bcf')
        #set $annotation_file = 'annotations.bcf'
        ln -s '$section.annofile.annotations' $annotation_file &&
        ## Reuse the index stored in the dataset metadata when there is one.
        #if $section.annofile.annotations.metadata.bcf_index:
            ln -s '${section.annofile.annotations.metadata.bcf_index}' ${annotation_file}.csi &&
        #else
            bcftools index $annotation_file &&
        #end if
    #end if
#elif $section.annofile.anno_fmt == 'tab':
    #if $section.annofile.annotations.is_of_type('bed')
        ## BED: tabix takes the end coordinate from column 3.
        #set $annotation_file = 'annotations.bed.gz'
        bgzip -c '$section.annofile.annotations' > $annotation_file &&
        tabix -s 1 -b 2 -e 3 $annotation_file &&
    #else:
        ## Other tabular input: tabix takes begin and end from column 2.
        #set $annotation_file = 'annotations.tab.gz'
        bgzip -c '$section.annofile.annotations' > $annotation_file &&
        tabix -s 1 -b 2 -e 2 $annotation_file &&
    #end if
    ## A header dataset takes precedence over header lines typed into the form.
    #if $section.annofile.header_file:
        #set $annotation_hdr = $section.annofile.header_file
    #elif $section.annofile.header_lines:
        ## Only the INFO header lines of the typed text are kept.
        #set $annotation_hdr = 'annotation.hdr'
        grep '^\#\#INFO' '${hdr_file}' > $annotation_hdr &&
    #end if
#end if

#set $section = $sec_restrict
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

#set $section = $sec_annofile
@COLUMNS@
#if $annotation_file:
    --annotations '${annotation_file}'
#end if
#if $annotation_hdr:
    --header-lines '${annotation_hdr}'
#end if
#if $section.set_id:
    --set-id '${section.set_id}'
#end if
#if $section.mark_sites:
    --mark-sites '${section.mark_sites}'
#end if
#if $section.min_overlap
    --min-overlap '$section.min_overlap'
#end if

#set $section = $sec_annotate
#if $section.rename_chrs:
    --rename-chrs '${section.rename_chrs}'
#end if
#if $section.remove:
    --remove '${section.remove}'
#end if
#if $section.rename_annots
    --rename-annots '$section.rename_annots'
#end if

## Default section
#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@
@COLLAPSE@
@REGIONS@
@SAMPLES@
@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
]]>
    </command>
    <configfiles>
        <!-- Holds the header lines typed into the form; the command greps the INFO lines out of this file. -->
        <configfile name="hdr_file"><![CDATA[#slurp
#if $sec_annofile.annofile.anno_fmt == 'tab' and str($sec_annofile.annofile.header_lines) != '':
$sec_annofile.annofile.header_lines.__str__.strip()#slurp
#end if]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="macro_input" />
        <section name="sec_annofile" expanded="false" title="Add Annotations">
            <expand macro="macro_columns" />
            <conditional name="annofile">
                <param name="anno_fmt" type="select" label="Annotations File">
                    <option value="none">None</option>
                    <option value="vcf">From a VCF/BCF file</option>
                    <option value="tab">From a BED or tab-delimited file</option>
                </param>
                <when value="none"/>
                <when value="vcf">
                    <param name="annotations" type="data" format="vcf,bcf" label="Annotations VCF"/>
                </when>
                <when value="tab">
                    <param name="annotations" type="data" format="tabular,bed" label="Annotations">
                        <help><![CDATA[
BED, or a tab-delimited file with mandatory columns CHROM, POS (or, alternatively, FROM and TO), optional columns REF and ALT, and arbitrary number of annotation columns.
Note that in case of tab-delimited file, the coordinates POS, FROM and TO are one-based and inclusive.
                        ]]></help>
                    </param>
                    <param name="header_file" type="data" format="txt" label="Header Lines File" optional="true" help="lines which should be appended to the VCF header" />
                    <param name="header_lines" type="text" area="true" label="Header Lines" optional="true" help="lines which should be appended to the VCF header" >
                        <help><![CDATA[
##INFO=<ID=NUMERIC_TAG,Number=1,Type=Integer,Description="Example header line">
##INFO=<ID=STRING_TAG,Number=1,Type=String,Description="Yet another header line">
                        ]]></help>
                        <sanitizer sanitize="False"/>
                    </param>
                </when>
            </conditional>
            <param argument="--mark-sites" type="text" value="" label="Mark Sites TAG" help="add INFO/TAG flag to sites which are (&quot;+&quot;) or are not (&quot;-&quot;) listed in the annotations file" />
            <param argument="--min-overlap" type="text" value="" label="Minimum required overlap of intersecting regions" optional="true" help="Minimum overlap required as a fraction of the variant in the annotation -a file (ANN), in the target VCF file (:VCF), or both for reciprocal overlap (ANN:VCF). By default overlaps of arbitrary length are sufficient.">
                <sanitizer invalid_char="">
                    <valid initial="string.digits">
                        <add value=":" />
                        <add value="." />
                    </valid>
                </sanitizer>
                <!-- Anchored at both ends so that a value with trailing junk is rejected instead of being silently trimmed by the sanitizer. -->
                <validator type="regex" message="Only digits, '.' and ':' are allowed, e.g. 0.5, :0.5 or 0.5:0.5">^[0-9.:]+$</validator>
            </param>
            <param argument="--set-id" type="text" value="" optional="true" label="Set Id">
                <help>Assign ID on the fly using the given format. By default all existing IDs are replaced. If the format string is preceded by "+", only missing IDs will be set. For example: '%CHROM\_%POS\_%REF\_%FIRST_ALT' </help>
                <sanitizer sanitize="False"/>
                <validator type="regex" message="Expected an optional leading '+' followed by %FIELD names joined by '\_', e.g. %CHROM\_%POS\_%REF\_%FIRST_ALT">^([+]?(%[A-Z_]+)(\\_%[A-Z_]+)*)?$</validator>
            </param>
        </section>
        <section name="sec_annotate" expanded="false" title="Change Annotations">
            <param argument="--remove" type="text" value="" label="Remove annotations" optional="true">
                <help><![CDATA[
List of annotations to remove. Use "FILTER" to remove all filters or "FILTER/SomeFilter" to remove a specific filter.
Similarly, "INFO" can be used to remove all INFO tags and "FORMAT" to remove all FORMAT tags except GT. To remove all INFO tags except "FOO" and "BAR", use "^INFO/FOO,INFO/BAR" (and similarly for FORMAT and FILTER). "INFO" can be abbreviated to "INF" and "FORMAT" to "FMT".
                ]]></help>
                <validator type="regex" message="Expected a comma-separated list of annotations, each optionally prefixed with '^', e.g. ID,INFO/DP,^FORMAT/GT">^(\^?[A-Z]+(/\w+)?(,\^?[A-Z]+(/\w+)?)*)?$</validator>
            </param>
            <param argument="--rename-chrs" type="data" format="tabular" label="Rename CHROM" optional="true" help="rename chromosomes according to the map in file, with old_name new_name pairs separated by whitespaces, each on a separate line." />
            <param argument="--rename-annots" type="data" format="tabular" optional="true" label="Rename annotations" help="TYPE/old\tnew, where TYPE is one of FILTER,INFO,FORMAT" />
        </section>
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
            <expand macro="macro_collapse" />
            <expand macro="macro_restrict" />
            <expand macro="macro_samples" />
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <tests>
        <!-- Tab-delimited annotations with a header dataset -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="tab" />
            <param name="annotations" value="annotate.tab" />
            <param name="header_file" value="annotate.hdr" />
            <param name="columns" value="CHROM,POS,REF,ALT,ID,QUAL,INFO/T_INT,INFO/T_FLOAT,INDEL" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="snp_3000150" />
                </assert_contents>
            </output>
        </test>
        <!-- Region-based tab-delimited annotations combined with on-the-fly ID assignment -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <section name="sec_annofile">
                <param name="columns" value="CHROM,FROM,TO,T_STR" />
                <conditional name="annofile">
                    <param name="anno_fmt" value="tab" />
                    <param name="annotations" value="annotate2.tab" />
                    <param name="header_file" value="annotate.hdr" />
                </conditional>
                <param name="set_id" value="%CHROM\_%POS\_%REF\_%FIRST_ALT" />
            </section>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="T_STR=region_3000150_3106154" />
                    <has_text text="1_3062915_GTTT_G" />
                </assert_contents>
            </output>
        </test>
        <!-- Annotations taken from a VCF file -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots.vcf" />
            <param name="columns" value="STR,ID,QUAL,FILTER" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="idIndel" />
                    <has_text text="STR=testSNP" />
                </assert_contents>
            </output>
        </test>
        <!-- Annotations taken from a BCF file -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots.bcf" />
            <param name="columns" value="STR,ID,QUAL,FILTER" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="idIndel" />
                    <has_text text="STR=testSNP" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_file" ftype="vcf" value="annotate2.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots2.vcf" />
            <param name="columns" value="ID,QUAL,FILTER,INFO,FMT" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="q99" />
                    <has_text text="FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_file" ftype="vcf" value="annotate2.vcf" />
            <param name="anno_fmt" value="vcf" />
            <param name="annotations" value="annots2.vcf" />
            <param name="columns" value="ID,QUAL,FILTER,INFO,FMT" />
            <param name="samples" value="A" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="q11" />
                    <has_text text="FLAG;IINT=88,99;IFLT=8.8,9.9;ISTR=888,999" />
                </assert_contents>
            </output>
        </test>
        <!-- Removal of annotations, no annotation file involved -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="ID,QUAL,^FILTER/fltA,FILTER/fltB,^INFO/AA,INFO/BB,^FMT/GT,FMT/PL" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="fltY" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
        </test>
        <!-- Test region overlap option -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
        <!-- Test min overlap option -->
        <test>
            <param name="input_file" ftype="vcf" value="annotate3.vcf" />
            <param name="anno_fmt" value="none" />
            <param name="remove" value="FORMAT" />
            <param name="output_type" value="v" />
            <section name="sec_annofile">
                <param name="min_overlap" value="0.5"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="GT:X:PL:Y:AA" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--min-overlap" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
==================================
 bcftools @EXECUTABLE@
==================================

Annotate and edit VCF/BCF files.

Examples::

    # Remove three fields
    bcftools annotate -x ID,INFO/DP,FORMAT/DP file.vcf.gz

    # Remove all INFO fields and all FORMAT fields except for GT and PL
    bcftools annotate -x INFO,^FORMAT/GT,FORMAT/PL file.vcf

    # Add ID, QUAL and INFO/TAG, not replacing TAG if already present
    bcftools annotate -a src.bcf -c ID,QUAL,+TAG dst.bcf

    # Carry over all INFO and FORMAT annotations except FORMAT/GT
    bcftools annotate -a src.bcf -c INFO,^FORMAT/GT dst.bcf

    # Annotate from a tab-delimited file with six columns (the fifth is ignored),
    # first indexing with tabix. The coordinates are 1-based.
    tabix -s1 -b2 -e2 annots.tab.gz
    bcftools annotate -a annots.tab.gz -h annots.hdr -c CHROM,POS,REF,ALT,-,TAG file.vcf

    # Annotate from a tab-delimited file with regions (1-based coordinates, inclusive)
    tabix -s1 -b2 -e3 annots.tab.gz
    bcftools annotate -a annots.tab.gz -h annots.hdr -c CHROM,FROM,TO,TAG input.vcf

    # Annotate from a bed file (0-based coordinates, half-closed, half-open intervals)
    bcftools annotate -a annots.bed.gz -h annots.hdr -c CHROM,FROM,TO,TAG input.vcf

@REGIONS_HELP@

@EXPRESSIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>