Mercurial > repos > greg > bcftools_sort
changeset 0:1d4c51dcbbbe draft
Uploaded
author | greg |
---|---|
date | Wed, 19 Dec 2018 08:58:27 -0500 |
parents | |
children | bb0f975d69b4 |
files | .shed.yml bcftools_sort.xml macros.xml test-data/sorted.vcf test-data/unsorted.vcf |
diffstat | 5 files changed, 181 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,11 @@
+name: bcftools_sort  # same base name as the bcftools_sort.xml tool file in this changeset
+owner: greg
+description: |
+  Contains a tool that sorts VCF/BCF files
+homepage_url: https://samtools.github.io/bcftools/bcftools.html  # upstream bcftools documentation page
+long_description: |
+  Contains a tool that sorts VCF/BCF files
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/corals/bcftools_sort  # development source tree for this wrapper
+type: unrestricted
+categories:
+  - Sequence Analysis
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_sort.xml Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,47 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@"> <!-- Galaxy wrapper around "bcftools sort" -->
+    <description>sort VCF/BCF files</description>
+    <macros>
+        <token name="@EXECUTABLE@">sort</token> <!-- subcommand name; reused in the tool name, id, command line and help -->
+        <import>macros.xml</import> <!-- supplies @VERSION@, @PREPARE_ENV@, @OUTPUT_TYPE@ and the macros expanded below -->
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+@PREPARE_ENV@
+#set temp_dir = 'tmp'
+mkdir $temp_dir &&
+bcftools @EXECUTABLE@
+'$input_file'
+--output-file '$output_file'
+@OUTPUT_TYPE@
+--temp-dir '$temp_dir'
+]]>
+    </command> <!-- every path is single-quoted so the shell passes each one as a single argument -->
+    <inputs>
+        <expand macro="macro_input"/>
+        <expand macro="macro_select_output_type"/>
+    </inputs>
+    <outputs>
+        <expand macro="macro_vcf_output"/>
+    </outputs>
+    <tests>
+        <test> <!-- sorts test-data/unsorted.vcf as uncompressed VCF and compares with test-data/sorted.vcf -->
+            <param name="input_file" value="unsorted.vcf" ftype="vcf"/>
+            <param name="output_type" value="v"/>
+            <output name="output_file" file="sorted.vcf" ftype="vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+=====================================
+ bcftools @EXECUTABLE@
+=====================================
+
+Sort VCF/BCF files.
+
+https://samtools.github.io/bcftools/bcftools.html#@EXECUTABLE@
+
+https://github.com/samtools/bcftools/wiki
+]]>
+    </help> <!-- links are written out because macros.xml defines no @BCFTOOLS_MANPAGE@ or @BCFTOOLS_WIKI@ token -->
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,92 @@
+<macros>
+    <token name="@VERSION@">1.4.0</token> <!-- wrapper version; substituted into the tool's version attribute -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.9">bcftools</requirement>
+            <requirement type="package" version="1.9">htslib</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+            <yield />
+        </citations>
+    </xml>
+    <token name="@THREADS@">
+  --threads \${GALAXY_SLOTS:-4}
+    </token> <!-- the backslash keeps the dollar sign literal so the shell, not the template, expands GALAXY_SLOTS (default 4) -->
+    <token name="@PREPARE_ENV@">
+<![CDATA[
+export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`;
+]]>
+    </token> <!-- derives the plugin directory by rewriting the path of the bcftools binary found on PATH -->
+    <xml name="macro_input">
+        <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" />
+    </xml>
+    <token name="@PREPARE_INPUT_FILE@">
+<![CDATA[
+## May need to symlink input if there is an associated index
+#set $input_vcf = 'input.vcf.gz'
+#if $input_file.is_of_type('vcf')
+  bgzip -c '$input_file' > $input_vcf &&
+  bcftools index $input_vcf &&
+#elif $input_file.is_of_type('vcf_bgzip')
+  ln -s '$input_file' $input_vcf &&
+#elif $input_file.is_of_type('bcf')
+  #set $input_vcf = 'input.bcf'
+  ln -s '$input_file' $input_vcf &&
+  #if $input_file.metadata.bcf_index:
+    ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
+  #else
+    bcftools index $input_vcf &&
+  #end if
+#elif $input_file.is_of_type('bcf_bgzip')
+  ln -s '$input_file' $input_vcf &&
+#end if
+]]>
+    </token> <!-- stages the input under a fixed relative name (input.vcf.gz or input.bcf); sets $input_vcf for @INPUT_FILE@ -->
+    <token name="@INPUT_FILE@">
+$input_vcf
+    </token>
+    <xml name="macro_select_output_type">
+        <param name="output_type" type="select" label="Output type">
+            <option value="b">compressed BCF</option>
+            <option value="u">uncompressed BCF</option>
+            <option value="z">compressed VCF</option>
+            <option value="v">uncompressed VCF</option>
+        </param>
+    </xml> <!-- option values are the letters passed to bcftools by @OUTPUT_TYPE@ -->
+    <token name="@OUTPUT_TYPE@">
+#if str($output_type) != "__none__":
+  --output-type '${output_type}'
+#end if
+    </token>
+
+    <xml name="macro_vcf_output">
+        <data name="output_file" format="vcf">
+            <change_format>
+                <when input="output_type" value="b" format="bcf" />
+                <when input="output_type" value="u" format="bcf" />
+                <when input="output_type" value="z" format="vcf_bgzip" />
+                <when input="output_type" value="v" format="vcf" />
+            </change_format>
+        </data>
+    </xml> <!-- output datatype follows the selected output_type; vcf is the declared default -->
+
+    <token name="@OUTPUT_HELP@">
+    <![CDATA[
+Output Type
+-----------
+
+Output compressed BCF (b), uncompressed BCF (u), compressed VCF (z), or uncompressed VCF (v).
+Use the BCF option when piping between bcftools subcommands to speed up
+performance by removing unnecessary compression/decompression
+and VCF<->BCF conversion.
+
+This Galaxy tool recommends using the compressed BCF format
+as piping is not implemented, and uncompressed data would
+use unnecessary amounts of space.
+    ]]></token>
+</macros>
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted.vcf Wed Dec 19 08:58:27 2018 -0500 @@ -0,0 +1,16 @@ +##fileformat=VCFv4.0 +##FILTER=<ID=PASS,Description="All filters passed"> +##INFO=<ID=XX,Number=1,Type=Integer,Description="Test"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FILTER=<ID=Fail,Description="Fail"> +##contig=<ID=1,length=62435964> +##contig=<ID=2,length=62435964> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A +1 110 . C T,G 1792 Fail XX=11;DP=32 GT:GQ:DP 0/1:245:32 +1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22 +1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22 +2 140 . A G 727 PASS DP=30 GT:GQ:DP 0/1:150:30 +2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unsorted.vcf Wed Dec 19 08:58:27 2018 -0500 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.0 +##INFO=<ID=XX,Number=1,Type=Integer,Description="Test"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FILTER=<ID=Fail,Description="Fail"> +##contig=<ID=1,length=62435964> +##contig=<ID=2,length=62435964> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A +2 140 . A G 727 PASS DP=30 GT:GQ:DP 0/1:150:30 +2 160 . TAAAA TA,TC,T 246 PASS DP=10 GT:GQ:DP 0/2:12:10 +1 110 . C T,G 1792 Fail XX=11;DP=32 GT:GQ:DP 0/1:245:32 +1 130 . GAA GG 1016 PASS DP=22 GT:GQ:DP 0/1:212:22 +1 130 . G T 1016 PASS DP=22 GT:GQ:DP 0/1:212:22