changeset 0:1d4c51dcbbbe draft

Uploaded
author greg
date Wed, 19 Dec 2018 08:58:27 -0500
parents
children bb0f975d69b4
files .shed.yml bcftools_sort.xml macros.xml test-data/sorted.vcf test-data/unsorted.vcf
diffstat 5 files changed, 181 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,11 @@
+name: bcftools_sort
+owner: greg
+description: |
+  Contains a tool that sorts VCF/BCF files
+homepage_url: https://samtools.github.io/bcftools/bcftools.html
+long_description: |
+  Contains a tool that sorts VCF/BCF files
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/corals/bcftools_sort
+type: unrestricted
+categories:
+  - Sequence Analysis
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_sort.xml	Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,47 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@">
+    <description>sort VCF/BCF files</description>
+    <macros>
+        <token name="@EXECUTABLE@">sort</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Runs bcftools sort; temporary files go to a relative 'tmp' directory created first. Dataset paths are single-quoted for the shell. -->
+    <command detect_errors="exit_code"><![CDATA[
+@PREPARE_ENV@
+#set temp_dir = 'tmp'
+mkdir $temp_dir &&
+bcftools @EXECUTABLE@
+'$input_file'
+--output-file '$output_file'
+@OUTPUT_TYPE@
+--temp-dir '$temp_dir'
+]]></command>
+    <inputs>
+        <expand macro="macro_input"/>
+        <expand macro="macro_select_output_type"/>
+    </inputs>
+    <outputs>
+        <expand macro="macro_vcf_output"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="unsorted.vcf" ftype="vcf"/>
+            <param name="output_type" value="v"/>
+            <output name="output_file" file="sorted.vcf" ftype="vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+=====================================
+ bcftools @EXECUTABLE@
+=====================================
+
+Sort VCF/BCF files.
+
+@BCFTOOLS_MANPAGE@#@EXECUTABLE@
+
+@BCFTOOLS_WIKI@
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,92 @@
+<macros>
+  <token name="@VERSION@">1.4.0</token>
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="1.9">bcftools</requirement>
+      <requirement type="package" version="1.9">htslib</requirement>
+      <yield />
+    </requirements>
+  </xml>
+  <xml name="citations">
+    <citations>
+      <citation type="doi">10.1093/bioinformatics/btp352</citation>
+      <yield />
+    </citations>
+  </xml>
+  <token name="@THREADS@">
+  --threads \${GALAXY_SLOTS:-4}
+  </token>
+  <token name="@PREPARE_ENV@">
+<![CDATA[
+export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`;
+]]>
+  </token>
+  <xml name="macro_input">
+    <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" />
+  </xml>
+  <token name="@PREPARE_INPUT_FILE@">
+<![CDATA[
+## May need to symlink the input if there is an associated index file (see the bcf branch below)
+#set $input_vcf = 'input.vcf.gz'
+#if $input_file.is_of_type('vcf')
+  bgzip -c '$input_file' > $input_vcf &&
+  bcftools index $input_vcf &&
+#elif $input_file.is_of_type('vcf_bgzip')
+  ln -s '$input_file' $input_vcf &&
+#elif $input_file.is_of_type('bcf')
+  #set $input_vcf = 'input.bcf'
+  ln -s '$input_file' $input_vcf &&
+  #if $input_file.metadata.bcf_index:
+    ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
+  #else
+    bcftools index $input_vcf &&
+  #end if
+#elif $input_file.is_of_type('bcf_bgzip')
+  ln -s '$input_file' $input_vcf &&
+#end if
+]]>
+  </token>
+  <token name="@INPUT_FILE@">
+$input_vcf
+  </token>
+  <xml name="macro_select_output_type">
+    <param name="output_type" type="select" label="Output type" help="Format of the sorted output; the selected value is passed to bcftools as its output type">
+      <option value="b">compressed BCF</option>
+      <option value="u">uncompressed BCF</option>
+      <option value="z">compressed VCF</option>
+      <option value="v">uncompressed VCF</option>
+    </param>
+  </xml>
+  <token name="@OUTPUT_TYPE@">
+#if str($output_type) != "__none__":
+  --output-type '${output_type}'
+#end if
+  </token>
+
+  <xml name="macro_vcf_output">
+    <data name="output_file" format="vcf">
+      <change_format>
+        <when input="output_type" value="b" format="bcf" />
+        <when input="output_type" value="u" format="bcf" />
+        <when input="output_type" value="z" format="vcf_bgzip" />
+        <when input="output_type" value="v" format="vcf" />
+      </change_format>
+    </data>
+  </xml>
+
+  <token name="@OUTPUT_HELP@">
+      <![CDATA[
+Output Type
+-----------
+
+Output compressed BCF (b), or uncompressed VCF (v).
+Use the BCF option when piping between bcftools subcommands to speed up
+performance by removing unnecessary compression/decompression
+and VCF<->BCF conversion.
+
+This Galaxy tool recommends using the compressed BCF format
+as piping is not implemented, and uncompressed data would
+use unnecessary amounts of space.
+  ]]></token>
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorted.vcf	Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,16 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=PASS,Description="All filters passed">
+##INFO=<ID=XX,Number=1,Type=Integer,Description="Test">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FILTER=<ID=Fail,Description="Fail">
+##contig=<ID=1,length=62435964>
+##contig=<ID=2,length=62435964>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	A
+1	110	.	C	T,G	1792	Fail	XX=11;DP=32	GT:GQ:DP	0/1:245:32
+1	130	.	G	T	1016	PASS	DP=22	GT:GQ:DP	0/1:212:22
+1	130	.	GAA	GG	1016	PASS	DP=22	GT:GQ:DP	0/1:212:22
+2	140	.	A	G	727	PASS	DP=30	GT:GQ:DP	0/1:150:30
+2	160	.	TAAAA	TA,TC,T	246	PASS	DP=10	GT:GQ:DP	0/2:12:10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unsorted.vcf	Wed Dec 19 08:58:27 2018 -0500
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.0
+##INFO=<ID=XX,Number=1,Type=Integer,Description="Test">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FILTER=<ID=Fail,Description="Fail">
+##contig=<ID=1,length=62435964>
+##contig=<ID=2,length=62435964>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	A
+2	140	.	A	G	727	PASS	DP=30	GT:GQ:DP	0/1:150:30
+2	160	.	TAAAA	TA,TC,T	246	PASS	DP=10	GT:GQ:DP	0/2:12:10
+1	110	.	C	T,G	1792	Fail	XX=11;DP=32	GT:GQ:DP	0/1:245:32
+1	130	.	GAA	GG	1016	PASS	DP=22	GT:GQ:DP	0/1:212:22
+1	130	.	G	T	1016	PASS	DP=22	GT:GQ:DP	0/1:212:22