changeset 0:bc178c43e4fe draft

Uploaded
author devteam
date Sun, 24 Nov 2013 17:08:23 -0500
parents
children db407a47527b
files test-data/test_in1.vcf test-data/test_out1.vcf tool_dependencies.xml vcftools_annotate.xml
diffstat 4 files changed, 124 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_in1.vcf	Sun Nov 24 17:08:23 2013 -0500
@@ -0,0 +1,28 @@
+##fileformat=VCFv4.1
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample1
+chr1	14470	.	G	A	.	PASS	ADP=25;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:28:25:23:2:8%:9.8E-1:33:39:22:1:2:0
+chr1	14513	.	G	A	.	PASS	ADP=32;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:36:32:24:8:25%:9.8E-1:31:22:16:8:7:1
+chr1	14626	.	G	C	.	PASS	ADP=44;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:45:44:40:4:9.09%:9.8E-1:34:36:14:26:0:4
+chr1	14677	.	G	A	.	PASS	ADP=70;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:82:70:63:7:10%:9.8E-1:33:17:44:19:7:0
+chr1	14813	.	T	G	.	PASS	ADP=105;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:121:105:103:2:1.9%:9.8E-1:32:22:42:61:1:1
+chr1	14815	.	C	T	.	PASS	ADP=117;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:122:117:109:8:6.84%:9.8E-1:33:29:50:59:4:4
+chr1	14907	.	A	G	.	PASS	ADP=104;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:108:104:19:16:45.71%:9.8E-1:37:18:10:9:8:8
+chr1	14930	.	A	G	.	PASS	ADP=103;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:110:103:17:17:50%:9.8E-1:37:24:8:9:8:9
+chr1	15015	.	G	C	.	PASS	ADP=52;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:55:52:44:8:15.38%:9.8E-1:34:35:5:39:1:7
+chr1	15211	.	T	G	.	PASS	ADP=21;WT=0;HET=0;HOM=1;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	1/1:0:23:21:0:2:100%:9.8E-1:0:26:0:0:1:1
+chr1	15527	.	G	A	.	PASS	ADP=53;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:57:53:29:5:14.71%:9.8E-1:32:23:19:10:3:2
+chr1	15688	.	C	T	.	PASS	ADP=34;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:36:34:11:4:26.67%:9.8E-1:35:26:6:5:1:3
+chr1	16257	.	G	C	.	PASS	ADP=23;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:26:23:20:2:9.09%:9.8E-1:35:17:7:13:0:2
+chr1	16288	.	C	G	.	PASS	ADP=15;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:16:15:11:3:21.43%:9.8E-1:34:36:3:8:1:2
+chr1	16298	.	C	T	.	PASS	ADP=12;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:12:12:9:2:18.18%:9.8E-1:34:30:2:7:1:1
+chr1	16378	.	T	C	.	PASS	ADP=24;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:24:24:11:12:52.17%:9.8E-1:37:34:7:4:8:4
+chr1	16495	.	G	C	.	PASS	ADP=21;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:22:21:14:6:30%:9.8E-1:31:25:5:9:2:4
+chr1	16534	.	C	T	.	PASS	ADP=11;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:11:11:6:4:40%:9.8E-1:32:37:1:5:0:4
+chr1	16571	.	G	A	.	PASS	ADP=8;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:8:8:5:2:28.57%:9.8E-1:31:34:2:3:0:2
+chr1	16737	.	G	T	.	PASS	ADP=27;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:30:27:22:5:18.52%:9.8E-1:37:34:21:1:5:0
+chr1	16742	.	G	C	.	PASS	ADP=30;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:30:30:23:7:23.33%:9.8E-1:32:37:21:2:7:0
+chr1	16900	.	T	C	.	PASS	ADP=140;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:150:140:135:5:3.57%:9.8E-1:35:25:119:16:5:0
+chr1	16963	.	G	A	.	PASS	ADP=338;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:354:338:297:41:12.13%:9.8E-1:32:34:218:79:26:15
+chr1	16977	.	G	A	.	PASS	ADP=368;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:384:368:270:98:26.63%:9.8E-1:34:25:186:84:63:35
+chr1	16996	.	T	C	.	PASS	ADP=356;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:371:356:269:87:24.44%:9.8E-1:35:34:147:122:58:29
+chr1	17020	.	G	A	.	PASS	ADP=355;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:380:355:347:8:2.25%:9.8E-1:33:29:164:183:4:4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out1.vcf	Sun Nov 24 17:08:23 2013 -0500
@@ -0,0 +1,30 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=MinAF,Description="MinAF [7]">
+##source_20130813.1=vcf-annotate(r797) -f f.txt <input_file>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample1
+chr1	14470	.	G	A	.	PASS	ADP=25;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:28:25:23:2:8%:9.8E-1:33:39:22:1:2:0
+chr1	14513	.	G	A	.	PASS	ADP=32;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:36:32:24:8:25%:9.8E-1:31:22:16:8:7:1
+chr1	14626	.	G	C	.	PASS	ADP=44;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:45:44:40:4:9.09%:9.8E-1:34:36:14:26:0:4
+chr1	14677	.	G	A	.	PASS	ADP=70;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:82:70:63:7:10%:9.8E-1:33:17:44:19:7:0
+chr1	14813	.	T	G	.	MinAF	ADP=105;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:121:105:103:2:1.9%:9.8E-1:32:22:42:61:1:1
+chr1	14815	.	C	T	.	MinAF	ADP=117;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:122:117:109:8:6.84%:9.8E-1:33:29:50:59:4:4
+chr1	14907	.	A	G	.	PASS	ADP=104;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:108:104:19:16:45.71%:9.8E-1:37:18:10:9:8:8
+chr1	14930	.	A	G	.	PASS	ADP=103;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:110:103:17:17:50%:9.8E-1:37:24:8:9:8:9
+chr1	15015	.	G	C	.	PASS	ADP=52;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:55:52:44:8:15.38%:9.8E-1:34:35:5:39:1:7
+chr1	15211	.	T	G	.	PASS	ADP=21;WT=0;HET=0;HOM=1;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	1/1:0:23:21:0:2:100%:9.8E-1:0:26:0:0:1:1
+chr1	15527	.	G	A	.	PASS	ADP=53;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:57:53:29:5:14.71%:9.8E-1:32:23:19:10:3:2
+chr1	15688	.	C	T	.	PASS	ADP=34;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:36:34:11:4:26.67%:9.8E-1:35:26:6:5:1:3
+chr1	16257	.	G	C	.	PASS	ADP=23;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:26:23:20:2:9.09%:9.8E-1:35:17:7:13:0:2
+chr1	16288	.	C	G	.	PASS	ADP=15;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:16:15:11:3:21.43%:9.8E-1:34:36:3:8:1:2
+chr1	16298	.	C	T	.	PASS	ADP=12;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:12:12:9:2:18.18%:9.8E-1:34:30:2:7:1:1
+chr1	16378	.	T	C	.	PASS	ADP=24;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:24:24:11:12:52.17%:9.8E-1:37:34:7:4:8:4
+chr1	16495	.	G	C	.	PASS	ADP=21;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:22:21:14:6:30%:9.8E-1:31:25:5:9:2:4
+chr1	16534	.	C	T	.	PASS	ADP=11;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:11:11:6:4:40%:9.8E-1:32:37:1:5:0:4
+chr1	16571	.	G	A	.	PASS	ADP=8;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:8:8:5:2:28.57%:9.8E-1:31:34:2:3:0:2
+chr1	16737	.	G	T	.	PASS	ADP=27;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:30:27:22:5:18.52%:9.8E-1:37:34:21:1:5:0
+chr1	16742	.	G	C	.	PASS	ADP=30;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:30:30:23:7:23.33%:9.8E-1:32:37:21:2:7:0
+chr1	16900	.	T	C	.	MinAF	ADP=140;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:150:140:135:5:3.57%:9.8E-1:35:25:119:16:5:0
+chr1	16963	.	G	A	.	PASS	ADP=338;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:354:338:297:41:12.13%:9.8E-1:32:34:218:79:26:15
+chr1	16977	.	G	A	.	PASS	ADP=368;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:384:368:270:98:26.63%:9.8E-1:34:25:186:84:63:35
+chr1	16996	.	T	C	.	PASS	ADP=356;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:371:356:269:87:24.44%:9.8E-1:35:34:147:122:58:29
+chr1	17020	.	G	A	.	MinAF	ADP=355;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:380:355:347:8:2.25%:9.8E-1:33:29:164:183:4:4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sun Nov 24 17:08:23 2013 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="vcftools" version="0.1.11">
+        <repository changeset_revision="61f9ddecde82" name="package_vcftools_0_1_11" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcftools_annotate.xml	Sun Nov 24 17:08:23 2013 -0500
@@ -0,0 +1,60 @@
+<tool id="vcftools_annotate" name="Annotate" version="0.1">
+    <description>a VCF dataset with custom filters</description>
+
+    <requirements>
+        <requirement type="binary">echo</requirement>
+        <requirement type="package" version="0.1.11">vcftools</requirement>
+    </requirements>
+    
+    <command>
+        ## Write the vcf-annotate filter definition to f.txt: a Perl hash entry holding the tag, filter name, description and a test sub that splits the first matched value on '%' and applies $condition to the leading part, yielding PASS or FAIL. NOTE(review): 'description' and 'condition' are declared with sanitize="False" and are spliced unescaped into this double-quoted shell string and into Perl source - confirm inputs are trusted. The doubled backslashes are presumably there so a literal dollar sign survives template and shell processing - TODO confirm.
+        echo "{ tag  => '${tag}', name => '${filter}', desc => '${description}', test => sub { my @t = split('%', @\\$MATCH[0]); return @t[0] $condition ? \\$PASS : \\$FAIL }, }," > f.txt ;
+
+        ## Run vcf-annotate with the generated filter file against the input dataset; its standard output is redirected into the output dataset.
+        vcf-annotate -f f.txt ${input} > ${output}
+    </command>
+    <inputs>
+        <param name="input" label="Input" type="data" format="vcf"/>
+        <param name="tag" label="Tag name" type="text"/>
+        <param name="description" label="Tag description" type="text">
+            <sanitizer sanitize="False"/>
+        </param>
+        <param name="filter" label="Filter name" type="text"/>
+        <param name="condition" label="Filter condition" type="text">
+            <sanitizer sanitize="False"/>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="vcf"/>
+    </outputs>
+
+    <stdio>
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <tests>
+        <test>
+            <param name="input" value="test_in1.vcf" />
+            <param name="tag" value="FORMAT/FREQ" />
+            <param name="description" value="MinAF [7]"/>
+            <param name="filter" value="MinAF" />
+            <param name="condition" value=">= 7" />
+            <!-- 2 lines diff because command line with full file path is included in output VCF, and
+                 it is not possible to match the full file path.  -->
+            <output name="output" file="test_out1.vcf" lines_diff="2" />
+        </test>
+    </tests>
+
+    <help>
+Annotates a VCF dataset with custom annotations. For example, if this format tag is used for allele frequency:
+
+##FORMAT=&lt;ID=FREQ,Number=1,Type=String,Description=&quot;Variant allele frequency&quot;&gt;
+
+you can add a filter for allele frequency using &quot;FORMAT/FREQ&quot; as the tag name and the condition &quot;>= [desired allele freq]&quot;.
+
+Please see the VCFtools `documentation`__ for help and further information.
+
+.. __: http://vcftools.sourceforge.net/perl_module.html#vcf-annotate
+    </help>
+</tool>