changeset 0:87d916fbb7c4 draft

Uploaded
author devteam
date Thu, 19 Mar 2015 12:39:51 -0400
parents
children 829d46588cf5
files macros.xml test-data/vcfannotate-test1.vcf test-data/vcfannotate.bed test-data/vcflib.vcf tool_dependencies.xml vcfannotate.xml
diffstat 6 files changed, 194 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Mar 19 12:39:51 2015 -0400
@@ -0,0 +1,28 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="8a5602bf07">vcflib</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" />
+        </stdio>
+    </xml>
+   	<xml name="citations">
+   	     <citations>
+             <citation type="bibtex">
+@misc{Garrison2015,
+  author = {Garrison, Erik},
+  year = {2015},
+  title = {vcflib},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/ekg/vcflib},
+}
+             </citation>
+   	     </citations>
+   	</xml>
+    <token name="@IS_PART_OF_VCFLIB@">is part of the VCFlib toolkit developed by Erik Garrison (https://github.com/ekg/vcflib).</token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcfannotate-test1.vcf	Thu Mar 19 12:39:51 2015 -0400
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##INFO=<ID=BED-features,Number=1,Type=String,Description="Annotation from /space/anton/galaxy-central/database/files/000/dataset_50.dat delimited by ':'">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+19	111	.	A	C	9.6	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+19	112	.	A	G	10	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+20	14370	rs6054257	G	A	29	PASS	AF=0.5;DP=14;NS=3;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	AF=0.017;DP=11;NS=3	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
+20	1110696	rs6040355	A	G,T	67	PASS	AA=T;AF=0.333,0.667;BED-features=uc010zpo.2:uc002wel.4:uc010zpp.2:uc002wen.4;DP=10;NS=2;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
+20	1230237	.	T	.	47	PASS	AA=T;DP=13;NS=3	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
+20	1234567	microsat1	G	GA,GAC	50	PASS	AA=G;AC=3,1;AN=6;DP=9;NS=3	GT:GQ:DP	0/1:.:4	0/2:17:2	1/1:40:3
+20	1235237	.	T	.	0	.	.	GT	0/0	0|0	.
+X	10	rsTest	AC	A,ATG	10	PASS	.	GT	0	0/1	0|2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcfannotate.bed	Thu Mar 19 12:39:51 2015 -0400
@@ -0,0 +1,46 @@
+20	123251	126392	uc002wcx.3	0	+	123269	126333	0	2	76,337,	0,2804,
+20	138110	139804	uc002wcy.2	0	+	138185	139665	0	2	124,390,	0,1304,
+20	168526	170264	uc002wcz.1	0	-	168526	170264	0	2	233,49,	0,1689,
+20	207898	210527	uc002wda.3	0	+	207929	210412	0	2	89,609,	0,2020,
+20	238376	241736	uc002wdb.3	0	+	238419	239947	0	2	101,2019,	0,1341,
+20	251503	271419	uc021vzl.1	0	-	251847	271244	0	11	405,119,87,102,158,100,159,119,118,49,192,	0,5105,5930,6181,6363,7467,8309,13100,18696,19396,19724,
+20	251503	271419	uc002wde.2	0	-	251847	271251	0	11	405,119,87,102,158,100,159,119,118,49,188,	0,5105,5930,6181,6363,7467,8309,13100,18696,19396,19728,
+20	251503	271419	uc010zpi.2	0	-	251847	270227	0	10	405,119,87,102,158,100,159,119,118,188,	0,5105,5930,6181,6363,7467,8309,13100,18696,19728,
+20	256608	271079	uc010zpj.1	0	-	257400	270981	0	7	912,102,158,100,159,119,180,	0,1076,1258,2362,3204,7995,14291,
+20	256608	271419	uc010zpk.2	0	-	257400	271232	0	7	912,102,158,100,159,119,188,	0,1076,1258,2362,3204,7995,14623,
+20	278203	280963	uc002wdf.3	0	+	278227	279442	0	1	2760,	0,
+20	306214	310872	uc002wdh.4	0	+	306568	307516	0	1	4658,	0,
+20	327369	335512	uc002wdi.4	0	+	330287	334279	0	4	417,115,195,1659,	0,2523,2912,6484,
+20	361307	378203	uc002wdm.3	0	+	368654	377334	0	4	506,291,293,1362,	0,7347,10623,15534,
+20	361940	378203	uc002wdn.3	0	+	363189	377334	0	5	204,194,291,293,1362,	0,1136,6714,9990,14901,
+20	388708	391408	uc002wdo.3	0	+	388708	388708	0	3	715,145,353,	0,1816,2347,
+20	388708	400504	uc010zpl.1	0	+	389401	400429	0	6	715,145,94,199,122,303,	0,1816,9461,9667,11282,11493,
+20	388708	409233	uc010zpm.1	0	+	388708	388708	0	9	715,145,94,199,122,174,112,180,99,	0,1816,9461,9667,11282,11493,14062,19248,20426,
+20	388708	411610	uc002wdp.4	0	+	389401	411074	0	12	715,145,94,199,122,174,161,112,180,99,144,617,	0,1816,9461,9667,11282,11493,12806,14062,19248,20426,20886,22285,
+20	388708	411610	uc002wdq.4	0	+	389382	411074	0	11	715,94,199,122,174,161,112,180,99,144,617,	0,9461,9667,11282,11493,12806,14062,19248,20426,20886,22285,
+20	388708	411610	uc010fzy.3	0	+	388708	388708	0	12	715,145,94,199,122,174,136,112,180,99,144,617,	0,1816,9461,9667,11282,11493,12806,14062,19248,20426,20886,22285,
+20	388708	411610	uc002wdr.4	0	+	398463	411074	0	10	715,94,199,122,174,112,180,99,144,617,	0,9461,9667,11282,11493,14062,19248,20426,20886,22285,
+20	416123	443187	uc002wds.3	0	-	419229	443049	0	8	3362,188,142,102,187,81,186,208,	0,3628,4768,6108,6377,9570,12409,26856,
+20	416123	443187	uc002wdt.3	0	-	416123	416123	0	10	491,159,292,188,142,102,187,81,186,208,	0,1960,3070,3628,4768,6108,6377,9570,12409,26856,
+20	416123	443187	uc002wdv.3	0	-	419229	422326	0	7	3362,188,142,456,81,186,208,	0,3628,4768,6108,9570,12409,26856,
+20	416125	417600	uc021vzm.1	0	-	416125	416125	0	1	1475,	0,
+20	463337	524482	uc002wdw.1	0	-	464604	489195	0	14	1383,87,149,101,102,111,84,60,51,102,112,210,117,167,	0,3682,4733,5984,7086,9560,13025,15027,16561,17139,22424,25757,45239,60978,
+20	463337	524482	uc002wdx.1	0	-	464604	489195	0	13	1383,87,149,101,102,111,84,60,51,102,112,210,167,	0,3682,4733,5984,7086,9560,13025,15027,16561,17139,22424,25757,60978,
+20	463337	524482	uc002wdy.1	0	-	464604	478382	0	12	1383,87,149,101,102,111,84,60,51,102,112,167,	0,3682,4733,5984,7086,9560,13025,15027,16561,17139,22424,60978,
+20	584636	590910	uc002wdz.3	0	-	585234	590881	0	2	673,554,	0,5720,
+20	627267	634014	uc002wea.4	0	-	629357	633829	0	2	2294,395,	0,6352,
+20	627267	656823	uc002web.4	0	-	629499	656245	0	3	2294,90,711,	0,7607,28845,
+20	642239	656823	uc002wec.3	0	-	644314	656245	0	2	2866,711,	0,13873,
+20	740723	749228	uc002wed.4	0	-	741669	746418	0	5	1159,124,506,618,288,	0,1621,3418,5128,8217,
+20	740724	749228	uc002wee.2	0	-	742293	746418	0	4	1744,506,618,288,	0,3417,5127,8216,
+20	814355	826922	uc002wef.1	0	+	825447	826335	0	2	239,1572,	0,10995,
+20	816710	826922	uc002weg.1	0	+	825447	826335	0	2	47,1572,	0,8640,
+20	825284	826922	uc002weh.1	0	+	825447	826335	0	1	1638,	0,
+20	853296	896960	uc002wei.3	0	-	853602	896857	0	9	467,131,167,102,116,248,122,156,412,	0,1630,5507,7093,8517,12424,15664,17559,43252,
+20	853296	896960	uc010zpn.2	0	-	853750	896839	0	8	467,167,102,116,248,122,156,412,	0,5507,7093,8517,12424,15664,17559,43252,
+20	939095	982907	uc002wej.3	0	-	940999	982807	0	5	2014,186,141,189,179,	0,5482,8721,9497,43633,
+20	939095	982907	uc002wek.3	0	-	940999	982807	0	4	2014,141,189,179,	0,8721,9497,43633,
+20	1093905	1147970	uc010zpo.2	0	+	1106275	1146898	0	7	147,153,83,186,54,159,1100,	0,12235,14163,21858,49868,51056,52965,
+20	1093905	1148426	uc002wel.4	0	+	1099416	1145724	0	8	147,150,153,83,186,54,159,2754,	0,5490,12235,14163,21858,49868,51056,51767,
+20	1099239	1147970	uc010zpp.2	0	+	1099416	1146898	0	6	306,153,83,54,159,1100,	0,6901,8829,44534,45722,47631,
+20	1099239	1148426	uc002wen.4	0	+	1099416	1145724	0	7	306,153,83,186,54,159,2754,	0,6901,8829,16524,44534,45722,46433,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcflib.vcf	Thu Mar 19 12:39:51 2015 -0400
@@ -0,0 +1,31 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+19	111	.	A	C	9.6	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+19	112	.	A	G	10	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
+20	1234567	microsat1	G	GA,GAC	50	PASS	NS=3;DP=9;AA=G;AN=6;AC=3,1	GT:GQ:DP	0/1:.:4	0/2:17:2	1/1:40:3
+20	1235237	.	T	.	.	.	.	GT	0/0	0|0	./.
+X	10	rsTest	AC	A,ATG	10	PASS	.	GT	0	0/1	0|2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Mar 19 12:39:51 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="vcflib" version="8a5602bf07">
+        <repository changeset_revision="7e67466b033e" name="package_vcflib_8a5602bf07" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcfannotate.xml	Thu Mar 19 12:39:51 2015 -0400
@@ -0,0 +1,51 @@
+<tool id="vcfannotate" name="VCFannotate:" version="0.0.3">
+  <description>Intersect VCF records with BED annotations</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"></expand>
+  <expand macro="stdio" />
+  <command>vcfannotate --key "${key_option}" -b "${bed_data}" "${input}" > "${out_file1}"</command>
+  <inputs>        
+    <param format="vcf" name="input" type="data" label="Select VCF dataset"/>
+    <param name="bed_data" type="data" format="bed" label="Select BED dataset to intersect with"/>
+    <param name="key_option" size="20" type="text" value="BED-features" label="Intersecting VCF records will be prefixed with this TAG within the INFO field" help="--key option"/>
+  </inputs>
+  <outputs>
+    <data format="vcf" name="out_file1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="bed_data" value="vcfannotate.bed" ftype="bed"/>
+      <param name="key_option" value="BED-features"/>
+      <param name="input" value="vcflib.vcf"/>
+      <output name="out_file1" file="vcfannotate-test1.vcf" lines_diff="2" />
+    </test>
+    </tests>
+  <help>
+
+Intersect the records in the VCF file with intervals (features) provided in a BED file. Intersections are done on the reference sequences in the VCF file. 
+
+-----
+
+.. class:: infomark
+
+
+**Example**:
+
+The following VCF line::
+
+ #CHROM POS     ID        REF ALT QUAL FILTER INFO                              FORMAT      NA00001        NA00002      NA00003
+ 20     1110696 rs6040355 A   G,T 67   PASS   NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,.
+
+will appear as the following after intersection with BED records uc010zpo.2, uc002wel.4, uc010zpp.2, and uc002wen.4::
+
+ #CHROM POS     ID        REF ALT QUAL FILTER INFO                                                                                       FORMAT      NA00001        NA00002      NA00003                                                  
+ 20     1110696 rs6040355 A   G,T 67   PASS   AA=T;AF=0.333,0.667;BED-features=uc010zpo.2:uc002wel.4:uc010zpp.2:uc002wen.4;DP=10;NS=2;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,.
+
+----
+
+Vcfannotate @IS_PART_OF_VCFLIB@
+    </help>
+    <expand macro="citations" />
+</tool>