changeset 0:7e8a201b4c1c

Imported from capsule None
author anton
date Wed, 11 Jun 2014 17:11:32 -0400
parents
children a83d472202c8
files test-data/vcfannotate.bed test-data/vcfbedintersect-test1.vcf test-data/vcfbedintersect-test2.vcf test-data/vcflib.vcf tool_dependencies.xml vcfbedintersect.xml
diffstat 6 files changed, 191 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcfannotate.bed	Wed Jun 11 17:11:32 2014 -0400
@@ -0,0 +1,46 @@
+20	123251	126392	uc002wcx.3	0	+	123269	126333	0	2	76,337,	0,2804,
+20	138110	139804	uc002wcy.2	0	+	138185	139665	0	2	124,390,	0,1304,
+20	168526	170264	uc002wcz.1	0	-	168526	170264	0	2	233,49,	0,1689,
+20	207898	210527	uc002wda.3	0	+	207929	210412	0	2	89,609,	0,2020,
+20	238376	241736	uc002wdb.3	0	+	238419	239947	0	2	101,2019,	0,1341,
+20	251503	271419	uc021vzl.1	0	-	251847	271244	0	11	405,119,87,102,158,100,159,119,118,49,192,	0,5105,5930,6181,6363,7467,8309,13100,18696,19396,19724,
+20	251503	271419	uc002wde.2	0	-	251847	271251	0	11	405,119,87,102,158,100,159,119,118,49,188,	0,5105,5930,6181,6363,7467,8309,13100,18696,19396,19728,
+20	251503	271419	uc010zpi.2	0	-	251847	270227	0	10	405,119,87,102,158,100,159,119,118,188,	0,5105,5930,6181,6363,7467,8309,13100,18696,19728,
+20	256608	271079	uc010zpj.1	0	-	257400	270981	0	7	912,102,158,100,159,119,180,	0,1076,1258,2362,3204,7995,14291,
+20	256608	271419	uc010zpk.2	0	-	257400	271232	0	7	912,102,158,100,159,119,188,	0,1076,1258,2362,3204,7995,14623,
+20	278203	280963	uc002wdf.3	0	+	278227	279442	0	1	2760,	0,
+20	306214	310872	uc002wdh.4	0	+	306568	307516	0	1	4658,	0,
+20	327369	335512	uc002wdi.4	0	+	330287	334279	0	4	417,115,195,1659,	0,2523,2912,6484,
+20	361307	378203	uc002wdm.3	0	+	368654	377334	0	4	506,291,293,1362,	0,7347,10623,15534,
+20	361940	378203	uc002wdn.3	0	+	363189	377334	0	5	204,194,291,293,1362,	0,1136,6714,9990,14901,
+20	388708	391408	uc002wdo.3	0	+	388708	388708	0	3	715,145,353,	0,1816,2347,
+20	388708	400504	uc010zpl.1	0	+	389401	400429	0	6	715,145,94,199,122,303,	0,1816,9461,9667,11282,11493,
+20	388708	409233	uc010zpm.1	0	+	388708	388708	0	9	715,145,94,199,122,174,112,180,99,	0,1816,9461,9667,11282,11493,14062,19248,20426,
+20	388708	411610	uc002wdp.4	0	+	389401	411074	0	12	715,145,94,199,122,174,161,112,180,99,144,617,	0,1816,9461,9667,11282,11493,12806,14062,19248,20426,20886,22285,
+20	388708	411610	uc002wdq.4	0	+	389382	411074	0	11	715,94,199,122,174,161,112,180,99,144,617,	0,9461,9667,11282,11493,12806,14062,19248,20426,20886,22285,
+20	388708	411610	uc010fzy.3	0	+	388708	388708	0	12	715,145,94,199,122,174,136,112,180,99,144,617,	0,1816,9461,9667,11282,11493,12806,14062,19248,20426,20886,22285,
+20	388708	411610	uc002wdr.4	0	+	398463	411074	0	10	715,94,199,122,174,112,180,99,144,617,	0,9461,9667,11282,11493,14062,19248,20426,20886,22285,
+20	416123	443187	uc002wds.3	0	-	419229	443049	0	8	3362,188,142,102,187,81,186,208,	0,3628,4768,6108,6377,9570,12409,26856,
+20	416123	443187	uc002wdt.3	0	-	416123	416123	0	10	491,159,292,188,142,102,187,81,186,208,	0,1960,3070,3628,4768,6108,6377,9570,12409,26856,
+20	416123	443187	uc002wdv.3	0	-	419229	422326	0	7	3362,188,142,456,81,186,208,	0,3628,4768,6108,9570,12409,26856,
+20	416125	417600	uc021vzm.1	0	-	416125	416125	0	1	1475,	0,
+20	463337	524482	uc002wdw.1	0	-	464604	489195	0	14	1383,87,149,101,102,111,84,60,51,102,112,210,117,167,	0,3682,4733,5984,7086,9560,13025,15027,16561,17139,22424,25757,45239,60978,
+20	463337	524482	uc002wdx.1	0	-	464604	489195	0	13	1383,87,149,101,102,111,84,60,51,102,112,210,167,	0,3682,4733,5984,7086,9560,13025,15027,16561,17139,22424,25757,60978,
+20	463337	524482	uc002wdy.1	0	-	464604	478382	0	12	1383,87,149,101,102,111,84,60,51,102,112,167,	0,3682,4733,5984,7086,9560,13025,15027,16561,17139,22424,60978,
+20	584636	590910	uc002wdz.3	0	-	585234	590881	0	2	673,554,	0,5720,
+20	627267	634014	uc002wea.4	0	-	629357	633829	0	2	2294,395,	0,6352,
+20	627267	656823	uc002web.4	0	-	629499	656245	0	3	2294,90,711,	0,7607,28845,
+20	642239	656823	uc002wec.3	0	-	644314	656245	0	2	2866,711,	0,13873,
+20	740723	749228	uc002wed.4	0	-	741669	746418	0	5	1159,124,506,618,288,	0,1621,3418,5128,8217,
+20	740724	749228	uc002wee.2	0	-	742293	746418	0	4	1744,506,618,288,	0,3417,5127,8216,
+20	814355	826922	uc002wef.1	0	+	825447	826335	0	2	239,1572,	0,10995,
+20	816710	826922	uc002weg.1	0	+	825447	826335	0	2	47,1572,	0,8640,
+20	825284	826922	uc002weh.1	0	+	825447	826335	0	1	1638,	0,
+20	853296	896960	uc002wei.3	0	-	853602	896857	0	9	467,131,167,102,116,248,122,156,412,	0,1630,5507,7093,8517,12424,15664,17559,43252,
+20	853296	896960	uc010zpn.2	0	-	853750	896839	0	8	467,167,102,116,248,122,156,412,	0,5507,7093,8517,12424,15664,17559,43252,
+20	939095	982907	uc002wej.3	0	-	940999	982807	0	5	2014,186,141,189,179,	0,5482,8721,9497,43633,
+20	939095	982907	uc002wek.3	0	-	940999	982807	0	4	2014,141,189,179,	0,8721,9497,43633,
+20	1093905	1147970	uc010zpo.2	0	+	1106275	1146898	0	7	147,153,83,186,54,159,1100,	0,12235,14163,21858,49868,51056,52965,
+20	1093905	1148426	uc002wel.4	0	+	1099416	1145724	0	8	147,150,153,83,186,54,159,2754,	0,5490,12235,14163,21858,49868,51056,51767,
+20	1099239	1147970	uc010zpp.2	0	+	1099416	1146898	0	6	306,153,83,54,159,1100,	0,6901,8829,44534,45722,47631,
+20	1099239	1148426	uc002wen.4	0	+	1099416	1145724	0	7	306,153,83,186,54,159,2754,	0,6901,8829,16524,44534,45722,46433,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcfbedintersect-test1.vcf	Wed Jun 11 17:11:32 2014 -0400
@@ -0,0 +1,23 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcfbedintersect-test2.vcf	Wed Jun 11 17:11:32 2014 -0400
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcflib.vcf	Wed Jun 11 17:11:32 2014 -0400
@@ -0,0 +1,31 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+19	111	.	A	C	9.6	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+19	112	.	A	G	10	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
+20	1234567	microsat1	G	GA,GAC	50	PASS	NS=3;DP=9;AA=G;AN=6;AC=3,1	GT:GQ:DP	0/1:.:4	0/2:17:2	1/1:40:3
+20	1235237	.	T	.	.	.	.	GT	0/0	0|0	./.
+X	10	rsTest	AC	A,ATG	10	PASS	.	GT	0	0/1	0|2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Jun 11 17:11:32 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="vcflib" version="586c5ae5d57a38dae6b32ea831fb1f7cfa14c9bd">
+      <repository changeset_revision="7949cc09120a" name="package_vcflib" owner="anton" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcfbedintersect.xml	Wed Jun 11 17:11:32 2014 -0400
@@ -0,0 +1,61 @@
+<tool id="vcfbedintersect" name="VCF-BEDintersect:" version="0.0.1">
+<requirements>
+    <requirement type="package" version="586c5ae5d57a38dae6b32ea831fb1f7cfa14c9bd">vcflib</requirement>
+    <!-- <requirement type="package" version="0.1.18">samtools</requirement> -->
+</requirements>
+  <description>Intersect VCF and BED datasets</description>
+  <command>
+    #if str($bed_vs_interval.bed_vs_interval_selector) == "bed":
+        vcfintersect -b "${bed_vs_interval.bed_input}" ${invert} "${vcf_input}" > "${out_file1}"
+   #else:
+        vcfintersect -R "${bed_vs_interval.int_input}" ${invert} "${vcf_input}" > "${out_file1}"
+   #end if
+
+  </command>
+  <inputs>
+    <!-- selecting the VCF dataset and what to intersect it with (a BED dataset or a typed interval) -->
+    <param name="vcf_input" type="data" format="vcf" label="Select VCF dataset" />
+     <conditional name="bed_vs_interval">
+       <param name="bed_vs_interval_selector" type="select" label="Intersect with BED dataset or an interval">
+	 <option value="bed">BED</option>
+	 <option value="interval">Interval</option>
+       </param>
+       <when value="bed">
+	 <param name="bed_input" type="data" format="bed" label="Select BED dataset" />
+       </when>
+       <when value="interval">
+	 <param name="int_input" type="text" size="20" value="chr20:1-30" label="Enter interval string" help="use chr:start-end format" />
+       </when>
+     </conditional>
+     <param name="invert" type="boolean" truevalue="-v" falsevalue="" label="Invert selection?" help="-v, --invert. Print entries that DO NOT intersect." />
+  </inputs>
+  <outputs>
+    <data format="vcf" name="out_file1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="bed_vs_interval_selector" value="bed" />
+      <param name="bed_input" value="vcfannotate.bed" />
+      <param name="invert" value="False" />
+      <param name="vcf_input" value="vcflib.vcf"/>
+      <output name="out_file1" file="vcfbedintersect-test1.vcf"/>
+    </test>
+    <test>
+      <param name="bed_vs_interval_selector" value="interval" />
+      <param name="int_input" value="20:1-30000" />
+      <param name="invert" value="False" />
+      <param name="vcf_input" value="vcflib.vcf"/>
+      <output name="out_file1" file="vcfbedintersect-test2.vcf"/>
+    </test>
+    </tests>
+
+  <help>
+
+Computes the intersection between a VCF dataset and a set of genomic intervals, defined either as a BED dataset (http://genome.ucsc.edu/FAQ/FAQformat.html#format1) or as a manually typed interval (in the form chr:start-end).
+
+----                                                                                                                                                                              
+
+VCFBEDintersect is based on the vcfintersect utility of the VCFlib toolkit developed by Erik Garrison (https://github.com/ekg/vcflib).
+
+</help>
+</tool>