changeset 0:026d0f104183 draft

Uploaded
author devteam
date Sun, 24 Nov 2013 14:03:51 -0500
parents
children aa232e38338f
files test-data/1.vcf test-data/2.vcf test-data/out.vcf tool_dependencies.xml vcftools_isec.xml
diffstat 5 files changed, 127 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.vcf	Sun Nov 24 14:03:51 2013 -0500
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)">
+##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test">
+##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)">
+##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools">
+##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15">
+##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant">
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant">
+##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called">
+##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)">
+##source=VarScan2
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample7
+chr1	14653	.	C	T	.	PASS	ADP=30;HET=7;HOM=0;NC=0;WT=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:18:18:13:5:27.78%:9.8E-1:37:38:7:6:5:0
+chr1	14907	.	A	G	.	PASS	ADP=18;HET=2;HOM=2;NC=1;WT=2	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:19:19:5:8:61.54%:9.8E-1:37:18:5:0:5:3
+chr1	14930	.	A	G	.	PASS	ADP=19;HET=2;HOM=2;NC=1;WT=2	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:24:22:5:11:68.75%:9.8E-1:35:24:4:1:5:6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.vcf	Sun Nov 24 14:03:51 2013 -0500
@@ -0,0 +1,25 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)">
+##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test">
+##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)">
+##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools">
+##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15">
+##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant">
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant">
+##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called">
+##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)">
+##source=VarScan2
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample7
+chr1	14907	.	A	G	.	PASS	ADP=18;HET=2;HOM=2;NC=1;WT=2	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:19:19:5:8:61.54%:9.8E-1:37:18:5:0:5:3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out.vcf	Sun Nov 24 14:03:51 2013 -0500
@@ -0,0 +1,6 @@
+##fileformat=VCFv4.1
+##source_20130524.1=vcf-isec(r797) 1.vcf.header.gz 2.vcf.header.gz
+##sourceFiles_20130524.1=0:1.vcf.header.gz,1:2.vcf.header.gz
+##INFO=<ID=SF,Number=.,Type=String,Description="Source File (index to sourceFiles, f when filtered)">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+chr1	14907	.	A	G	.	PASS	ADP=18;HET=2;HOM=2;NC=1;WT=2;SF=0,1	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:19:19:5:8:61.54%:9.8E-1:37:18:5:0:5:3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sun Nov 24 14:03:51 2013 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="vcftools" version="0.1.11">
+        <repository changeset_revision="61f9ddecde82" name="package_vcftools_0_1_11" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcftools_isec.xml	Sun Nov 24 14:03:51 2013 -0500
@@ -0,0 +1,63 @@
+<tool id="vcftools_isec" name="Intersect" version="0.1">
+    <description>multiple VCF datasets</description>
+
+    <requirements>
+        <requirement type="package">tabix</requirement>
+        <requirement type="package" version="0.1.11">vcftools</requirement>
+    </requirements>
+    
+    <command>
+        ## Preprocess each dataset (sort, compress, index), collecting the indexed file names.
+        #set dataset_names = []
+        #for i, $input in enumerate( $inputs ):
+            ## Sort the dataset into a per-input working file; the path is quoted for the shell.
+            vcf-sort "${input.file}" > ${i}.vcf.sorted &amp;&amp;
+
+            ## Compress; the following steps use the resulting .gz file.
+            bgzip ${i}.vcf.sorted &amp;&amp;
+
+            ## Index.
+            tabix -p vcf ${i}.vcf.sorted.gz &amp;&amp;
+
+            #silent dataset_names.append( str($i) + '.vcf.sorted.gz' )
+        #end for
+
+        ## Intersect. All steps are chained, so this only runs if every preprocessing step succeeded.
+        vcf-isec -f
+        #if $complement:
+        -c
+        #end if
+        #echo ' '.join( dataset_names ) # > "${output}"
+    </command>
+    <inputs>
+        <repeat name="inputs" title="Dataset" min="2">
+            <param name="file" label="Dataset" type="data" format="vcf"/>
+        </repeat>
+        <param name="complement" type="boolean" label="Complement intersection" help="If checked, output positions present in the first file but missing from the other files"/>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="vcf"/>
+    </outputs>
+
+    <stdio>
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <tests>
+        <!-- Cannot specify multiple repeats in test framework right now.
+        <test>
+            <param name='inputs|1' value='1.vcf' />
+            <param name='inputs|2' value='2.vcf' />
+            <param name='complement' value='False' />
+            <output name='output' file='out.vcf' />
+        </test>
+        -->
+    </tests>
+
+    <help>
+        Please see the VCFtools `documentation`__ for help and further information.
+
+        .. __: http://vcftools.sourceforge.net/docs.html
+    </help>
+</tool>