comparison commons/core/parsing/test/Test_F_VarscanToVCF.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 import unittest
2 import os
3 from commons.core.parsing.VarscanToVCF import VarscanToVCF
4 from commons.core.utils.FileUtils import FileUtils
5
class Test_F_VarscanToVCF(unittest.TestCase):
    """Functional tests for VarscanToVCF.

    Each test writes the expected VCF file to disk, runs VarscanToVCF on a
    Varscan input file and checks that the file it produces is identical to
    the expected one. Scratch files are created in the current working
    directory and deleted again in tearDown.
    """

    # Column header written as the sole content of the "empty" Varscan input.
    _VARSCAN_HEADER = "Chrom\tPosition\tRef\tCons\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\tMapQual1\tMapQual2\tReads1Plus\tReads1Minus\tReads2Plus\tReads2Minus\tVarAllele\n"

    # Header lines shared by both expected VCF files.
    _VCF_HEADER_LINES = (
        "##fileformat=VCFv4.1\n",
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n",
    )

    # One expected VCF record; chromosome, ID, QUAL and FILTER are the same
    # for every record of the sample, so only the varying fields are filled in.
    _VCF_RECORD_FORMAT = "chr1\t%d\t.\t%s\t%s\t0.087739243\t.\tAF=%s;DP=%d;RBQ=%d;ABQ=%d\n"

    # Varying fields of the expected records, in output order:
    # (POS, REF, ALT, AF, DP, RBQ, ABQ). AF is kept as a string so that its
    # trailing zeros are written back unchanged.
    _EXPECTED_RECORDS = (
        (10759, "C", "T", "0.3333", 3, 65, 47),
        (12438, "C", "G", "0.5000", 2, 62, 42),
        (17432, "A", "C", "0.3333", 3, 55, 37),
        (20391, "A", "T", "0.3333", 3, 56, 37),
        (21207, "T", "C", "0.3333", 3, 55, 41),
        (26057, "T", "G", "0.1538", 13, 60, 37),
        (36838, "T", "C", "0.3333", 3, 61, 36),
        (37751, "A", "C", "0.2500", 4, 65, 42),
        (43500, "C", "G", "0.2500", 4, 65, 40),
        (50481, "A", "T", "0.3333", 3, 60, 40),
        (106849, "C", "G", "0.2500", 4, 65, 40),
        (108726, "T", "A", "0.3333", 3, 50, 41),
        (114204, "G", "C", "0.3333", 6, 60, 39),
        (115030, "A", "C", "0.2500", 4, 57, 41),
        (116173, "A", "G", "0.2222", 9, 58, 39),
        (118433, "G", "C", "0.1429", 7, 64, 50),
        (119042, "A", "G", "0.1667", 6, 55, 51),
        (139219, "A", "G", "0.5000", 2, 65, 46),
        (140710, "A", "G", "0.1818", 11, 63, 41),
        (144419, "T", "C", "0.2500", 4, 63, 39),
        (146099, "G", "C", "0.3333", 3, 64, 38),
        (146435, "T", "C", "0.3889", 18, 60, 24),
        (147232, "A", "T", "0.3333", 3, 64, 39),
        (158703, "A", "C", "0.3333", 3, 60, 55),
        (166732, "A", "T", "0.3333", 6, 62, 24),
        (179887, "C", "A", "0.2500", 4, 64, 39),
        (185971, "A", "G", "0.2500", 4, 55, 40),
        (211074, "A", "C", "0.2500", 4, 65, 40),
        (219573, "G", "C", "0.5000", 2, 61, 41),
        (229396, "C", "G", "0.3333", 3, 57, 39),
        (236388, "T", "G", "0.2000", 10, 52, 32),
        (245990, "G", "C", "0.1667", 6, 61, 46),
        (249155, "C", "G", "0.3333", 3, 62, 37),
        (261257, "T", "G", "0.2500", 4, 60, 39),
        (274692, "A", "G", "0.3333", 3, 63, 39),
        (283468, "G", "C", "0.2500", 4, 63, 45),
        (284288, "T", "A", "0.2500", 4, 64, 39),
        (286983, "T", "C", "0.3333", 3, 64, 37),
        (287378, "C", "A", "0.3333", 3, 65, 37),
        (302928, "A", "G", "0.5000", 2, 64, 38),
        (305952, "C", "A", "0.3333", 3, 65, 38),
        (307932, "T", "C", "0.2000", 5, 65, 42),
        (317422, "G", "C", "0.2500", 4, 57, 40),
        (321480, "A", "T", "0.3333", 3, 64, 38),
        (322307, "A", "C", "0.5000", 2, 65, 37),
        (328326, "G", "C", "0.1667", 6, 62, 52),
        (333138, "T", "G", "0.4000", 5, 63, 24),
        (333388, "T", "C", "0.2000", 5, 64, 43),
        (335592, "T", "G", "0.3333", 3, 58, 39),
        (336572, "C", "G", "0.5000", 2, 58, 38),
        (347396, "T", "G", "0.1667", 6, 62, 52),
        (359080, "T", "G", "0.2000", 5, 61, 42),
        (360223, "A", "T", "0.1667", 6, 54, 52),
        (361047, "T", "G", "0.1667", 6, 62, 50),
        (366048, "A", "C", "0.3333", 3, 65, 39),
        (368105, "A", "G", "0.2500", 4, 55, 40),
        (373782, "T", "A", "0.2500", 4, 64, 41),
        (378159, "G", "T", "0.3333", 3, 63, 41),
        (383945, "C", "G", "0.1111", 9, 58, 57),
        (389461, "A", "G", "0.5000", 2, 65, 36),
        (396860, "G", "T", "0.2000", 5, 65, 43),
        (397170, "A", "G", "0.5000", 2, 65, 36),
        (399939, "A", "C", "0.1667", 6, 63, 46),
        (400733, "G", "C", "0.2500", 4, 65, 41),
        (401165, "A", "G", "0.2500", 4, 64, 41),
        (406774, "T", "G", "0.2500", 4, 58, 39),
        (417293, "C", "T", "0.3333", 3, 65, 55),
        (417723, "G", "C", "0.3333", 3, 65, 49),
        (420308, "C", "A", "0.5000", 2, 65, 36),
        (435579, "C", "T", "0.2500", 4, 64, 40),
        (437183, "G", "A", "0.3333", 3, 65, 39),
        (437194, "A", "T", "0.2500", 4, 64, 41),
        (438866, "G", "C", "0.1429", 7, 62, 52),
        (446237, "T", "C", "0.2500", 4, 65, 39),
        (446308, "A", "G", "0.3333", 3, 50, 38),
        (452322, "A", "C", "0.5000", 2, 65, 36),
        (462721, "T", "G", "0.5000", 2, 58, 41),
        (477145, "C", "G", "0.3333", 3, 64, 39),
        (493772, "G", "C", "0.2500", 4, 65, 40),
        (498962, "C", "A", "0.3333", 3, 65, 41),
        (510532, "T", "C", "0.2000", 5, 64, 53),
        (516369, "T", "C", "0.5000", 2, 34, 40),
        (523631, "G", "T", "0.2500", 4, 64, 40),
        (524680, "C", "T", "0.3333", 3, 65, 41),
        (525898, "T", "G", "0.2500", 4, 62, 49),
        (526118, "A", "C", "0.1667", 6, 61, 50),
        (535762, "C", "T", "0.2500", 4, 65, 42),
        (543235, "G", "T", "0.5000", 2, 45, 41),
        (550086, "T", "C", "0.2500", 4, 50, 41),
        (550508, "A", "G", "0.2500", 4, 55, 40),
        (551143, "G", "C", "0.3333", 3, 65, 39),
        (552924, "A", "G", "0.2500", 8, 62, 38),
        (553541, "A", "G", "0.1250", 8, 65, 52),
        (560806, "T", "C", "0.1667", 6, 65, 49),
        (562736, "C", "G", "0.1667", 6, 64, 52),
        (563224, "T", "C", "0.3333", 3, 51, 39),
        (564217, "T", "A", "0.2500", 4, 62, 40),
        (567288, "C", "T", "0.3333", 3, 65, 40),
        (569652, "T", "C", "0.5000", 2, 48, 42),
        (570280, "G", "T", "0.1250", 8, 60, 53),
        (582185, "T", "C", "0.5000", 2, 63, 40),
        (582453, "G", "C", "0.5000", 2, 65, 38),
        (583477, "T", "G", "0.3333", 3, 62, 39),
        (584179, "G", "T", "0.2500", 4, 65, 41),
        (589074, "G", "C", "0.3333", 3, 65, 36),
        (596641, "C", "G", "0.3333", 3, 65, 40),
        (599263, "G", "T", "0.3333", 3, 60, 38),
    )

    def setUp(self):
        """Define the names of the input, expected and observed files.

        Raises KeyError when the REPET_PATH environment variable is not set,
        because the sample Varscan file is located relative to it.
        """
        self.emptyVarscanFileName = "emptyfile.varscan"
        self.varscanFileName = "%s/commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan" % os.environ["REPET_PATH"]
        self.expVCFFileName = "expVCF.vcf"
        self.obsVCFFileName = "obsVCF.vcf"

    def tearDown(self):
        """Delete every scratch file that a test may have created."""
        scratchFileNames = (
            self.emptyVarscanFileName,
            self.expVCFFileName,
            self.obsVCFFileName,
        )
        # Each file is checked on its own: a test that fails before one of
        # them is written must not have its failure hidden by an OSError
        # raised from os.remove() here.
        for fileName in scratchFileNames:
            if os.path.exists(fileName):
                os.remove(fileName)

    def test_VarscanToVCF(self):
        """The sample Varscan file is converted into the expected records."""
        self._writeExpOutputFile()
        self._assertConversionMatchesExpected(self.varscanFileName)

    def test_VarscanToVCF_empty(self):
        """A Varscan file holding only its header yields a header-only VCF."""
        self._writeInputFile_empty()
        self._writeExpOutputFile_empty()
        self._assertConversionMatchesExpected(self.emptyVarscanFileName)

    def _assertConversionMatchesExpected(self, inputFileName):
        """Run VarscanToVCF on inputFileName and compare with the expected VCF."""
        iVarscanFile = VarscanToVCF(inputFileName, self.obsVCFFileName, doClean=True)
        iVarscanFile.run()
        self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName))

    def _writeInputFile_empty(self):
        """Write a Varscan input file that contains the column header only."""
        with open(self.emptyVarscanFileName, "w") as varscanFile:
            varscanFile.write(self._VARSCAN_HEADER)

    def _writeExpOutputFile_empty(self):
        """Write the expected VCF for the empty input: header lines only."""
        with open(self.expVCFFileName, "w") as expFile:
            expFile.writelines(self._VCF_HEADER_LINES)

    def _writeExpOutputFile(self):
        """Write the expected VCF for the sample input: header plus records."""
        with open(self.expVCFFileName, "w") as expFile:
            expFile.writelines(self._VCF_HEADER_LINES)
            expFile.writelines(
                self._VCF_RECORD_FORMAT % record
                for record in self._EXPECTED_RECORDS
            )
153
154 ##fileDate=20090805
155 ##source=myImputationProgramV3.1
156 ##reference=1000GenomesPilot-NCBI36
157 ##phasing=partial
158 ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
159 ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
160 ##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
161 ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
162 ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
163 ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
164 ##FILTER=<ID=q10,Description="Quality below 10">
165 ##FILTER=<ID=s50,Description="Less than 50% of samples have data">
166 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
167 ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
168 ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
169 ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
170
# Run the test cases of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
173