Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/test/Test_F_VarscanToVCF.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 import unittest | |
2 import os | |
3 from commons.core.parsing.VarscanToVCF import VarscanToVCF | |
4 from commons.core.utils.FileUtils import FileUtils | |
5 | |
6 class Test_F_VarscanToVCF(unittest.TestCase): | |
7 | |
8 def setUp(self): | |
9 self.emptyVarscanFileName = "emptyfile.varscan" | |
10 self.varscanFileName = "%s/commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan" % os.environ["REPET_PATH"] | |
11 self.expVCFFileName = "expVCF.vcf" | |
12 self.obsVCFFileName = "obsVCF.vcf" | |
13 | |
14 def tearDown(self): | |
15 if os.path.exists(self.emptyVarscanFileName): | |
16 os.remove(self.emptyVarscanFileName) | |
17 os.remove(self.expVCFFileName) | |
18 os.remove(self.obsVCFFileName) | |
19 | |
20 def test_VarscanToVCF(self): | |
21 self._writeExpOutputFile() | |
22 iVarscanFile = VarscanToVCF(self.varscanFileName, self.obsVCFFileName, doClean = True) | |
23 iVarscanFile.run() | |
24 self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName)) | |
25 | |
26 def test_VarscanToVCF_empty(self): | |
27 self._writeInputFile_empty() | |
28 self._writeExpOutputFile_empty() | |
29 iVarscanFile = VarscanToVCF(self.emptyVarscanFileName, self.obsVCFFileName, doClean = True) | |
30 iVarscanFile.run() | |
31 self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName)) | |
32 | |
33 def _writeInputFile_empty(self): | |
34 with open(self.emptyVarscanFileName, "w") as varscanFileName: | |
35 varscanFileName.write("Chrom\tPosition\tRef\tCons\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\tMapQual1\tMapQual2\tReads1Plus\tReads1Minus\tReads2Plus\tReads2Minus\tVarAllele\n") | |
36 | |
37 def _writeExpOutputFile_empty(self): | |
38 with open(self.expVCFFileName, "w") as vcfFileName: | |
39 vcfFileName.write("##fileformat=VCFv4.1\n") | |
40 vcfFileName.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n") | |
41 | |
42 def _writeExpOutputFile(self): | |
43 with open(self.expVCFFileName, "w") as vcfFileName: | |
44 vcfFileName.write("##fileformat=VCFv4.1\n") | |
45 vcfFileName.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n") | |
46 vcfFileName.write("chr1\t10759\t.\tC\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=47\n") | |
47 vcfFileName.write("chr1\t12438\t.\tC\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=62;ABQ=42\n") | |
48 vcfFileName.write("chr1\t17432\t.\tA\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=55;ABQ=37\n") | |
49 vcfFileName.write("chr1\t20391\t.\tA\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=56;ABQ=37\n") | |
50 vcfFileName.write("chr1\t21207\t.\tT\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=55;ABQ=41\n") | |
51 vcfFileName.write("chr1\t26057\t.\tT\tG\t0.087739243\t.\tAF=0.1538;DP=13;RBQ=60;ABQ=37\n") | |
52 vcfFileName.write("chr1\t36838\t.\tT\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=61;ABQ=36\n") | |
53 vcfFileName.write("chr1\t37751\t.\tA\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=42\n") | |
54 vcfFileName.write("chr1\t43500\t.\tC\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=40\n") | |
55 vcfFileName.write("chr1\t50481\t.\tA\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=60;ABQ=40\n") | |
56 vcfFileName.write("chr1\t106849\t.\tC\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=40\n") | |
57 vcfFileName.write("chr1\t108726\t.\tT\tA\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=50;ABQ=41\n") | |
58 vcfFileName.write("chr1\t114204\t.\tG\tC\t0.087739243\t.\tAF=0.3333;DP=6;RBQ=60;ABQ=39\n") | |
59 vcfFileName.write("chr1\t115030\t.\tA\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=57;ABQ=41\n") | |
60 vcfFileName.write("chr1\t116173\t.\tA\tG\t0.087739243\t.\tAF=0.2222;DP=9;RBQ=58;ABQ=39\n") | |
61 vcfFileName.write("chr1\t118433\t.\tG\tC\t0.087739243\t.\tAF=0.1429;DP=7;RBQ=64;ABQ=50\n") | |
62 vcfFileName.write("chr1\t119042\t.\tA\tG\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=55;ABQ=51\n") | |
63 vcfFileName.write("chr1\t139219\t.\tA\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=46\n") | |
64 vcfFileName.write("chr1\t140710\t.\tA\tG\t0.087739243\t.\tAF=0.1818;DP=11;RBQ=63;ABQ=41\n") | |
65 vcfFileName.write("chr1\t144419\t.\tT\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=63;ABQ=39\n") | |
66 vcfFileName.write("chr1\t146099\t.\tG\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=64;ABQ=38\n") | |
67 vcfFileName.write("chr1\t146435\t.\tT\tC\t0.087739243\t.\tAF=0.3889;DP=18;RBQ=60;ABQ=24\n") | |
68 vcfFileName.write("chr1\t147232\t.\tA\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=64;ABQ=39\n") | |
69 vcfFileName.write("chr1\t158703\t.\tA\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=60;ABQ=55\n") | |
70 vcfFileName.write("chr1\t166732\t.\tA\tT\t0.087739243\t.\tAF=0.3333;DP=6;RBQ=62;ABQ=24\n") | |
71 vcfFileName.write("chr1\t179887\t.\tC\tA\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=39\n") | |
72 vcfFileName.write("chr1\t185971\t.\tA\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=55;ABQ=40\n") | |
73 vcfFileName.write("chr1\t211074\t.\tA\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=40\n") | |
74 vcfFileName.write("chr1\t219573\t.\tG\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=61;ABQ=41\n") | |
75 vcfFileName.write("chr1\t229396\t.\tC\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=57;ABQ=39\n") | |
76 vcfFileName.write("chr1\t236388\t.\tT\tG\t0.087739243\t.\tAF=0.2000;DP=10;RBQ=52;ABQ=32\n") | |
77 vcfFileName.write("chr1\t245990\t.\tG\tC\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=61;ABQ=46\n") | |
78 vcfFileName.write("chr1\t249155\t.\tC\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=62;ABQ=37\n") | |
79 vcfFileName.write("chr1\t261257\t.\tT\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=60;ABQ=39\n") | |
80 vcfFileName.write("chr1\t274692\t.\tA\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=63;ABQ=39\n") | |
81 vcfFileName.write("chr1\t283468\t.\tG\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=63;ABQ=45\n") | |
82 vcfFileName.write("chr1\t284288\t.\tT\tA\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=39\n") | |
83 vcfFileName.write("chr1\t286983\t.\tT\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=64;ABQ=37\n") | |
84 vcfFileName.write("chr1\t287378\t.\tC\tA\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=37\n") | |
85 vcfFileName.write("chr1\t302928\t.\tA\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=64;ABQ=38\n") | |
86 vcfFileName.write("chr1\t305952\t.\tC\tA\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=38\n") | |
87 vcfFileName.write("chr1\t307932\t.\tT\tC\t0.087739243\t.\tAF=0.2000;DP=5;RBQ=65;ABQ=42\n") | |
88 vcfFileName.write("chr1\t317422\t.\tG\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=57;ABQ=40\n") | |
89 vcfFileName.write("chr1\t321480\t.\tA\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=64;ABQ=38\n") | |
90 vcfFileName.write("chr1\t322307\t.\tA\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=37\n") | |
91 vcfFileName.write("chr1\t328326\t.\tG\tC\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=62;ABQ=52\n") | |
92 vcfFileName.write("chr1\t333138\t.\tT\tG\t0.087739243\t.\tAF=0.4000;DP=5;RBQ=63;ABQ=24\n") | |
93 vcfFileName.write("chr1\t333388\t.\tT\tC\t0.087739243\t.\tAF=0.2000;DP=5;RBQ=64;ABQ=43\n") | |
94 vcfFileName.write("chr1\t335592\t.\tT\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=58;ABQ=39\n") | |
95 vcfFileName.write("chr1\t336572\t.\tC\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=58;ABQ=38\n") | |
96 vcfFileName.write("chr1\t347396\t.\tT\tG\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=62;ABQ=52\n") | |
97 vcfFileName.write("chr1\t359080\t.\tT\tG\t0.087739243\t.\tAF=0.2000;DP=5;RBQ=61;ABQ=42\n") | |
98 vcfFileName.write("chr1\t360223\t.\tA\tT\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=54;ABQ=52\n") | |
99 vcfFileName.write("chr1\t361047\t.\tT\tG\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=62;ABQ=50\n") | |
100 vcfFileName.write("chr1\t366048\t.\tA\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=39\n") | |
101 vcfFileName.write("chr1\t368105\t.\tA\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=55;ABQ=40\n") | |
102 vcfFileName.write("chr1\t373782\t.\tT\tA\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=41\n") | |
103 vcfFileName.write("chr1\t378159\t.\tG\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=63;ABQ=41\n") | |
104 vcfFileName.write("chr1\t383945\t.\tC\tG\t0.087739243\t.\tAF=0.1111;DP=9;RBQ=58;ABQ=57\n") | |
105 vcfFileName.write("chr1\t389461\t.\tA\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=36\n") | |
106 vcfFileName.write("chr1\t396860\t.\tG\tT\t0.087739243\t.\tAF=0.2000;DP=5;RBQ=65;ABQ=43\n") | |
107 vcfFileName.write("chr1\t397170\t.\tA\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=36\n") | |
108 vcfFileName.write("chr1\t399939\t.\tA\tC\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=63;ABQ=46\n") | |
109 vcfFileName.write("chr1\t400733\t.\tG\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=41\n") | |
110 vcfFileName.write("chr1\t401165\t.\tA\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=41\n") | |
111 vcfFileName.write("chr1\t406774\t.\tT\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=58;ABQ=39\n") | |
112 vcfFileName.write("chr1\t417293\t.\tC\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=55\n") | |
113 vcfFileName.write("chr1\t417723\t.\tG\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=49\n") | |
114 vcfFileName.write("chr1\t420308\t.\tC\tA\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=36\n") | |
115 vcfFileName.write("chr1\t435579\t.\tC\tT\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=40\n") | |
116 vcfFileName.write("chr1\t437183\t.\tG\tA\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=39\n") | |
117 vcfFileName.write("chr1\t437194\t.\tA\tT\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=41\n") | |
118 vcfFileName.write("chr1\t438866\t.\tG\tC\t0.087739243\t.\tAF=0.1429;DP=7;RBQ=62;ABQ=52\n") | |
119 vcfFileName.write("chr1\t446237\t.\tT\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=39\n") | |
120 vcfFileName.write("chr1\t446308\t.\tA\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=50;ABQ=38\n") | |
121 vcfFileName.write("chr1\t452322\t.\tA\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=36\n") | |
122 vcfFileName.write("chr1\t462721\t.\tT\tG\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=58;ABQ=41\n") | |
123 vcfFileName.write("chr1\t477145\t.\tC\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=64;ABQ=39\n") | |
124 vcfFileName.write("chr1\t493772\t.\tG\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=40\n") | |
125 vcfFileName.write("chr1\t498962\t.\tC\tA\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=41\n") | |
126 vcfFileName.write("chr1\t510532\t.\tT\tC\t0.087739243\t.\tAF=0.2000;DP=5;RBQ=64;ABQ=53\n") | |
127 vcfFileName.write("chr1\t516369\t.\tT\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=34;ABQ=40\n") | |
128 vcfFileName.write("chr1\t523631\t.\tG\tT\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=64;ABQ=40\n") | |
129 vcfFileName.write("chr1\t524680\t.\tC\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=41\n") | |
130 vcfFileName.write("chr1\t525898\t.\tT\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=62;ABQ=49\n") | |
131 vcfFileName.write("chr1\t526118\t.\tA\tC\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=61;ABQ=50\n") | |
132 vcfFileName.write("chr1\t535762\t.\tC\tT\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=42\n") | |
133 vcfFileName.write("chr1\t543235\t.\tG\tT\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=45;ABQ=41\n") | |
134 vcfFileName.write("chr1\t550086\t.\tT\tC\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=50;ABQ=41\n") | |
135 vcfFileName.write("chr1\t550508\t.\tA\tG\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=55;ABQ=40\n") | |
136 vcfFileName.write("chr1\t551143\t.\tG\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=39\n") | |
137 vcfFileName.write("chr1\t552924\t.\tA\tG\t0.087739243\t.\tAF=0.2500;DP=8;RBQ=62;ABQ=38\n") | |
138 vcfFileName.write("chr1\t553541\t.\tA\tG\t0.087739243\t.\tAF=0.1250;DP=8;RBQ=65;ABQ=52\n") | |
139 vcfFileName.write("chr1\t560806\t.\tT\tC\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=65;ABQ=49\n") | |
140 vcfFileName.write("chr1\t562736\t.\tC\tG\t0.087739243\t.\tAF=0.1667;DP=6;RBQ=64;ABQ=52\n") | |
141 vcfFileName.write("chr1\t563224\t.\tT\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=51;ABQ=39\n") | |
142 vcfFileName.write("chr1\t564217\t.\tT\tA\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=62;ABQ=40\n") | |
143 vcfFileName.write("chr1\t567288\t.\tC\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=40\n") | |
144 vcfFileName.write("chr1\t569652\t.\tT\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=48;ABQ=42\n") | |
145 vcfFileName.write("chr1\t570280\t.\tG\tT\t0.087739243\t.\tAF=0.1250;DP=8;RBQ=60;ABQ=53\n") | |
146 vcfFileName.write("chr1\t582185\t.\tT\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=63;ABQ=40\n") | |
147 vcfFileName.write("chr1\t582453\t.\tG\tC\t0.087739243\t.\tAF=0.5000;DP=2;RBQ=65;ABQ=38\n") | |
148 vcfFileName.write("chr1\t583477\t.\tT\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=62;ABQ=39\n") | |
149 vcfFileName.write("chr1\t584179\t.\tG\tT\t0.087739243\t.\tAF=0.2500;DP=4;RBQ=65;ABQ=41\n") | |
150 vcfFileName.write("chr1\t589074\t.\tG\tC\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=36\n") | |
151 vcfFileName.write("chr1\t596641\t.\tC\tG\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=65;ABQ=40\n") | |
152 vcfFileName.write("chr1\t599263\t.\tG\tT\t0.087739243\t.\tAF=0.3333;DP=3;RBQ=60;ABQ=38\n") | |
153 | |
154 ##fileDate=20090805 | |
155 ##source=myImputationProgramV3.1 | |
156 ##reference=1000GenomesPilot-NCBI36 | |
157 ##phasing=partial | |
158 ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> | |
159 ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> | |
160 ##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency"> | |
161 ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele"> | |
162 ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129"> | |
163 ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership"> | |
164 ##FILTER=<ID=q10,Description="Quality below 10"> | |
165 ##FILTER=<ID=s50,Description="Less than 50% of samples have data"> | |
166 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | |
167 ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | |
168 ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> | |
169 ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> | |
170 | |
if __name__ == "__main__":
    # Run the tests of this module when the file is executed as a script.
    unittest.main()
173 |