Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py @ 0:e0f8dcca02ed
Uploaded S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0f8dcca02ed |
---|---|
1 import unittest | |
2 import os | |
3 | |
4 from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP | |
5 from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP | |
6 | |
class Test_VarscanFileForGnpSNP(unittest.TestCase):
    """Unit tests for VarscanFileForGnpSNP: construction, parse() and ==.

    The parsing tests write a small tab-separated fixture named
    "varscan.tab" into the current working directory, parse it, and delete
    it again before asserting.
    """

    # Header row written at the top of every fixture file.
    _VARSCAN_HEADER = "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n"

    # Chromosome / sequence name shared by the hand-built expected hits.
    _CHROM = 'C02HBa0291P19_LR48'

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test__init__(self):
        # The constructor must store its arguments and start with the
        # default field separator (a tab) and an empty hit list.
        expFastqFileName = "SR.fastq"
        expRefFastaFileName = "ref.fasta"
        expTaxonName = "Arabidopsis thaliana"
        expVarscanFieldSeparator = "\t"
        expVarscanHitsList = []

        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)

        obsFastqFileName = iVarscanFileForGnpSNP.getFastqFileName()
        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()
        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()
        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()

        self.assertEqual(expFastqFileName, obsFastqFileName)
        self.assertEqual(expRefFastaFileName, obsRefFastaFileName)
        self.assertEqual(expTaxonName, obsTaxonName)
        self.assertEqual(expVarscanFieldSeparator, obsVarscanFieldSeparator)
        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse(self):
        # Two SNP rows at different positions: parse() must yield two hits
        # equal to the hand-built expectations below.
        varscanFileName = "varscan.tab"
        self._writeVarscanFile(varscanFileName)

        expVarscanHitsList = [
            self._makeExpectedSnpHit('32', 'C', 'T', '37', '35', 32),
            self._makeExpectedSnpHit('34', 'A', 'T', '40', '34', 34),
        ]

        obsVarscanHitsList = self._parseHitsAndRemoveFile(varscanFileName, '', '', '')

        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse_with_same_position_and_chr_and_type(self):
        # Both fixture rows share chromosome and position and are plain
        # substitutions; the second parsed hit is expected to carry
        # occurrence 2.
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_2(varscanFileName)

        expVarscanHitsOccurence = 2

        obsVarscanHitsList = self._parseHitsAndRemoveFile(varscanFileName, '', '', '')
        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()

        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_with_same_position_and_chr_and_different_type(self):
        # Same chromosome and position, but the second row's variant is
        # "+A" rather than a single base; its occurrence is expected to
        # stay at 1.
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_3(varscanFileName)

        expVarscanHitsOccurence = 1

        obsVarscanHitsList = self._parseHitsAndRemoveFile(varscanFileName)
        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()

        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_on_occurence(self):
        # Six rows over two sequence names; rows 3 and 6 repeat the
        # position of the row just before them and are expected to get
        # occurrence 2, every other row occurrence 1.
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_4(varscanFileName)

        expOccurrences = [1, 1, 2, 1, 1, 2]

        obsVarscanHitsList = self._parseHitsAndRemoveFile(varscanFileName)
        # Index explicitly so a too-short hit list raises IndexError
        # instead of being silently truncated.
        obsOccurrences = [obsVarscanHitsList[i].getOccurrence() for i in range(len(expOccurrences))]

        self.assertEqual(expOccurrences, obsOccurrences)

    def test__eq__notEqual(self):
        # Two instances differing only in their fastq file name must not
        # compare equal.
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", "SR.fastq", refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", "SR.fastq2", refFastaFileName, taxonName)

        # "==" is spelled out on purpose: this test targets __eq__, and
        # assertNotEqual would exercise "!=" instead.
        self.assertFalse(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def test__eq__equal(self):
        # Two instances built from identical arguments and holding equal
        # hit lists must compare equal.
        fastqFileName = "SR.fastq"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        lVarscanHits1 = [self._makeRawHit('34', 'A', 'T', '40', '34')]
        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)

        lVarscanHits2 = [self._makeRawHit('34', 'A', 'T', '40', '34')]
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)

        # "==" is spelled out on purpose: this test targets __eq__.
        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    def _parseHitsAndRemoveFile(self, varscanFileName, *ctorArgs):
        """Parse the fixture file and return its hit list, then delete it.

        ctorArgs are forwarded to VarscanFileForGnpSNP after the file name.
        The file is removed even when construction or parse() raises, so a
        failing test does not leave "varscan.tab" behind for the next one.
        """
        try:
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, *ctorArgs)
            iVarscanFileForGnpSNP.parse()
            return iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            os.remove(varscanFileName)

    def _makeRawHit(self, position, ref, var, qual1, qual2):
        """Return a hit with only the twelve raw Varscan columns set.

        Every argument is passed to its setter unchanged (the tests use
        strings, as read from a file). Columns not exposed as parameters
        use the constant values shared by all fixture rows.
        """
        hit = VarscanHitForGnpSNP()
        hit.setChrom(self._CHROM)
        hit.setPosition(position)
        hit.setRef(ref)
        hit.setVar(var)
        hit.setReads1('1')
        hit.setReads2('2')
        hit.setVarFreq('66,67%')
        hit.setStrands1('1')
        hit.setStrands2('1')
        hit.setQual1(qual1)
        hit.setQual2(qual2)
        hit.setPvalue('0.3999999999999999')
        return hit

    def _makeExpectedSnpHit(self, position, ref, var, qual1, qual2, gnpSnpPosition):
        """Return a raw hit completed with the GnpSNP fields of a single SNP.

        The GnpSNP reference and variant repeat the raw ones;
        gnpSnpPosition is the position as an int. Occurrence and
        polymorphism length are 1 and the polymorphism type is "SNP".
        """
        hit = self._makeRawHit(position, ref, var, qual1, qual2)
        hit.setGnpSNPRef(ref)
        hit.setGnpSNPVar(var)
        hit.setGnpSNPPosition(gnpSnpPosition)
        hit.setOccurrence(1)
        hit.setPolymType("SNP")
        hit.setPolymLength(1)
        return hit

    def _writeFixture(self, varscanFileName, dataLines):
        """Write the header row followed by dataLines to varscanFileName."""
        # "with" guarantees the handle is closed even if a write fails.
        with open(varscanFileName, 'w') as varscanFile:
            varscanFile.write(self._VARSCAN_HEADER)
            for dataLine in dataLines:
                varscanFile.write(dataLine)

    def _writeVarscanFile(self, varscanFileName):
        """Fixture: two rows at different positions (32 and 34)."""
        self._writeFixture(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t34\tA\tT\t1\t2\t66,67%\t1\t1\t40\t34\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_2(self, varscanFileName):
        """Fixture: two single-base rows sharing chromosome and position 32."""
        self._writeFixture(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_3(self, varscanFileName):
        """Fixture: position 32 twice, second row with variant "+A"."""
        self._writeFixture(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tC\t+A\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_4(self, varscanFileName):
        """Fixture: six rows over "seqname" and "chrom" with repeated positions."""
        self._writeFixture(varscanFileName, [
            "seqname\t2\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t8\tT\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()