comparison smart_toolShed/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox for managing RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e0f8dcca02ed
1 import unittest
2 import os
3
4 from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP
5 from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP
6
class Test_VarscanFileForGnpSNP(unittest.TestCase):
    """Unit tests for VarscanFileForGnpSNP: construction, parsing and equality.

    The parsing tests write a small tab-separated Varscan file named
    "varscan.tab" into the current working directory and always delete it
    again, even when the parser raises.
    """

    # Column header written as the first line of every generated Varscan file.
    _VARSCAN_HEADER = "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n"

    def test__init__(self):
        """The getters expose the constructor arguments, a tab separator and an empty hit list."""
        expFastqFileName = "SR.fastq"
        expRefFastaFileName = "ref.fasta"
        expTaxonName = "Arabidopsis thaliana"
        expVarscanFieldSeparator = "\t"
        expVarscanHitsList = []

        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)

        obsFastqFileName = iVarscanFileForGnpSNP.getFastqFileName()
        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()
        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()
        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()

        self.assertEqual(expFastqFileName, obsFastqFileName)
        self.assertEqual(expRefFastaFileName, obsRefFastaFileName)
        self.assertEqual(expTaxonName, obsTaxonName)
        self.assertEqual(expVarscanFieldSeparator, obsVarscanFieldSeparator)
        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse(self):
        """Parsing a two-row file yields the two fully populated SNP hits, in file order."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile(varscanFileName)

        expVarscanHitsList = [
            self._createExpectedSnpHit('32', 'C', 'T', '37', '35'),
            self._createExpectedSnpHit('34', 'A', 'T', '40', '34'),
        ]

        try:
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, '', '', '')
            iVarscanFileForGnpSNP.parse()
            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            # Remove the fixture even if parsing blows up.
            os.remove(varscanFileName)

        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse_with_same_position_and_chr_and_type(self):
        """The second of two hits at the same chromosome and position gets occurrence 2."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_2(varscanFileName)

        expVarscanHitsOccurence = 2

        try:
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, '', '', '')
            iVarscanFileForGnpSNP.parse()
            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            os.remove(varscanFileName)

        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()
        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_with_same_position_and_chr_and_different_type(self):
        """A '+A' variant sharing chromosome and position with a 'T' variant keeps occurrence 1."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_3(varscanFileName)

        expVarscanHitsOccurence = 1

        try:
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName)
            iVarscanFileForGnpSNP.parse()
            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            os.remove(varscanFileName)

        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()
        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_on_occurence(self):
        """Occurrences are checked for six rows spread over two sequence names."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_4(varscanFileName)

        # One expected value per data row, in file order: rows 3 and 6 repeat
        # the (sequence name, position) of the row just before them.
        expOccurrences = [1, 1, 2, 1, 1, 2]

        try:
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName)
            iVarscanFileForGnpSNP.parse()
            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            os.remove(varscanFileName)

        obsOccurrences = [obsVarscanHitsList[i].getOccurrence() for i in range(len(expOccurrences))]
        self.assertEqual(expOccurrences, obsOccurrences)

    def test__eq__notEqual(self):
        """Two instances that differ only by their fastq file name are not equal."""
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", "SR.fastq", refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", "SR.fastq2", refFastaFileName, taxonName)

        # Spelled with == on purpose: this test targets __eq__, and
        # assertNotEqual would go through != instead.
        self.assertFalse(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def test__eq__equal(self):
        """Two instances with the same names and equal hit lists compare equal."""
        fastqFileName = "SR.fastq"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        lVarscanHits1 = [self._createRawVarscanHit('34', 'A', 'T', '40', '34')]
        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)

        # A distinct but identically populated hit object for the second instance.
        lVarscanHits2 = [self._createRawVarscanHit('34', 'A', 'T', '40', '34')]
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)

        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def _createRawVarscanHit(self, position, ref, var, qual1, qual2):
        """Return a hit with only the twelve Varscan columns set.

        Chromosome, read counts, frequency, strands and p-value are the
        constants shared by every 'C02HBa0291P19_LR48' fixture row; the
        remaining columns come from the arguments (all given as strings).
        """
        varscanHit = VarscanHitForGnpSNP()
        varscanHit.setChrom('C02HBa0291P19_LR48')
        varscanHit.setPosition(position)
        varscanHit.setRef(ref)
        varscanHit.setVar(var)
        varscanHit.setReads1('1')
        varscanHit.setReads2('2')
        varscanHit.setVarFreq('66,67%')
        varscanHit.setStrands1('1')
        varscanHit.setStrands2('1')
        varscanHit.setQual1(qual1)
        varscanHit.setQual2(qual2)
        varscanHit.setPvalue('0.3999999999999999')
        return varscanHit

    def _createExpectedSnpHit(self, position, ref, var, qual1, qual2):
        """Return a raw hit completed with the GnpSNP fields expected by test_parse.

        The GnpSNP reference/variant repeat the raw ones, the GnpSNP position
        is the raw position as an int, and the hit is a first-occurrence SNP
        of length 1.
        """
        varscanHit = self._createRawVarscanHit(position, ref, var, qual1, qual2)
        varscanHit.setGnpSNPRef(ref)
        varscanHit.setGnpSNPVar(var)
        varscanHit.setGnpSNPPosition(int(position))
        varscanHit.setOccurrence(1)
        varscanHit.setPolymType("SNP")
        varscanHit.setPolymLength(1)
        return varscanHit

    def _writeVarscanRows(self, varscanFileName, lRows):
        """Write the column header followed by the given newline-terminated rows."""
        # The with-block closes the handle even if a write fails.
        with open(varscanFileName, 'w') as varscanFile:
            varscanFile.write(self._VARSCAN_HEADER)
            varscanFile.writelines(lRows)

    def _writeVarscanFile(self, varscanFileName):
        """Write two hits at different positions (32 and 34) of the same chromosome."""
        self._writeVarscanRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t34\tA\tT\t1\t2\t66,67%\t1\t1\t40\t34\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_2(self, varscanFileName):
        """Write two hits sharing chromosome and position 32, both with Var 'T'."""
        self._writeVarscanRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_3(self, varscanFileName):
        """Write two hits sharing chromosome and position 32, with Var 'T' then '+A'."""
        self._writeVarscanRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tC\t+A\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_4(self, varscanFileName):
        """Write six hits over two sequence names, position 4 appearing twice in each."""
        self._writeVarscanRows(varscanFileName, [
            "seqname\t2\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t8\tT\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])
if __name__ == "__main__":
    # Executed as a script: let unittest discover and run the tests in this module.
    unittest.main()