comparison commons/core/seq/test/Test_Bioseq.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children 94ab73e8a190
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 import unittest
33 import os
34 import sys
35 from commons.core.seq.Bioseq import Bioseq
36 from commons.core.utils.FileUtils import FileUtils
37 from commons.core.coord.Map import Map
38
39
40 class Test_Bioseq( unittest.TestCase ):
41
def setUp(self):
    """Give every test its own default-constructed Bioseq in self._bs."""
    self._bs = Bioseq()
44
45
def test_isEmpty_True(self):
    """isEmpty() is expected to be True when header and sequence are empty strings."""
    self._bs.setHeader("")
    self._bs.setSequence("")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(True, self._bs.isEmpty())
52
53
def test_isEmpty_False(self):
    """isEmpty() is expected to be False once a header and a sequence are set."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(False, self._bs.isEmpty())
60
61
def test___eq__(self):
    """Two Bioseq objects with the same header and sequence must compare equal."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    other = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, other)
67
68
def test___ne__Header(self):
    """Bioseq objects with the same sequence but different headers must differ."""
    self._bs.setHeader("seq2")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    other = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, other)
74
75
def test___ne__Sequence(self):
    """Bioseq objects with the same header but different sequences must differ."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("GGACGATGCAGCATGCGAATGACGAT")
    other = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, other)
81
82
def test_reverse(self):
    """reverse() is expected to turn "TGCGGA" into "AGGCGT" in place."""
    expected = "AGGCGT"
    self._bs.setHeader("seq1")
    self._bs.setSequence("TGCGGA")
    self._bs.reverse()
    self.assertEqual(self._bs.sequence, expected)
90
91
def test_complement(self):
    """complement() is expected to turn "TGCGGA" into "ACGCCT" in place."""
    expected = "ACGCCT"
    self._bs.setHeader("seq1")
    self._bs.setSequence("TGCGGA")
    self._bs.complement()
    self.assertEqual(self._bs.sequence, expected)
99
100
def test_complement_with_unknown_symbol(self):
    """complement() on a sequence mixing ambiguity codes and the symbol 'F'.

    The expected string has 'N' at the position of 'F' in the input.
    """
    expected = "ACGCCTNKYWARSMBDHVN"
    self._bs.setHeader("seq1")
    self._bs.setSequence("TGCGGAFMRWTYSKVHDBN")
    self._bs.complement()
    self.assertEqual(self._bs.sequence, expected)
108
109
def test_reverseComplement(self):
    """reverseComplement() is expected to turn "TGCGGA" into "TCCGCA" in place."""
    expected = "TCCGCA"
    self._bs.setHeader("seq1")
    self._bs.setSequence("TGCGGA")
    self._bs.reverseComplement()
    self.assertEqual(self._bs.sequence, expected)
117
118
def test_cleanGap(self):
    """cleanGap() is expected to remove every '-' character from the sequence."""
    expected = "ATTTTGCAGTCTTATTCGAGGCCATTGCT"
    self._bs.setSequence("-ATTTTGC-AGTC--TTATTCGAG-----GCCATTGCT-")
    self._bs.cleanGap()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs.sequence, expected)
125
126
def test_copyBioseqInstance(self):
    """copyBioseqInstance() must return an object equal to the original."""
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    duplicate = self._bs.copyBioseqInstance()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, duplicate)
132
133
def test_setFrameInfoOnHeader_without_description(self):
    """setFrameInfoOnHeader(-1) on header "seq" is expected to give "seq_-1"."""
    expHeader = "seq_-1"
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    self._bs.setFrameInfoOnHeader(-1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
141
142
def test_setFrameInfoOnHeader_with_description(self):
    """The frame suffix is expected right after the first word of the header.

    "seq description" with phase -1 must become "seq_-1 description".
    """
    expHeader = "seq_-1 description"
    self._bs.setHeader("seq description")
    self._bs.setSequence("TGCGGA")
    self._bs.setFrameInfoOnHeader(-1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
150
151
def test_read(self):
    """read() must take the header line and join the two sequence lines.

    The fixture file is written, read back through Bioseq.read() and
    always removed, even when read() raises.
    """
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write(">seq1 description1\n")
        faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    # Adjacent literals: the two sequence lines of the fixture, concatenated.
    expBioseq.sequence = (
        "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
        "ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    )

    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Do not leave the fixture behind if read() fails.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
167
168
def test_read_WithEmptyFile(self):
    """Reading an empty file is expected to leave header and sequence as None."""
    faFileName = "dummyFaFile.fa"
    # Opening for writing and closing at once creates an empty file.
    with open(faFileName, "w"):
        pass

    expBioseq = Bioseq()
    expBioseq.header = None
    expBioseq.sequence = None

    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Do not leave the fixture behind if read() fails.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
181
182
def test_read_without_header(self):
    """A file with no '>' line is expected to yield an empty header and sequence."""
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write("seq1 description1\n")
        faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

    expBioseq = Bioseq()
    expBioseq.header = ""
    expBioseq.sequence = ""

    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Do not leave the fixture behind if read() fails.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
197
198
def test_read_with_two_consecutive_headers(self):
    """Two '>' lines in a row: first header kept, sequence expected empty."""
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write(">seq1 description1\n")
        faFile.write(">ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = ""

    obsBioseq = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Do not leave the fixture behind if read() fails.
        os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
213
214
def test_read_withEmptyLines(self):
    """Blank lines before the header and after the sequence must be ignored."""
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write("\n")
        faFile.write(">seq1 description1\n")
        faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        faFile.write("\n")

    # Adjacent literals: the two sequence lines of the fixture, concatenated.
    exp = Bioseq(
        "seq1 description1",
        "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
        "ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG",
    )

    obs = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Do not leave the fixture behind if read() fails.
        os.remove(faFileName)
    self.assertEqual(exp, obs)
234
def test_read_with_70nt_by_line(self):
    """read() must join sequence lines longer than the 60 used in the other fixtures."""
    faFileName = "dummyFaFile.fa"
    with open(faFileName, "w") as faFile:
        faFile.write("\n")
        faFile.write(">seq1 description1\n")
        faFile.write("TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAA\n")
        faFile.write("TGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT\n")
        faFile.write("\n")

    # Adjacent literals: the two sequence lines of the fixture, concatenated.
    exp = Bioseq(
        "seq1 description1",
        "TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAA"
        "TGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT",
    )

    obs = Bioseq()
    try:
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Do not leave the fixture behind if read() fails.
        os.remove(faFileName)
    self.assertEqual(exp, obs)
254
def test_appendBioseqInFile(self):
    """appendBioseqInFile() must add the record after the existing one.

    The expected file shows the appended sequence wrapped on several
    lines. Both temporary files are removed even when the call or the
    comparison fails.
    """
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">seq2 description2\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        bioseq.appendBioseqInFile(obsFaFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up whatever was created, whether or not the test passed.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
280
281
def test_writeABioseqInAFastaFile(self):
    """writeABioseqInAFastaFile() must write the record to an open handle.

    The handle already contains one record; the expected file shows the
    new record after it. Both temporary files are removed afterwards
    (the expected file used to be left on disk).
    """
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">seq2 description2\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        # The handle stays open so the method under test writes after
        # the first record.
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFile(obsFaFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up whatever was created, whether or not the test passed.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
306
307
def test_writeABioseqInAFastaFileWithOtherHeader(self):
    """The record must be written under the header passed as argument.

    The Bioseq keeps "seq2 description2" as its own header; the expected
    file shows "seq2 New header2" instead. Both temporary files are
    removed even when the call or the comparison fails.
    """
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"
    newHeader = "seq2 New header2"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">" + newHeader + "\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        # The handle stays open so the method under test writes after
        # the first record.
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFileWithOtherHeader(obsFaFile, newHeader)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up whatever was created, whether or not the test passed.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
334
335
def test_writeSeqInFasta(self):
    """writeSeqInFasta() is expected to write only the wrapped sequence lines.

    The expected file contains no header line. Both temporary files are
    removed even when the call or the comparison fails.
    """
    expFaFile = "dummyExpFile.fa"
    obsFaFile = "dummyObsFile.fa"

    iBs = Bioseq()
    iBs.header = "dummySeq"
    iBs.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFile, "w") as expFaFileHandler:
            expFaFileHandler.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFaFile, "w") as obsFaFileHandler:
            iBs.writeSeqInFasta(obsFaFileHandler)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFile, obsFaFile))
    finally:
        # Clean up whatever was created, whether or not the test passed.
        for fileName in (obsFaFile, expFaFile):
            if os.path.exists(fileName):
                os.remove(fileName)
358
359
def test_subseq(self):
    """subseq(10, 30) must return a 1-based, inclusive fragment.

    The expected header is the original one followed by
    " fragment <start>..<end>".
    """
    start = 10
    end = 30
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    # 1-based inclusive coordinates map to the slice [start - 1:end].
    expSubBioseq.sequence = bioseq.sequence[(start - 1):end]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, bioseq.subseq(start, end))
371
372
def test_subseq_no_end(self):
    """subseq(start) without an end is expected to run to the last position."""
    start = 10
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    # The implied end is the length of the full sequence.
    expEnd = len(bioseq.sequence)
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(expEnd)
    expSubBioseq.sequence = bioseq.sequence[(start - 1):expEnd]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, bioseq.subseq(start))
384
385
def test_subseq_start_gt_end(self):
    """subseq() is expected to return None when start (30) exceeds end (10)."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(None, bioseq.subseq(30, 10))
395
396
def test_subseq_start_eq_end(self):
    """subseq(10, 10) is expected to return a one-nucleotide fragment."""
    start = 10
    end = 10
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    # 1-based inclusive coordinates map to the slice [start - 1:end].
    expSubBioseq.sequence = bioseq.sequence[(start - 1):end]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, bioseq.subseq(start, end))
408
409
def test_subseq_negative_start(self):
    """subseq() is expected to return None for a negative start (-10)."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(None, bioseq.subseq(-10, 10))
419
420
def test_getNtFromPosition_1(self):
    """getNtFromPosition(1) must return the first nucleotide (1-based), "G"."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("G", bioseq.getNtFromPosition(1))
428
429
def test_getNtFromPosition_10(self):
    """getNtFromPosition(10) is expected to return "C"."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("C", bioseq.getNtFromPosition(10))
437
438
def test_getNtFromPosition_last(self):
    """getNtFromPosition(146), the last position tested, is expected to return "G"."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("G", bioseq.getNtFromPosition(146))
446
447
def test_getNtFromPosition_position_outside_range_0(self):
    """Position 0 is outside the 1-based range: None is expected."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(None, bioseq.getNtFromPosition(0))
455
456
def test_getNtFromPosition_position_outside_range_negative(self):
    """A negative position (-10) is expected to yield None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(None, bioseq.getNtFromPosition(-10))
464
465
def test_getNtFromPosition_position_outside_range_positive(self):
    """Position 147, one past the last position tested elsewhere, is expected to yield None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(None, bioseq.getNtFromPosition(147))
473
474
def test_view(self):
    """view() is expected to print the header and the wrapped sequence.

    sys.stdout is redirected to a file while view() runs and is always
    restored. The temporary files are always removed.
    """
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"

    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        stdoutRef = sys.stdout
        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view()
            finally:
                # Restore stdout before the file closes, so a failure
                # cannot leave it pointing at a closed handle.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up whatever was created, whether or not the test passed.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
500
501
def test_view_with_l(self):
    """view(120) is expected to print the header and the first two sequence lines.

    The third wrapped line of the full sequence is absent from the
    expected output. sys.stdout is always restored and the temporary
    files are always removed.
    """
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"

    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")

        stdoutRef = sys.stdout
        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view(120)
            finally:
                # Restore stdout before the file closes, so a failure
                # cannot leave it pointing at a closed handle.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up whatever was created, whether or not the test passed.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
522
523
def test_getLength(self):
    """getLength() is expected to return 146 for the reference sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(146, bioseq.getLength())
531
532
def test_getLength_empty_seq(self):
    """getLength() on a default-constructed Bioseq is expected to return 0."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(0, bioseq.getLength())
538
539
def test_getLength_WithoutN(self):
    """getLength(False) is expected to return 146 for a sequence containing N runs.

    Presumably the False argument excludes 'N' characters from the count.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGANCGCTGCTTTATTAAGCGCTAGATGNNNNNNNNNNNNNNNCGACGCTGCATTTATTAAGCGCTAGCGATTATANNNNNNNNNTAGCAGACGCATATTATATTGCGCGATGCGACGCTGCTTTATTANAGCGCTAGCGNNATTATATAGCANGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(146, bioseq.getLength(False))
547
548
def test_getLength_WithoutN_empty_seq(self):
    """getLength(False) on a default-constructed Bioseq is expected to return 0."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(0, bioseq.getLength(False))
554
555
def test_countNt(self):
    """countNt('N') is expected to return 3 for the reference sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(3, bioseq.countNt('N'))
563
564
def test_countNt_withCharacterNotExisting(self):
    """countNt('W') is expected to return 0 when 'W' is absent from the sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(0, bioseq.countNt('W'))
572
573
def test_countAllNt(self):
    """countAllNt() is expected to return a dict of counts for A, C, T, G and N."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpCount = {'A': 34, 'C': 31, 'T': 43, 'G': 35, 'N': 3}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpCount, bioseq.countAllNt())
581
582
def test_occ_word_size_1(self):
    """occ_word(1) is expected to return per-letter counts and a total of 143.

    The expected dict has no 'N' key and the total equals the sum of
    the A, C, G and T counts.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'A': 34, 'C': 31, 'T': 43, 'G': 35}
    ExpNbWord = 143
    dObsOccWord, ObsNbWord = bioseq.occ_word(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
592
593
def test_occ_word_size_0(self):
    """occ_word(0) is expected to return an empty dict and a word count of 0."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dObsOccWord, ObsNbWord = bioseq.occ_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual({}, dObsOccWord)
    self.assertEqual(0, ObsNbWord)
603
604
def test_occ_word_size_n(self):
    """occ_word(3) is expected to return a count per 3-letter word and a total of 135.

    The expected dict also lists words with zero occurrences.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'ACC': 0, 'ATG': 2, 'AAG': 3, 'AAA': 0, 'ATC': 0, 'AAC': 0, 'ATA': 8, 'AGG': 0, 'CCT': 0, 'CTC': 0, 'AGC': 8, 'ACA': 0, 'AGA': 2, 'CAT': 3, 'AAT': 0, 'ATT': 9, 'CTG': 3, 'CTA': 3, 'ACT': 0, 'CAC': 0, 'ACG': 2, 'CAA': 0, 'AGT': 0, 'CAG': 2, 'CCG': 0, 'CCC': 0, 'CTT': 3, 'TAT': 13, 'GGT': 0, 'TGT': 0, 'CGA': 3, 'CCA': 0, 'TCT': 0, 'GAT': 3, 'CGG': 0, 'TTT': 3, 'TGC': 7, 'GGG': 0, 'TAG': 5, 'GGA': 0, 'TAA': 3, 'GGC': 0, 'TAC': 0, 'TTC': 0, 'TCG': 0, 'TTA': 10, 'TTG': 2, 'TCC': 0, 'GAA': 0, 'TGG': 0, 'GCA': 5, 'GTA': 0, 'GCC': 0, 'GTC': 0, 'GCG': 12, 'GTG': 0, 'GAG': 0, 'GTT': 0, 'GCT': 9, 'TGA': 0, 'GAC': 2, 'CGT': 0, 'TCA': 0, 'CGC': 10}
    ExpNbWord = 135
    dObsOccWord, ObsNbWord = bioseq.occ_word(3)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
614
615
def test_freq_word_size_1(self):
    """freq_word(1) is expected to return the per-letter frequencies.

    NOTE(review): the comparison is exact float equality, so it depends
    on freq_word() computing the ratios the same way as these literals.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'A': 0.23776223776223776, 'C': 0.21678321678321677, 'T': 0.30069930069930068, 'G': 0.24475524475524477}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, bioseq.freq_word(1))
623
624
def test_freq_word_size_0(self):
    """freq_word(0) is expected to return an empty dict."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual({}, bioseq.freq_word(0))
632
633
def test_freq_word_size_n(self):
    """freq_word(3) is expected to return a frequency per 3-letter word.

    NOTE(review): the comparison is exact float equality, so it depends
    on freq_word() computing the ratios the same way as these literals.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'ACC': 0.0, 'ATG': 0.014814814814814815, 'AAG': 0.022222222222222223, 'AAA': 0.0, 'ATC': 0.0, 'AAC': 0.0, 'ATA': 0.059259259259259262, 'AGG': 0.0, 'CCT': 0.0, 'CTC': 0.0, 'AGC': 0.059259259259259262, 'ACA': 0.0, 'AGA': 0.014814814814814815, 'CAT': 0.022222222222222223, 'AAT': 0.0, 'ATT': 0.066666666666666666, 'CTG': 0.022222222222222223, 'CTA': 0.022222222222222223, 'ACT': 0.0, 'CAC': 0.0, 'ACG': 0.014814814814814815, 'CAA': 0.0, 'AGT': 0.0, 'CAG': 0.014814814814814815, 'CCG': 0.0, 'CCC': 0.0, 'TAT': 0.096296296296296297, 'GGT': 0.0, 'TGT': 0.0, 'CGA': 0.022222222222222223, 'CCA': 0.0, 'TCT': 0.0, 'GAT': 0.022222222222222223, 'CGG': 0.0, 'CTT': 0.022222222222222223, 'TGC': 0.05185185185185185, 'GGG': 0.0, 'TAG': 0.037037037037037035, 'GGA': 0.0, 'TAA': 0.022222222222222223, 'GGC': 0.0, 'TAC': 0.0, 'TTC': 0.0, 'TCG': 0.0, 'TTT': 0.022222222222222223, 'TTG': 0.014814814814814815, 'TCC': 0.0, 'GAA': 0.0, 'TGG': 0.0, 'GCA': 0.037037037037037035, 'GTA': 0.0, 'GCC': 0.0, 'GTC': 0.0, 'TGA': 0.0, 'GCG': 0.088888888888888892, 'GTG': 0.0, 'GAG': 0.0, 'GTT': 0.0, 'GCT': 0.066666666666666666, 'TTA': 0.07407407407407407, 'GAC': 0.014814814814814815, 'CGT': 0.0, 'TCA': 0.0, 'CGC': 0.07407407407407407}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, bioseq.freq_word(3))
641
642
def test_findORF_no_ORF_in_sequence(self):
    """findORF() is expected to return empty lists under keys 0, 1 and 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATT"
    expORF = {0: [], 1: [], 2: []}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
650
651
def test_findORF_one_ORF_in_first_phase(self):
    """findORF() is expected to report position 0 under key 0 only."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGNCGCTGCTTTATT"
    expORF = {0: [0], 1: [], 2: []}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
659
660
def test_findORF_three_ORF_in_first_phase(self):
    """findORF() is expected to report positions 0, 6 and 12 under key 0 only."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGTAGNCGTGACTGCTTTATT"
    expORF = {0: [0, 6, 12], 1: [], 2: []}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
668
669
def test_findORF_two_ORF_in_first_phase_one_ORF_in_second_phase(self):
    """findORF() is expected to report [0, 12] under key 0 and [4] under key 1."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGCTTTATT"
    expORF = {0: [0, 12], 1: [4], 2: []}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
677
678
def test_findORF_two_ORF_in_first_phase_three_ORF_in_second_phase(self):
    """findORF() is expected to report [0, 12] under key 0 and [4, 16, 19] under key 1."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGATAGTATT"
    expORF = {0: [0, 12], 1: [4, 16, 19], 2: []}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
686
687
def test_findORF_one_ORF_in_second_phase_three_ORF_in_third_phase(self):
    """findORF() is expected to report [13] under key 1 and [5, 17, 20] under key 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "AATATTAGTGGAGTAGTTGATGATTTT"
    expORF = {0: [], 1: [13], 2: [5, 17, 20]}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
695
696
def test_findORF_three_ORF_in_second_phase_one_ORF_in_third_phase(self):
    """findORF() is expected to report [16, 19, 25] under key 1 and [2] under key 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TTTGAAGTGGAGGAGTTGATGATTTTAAT"
    expORF = {0: [], 1: [16, 19, 25], 2: [2]}
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, bioseq.findORF())
704
705
def test_upCase(self):
    """upCase() is expected to convert the sequence to upper case in place."""
    expSeq = "TAATTCGGCCT"
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "taattcggcct"
    bioseq.upCase()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, bioseq.sequence)
714
715
def test_lowCase(self):
    """lowCase() is expected to convert the sequence to lower case in place."""
    expSeq = "taattcggcct"
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "TAATTCGGCCT"
    bioseq.lowCase()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, bioseq.sequence)
724
725
def test_getClusterID(self):
    """getClusterID() is expected to return "0" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("0", bioseq.getClusterID())
733
734
def test_getGroupID(self):
    """getGroupID() is expected to return "2" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("2", bioseq.getGroupID())
742
743
def test_getHeaderFullSeq(self):
    """getHeaderFullSeq() is expected to return "chunk1", the second word of the header."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("chunk1", bioseq.getHeaderFullSeq())
751
752
def test_getFragStrand_plus_strand(self):
    """getFragStrand() is expected to return '+' for coordinates 74091..74624 (start < end)."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('+', bioseq.getFragStrand())
760
761
def test_getFragStrand_minus_strand(self):
    """getFragStrand() is expected to return '-' for coordinates 74624..74091 (start > end)."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74624..74091"
    bioseq.sequence = "TAATTCGGCCT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('-', bioseq.getFragStrand())
769
770
def test_getATGCNFromIUPAC_A(self):
    """getATGCNFromIUPAC('A') is expected to return 'A' unchanged."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('A', bioseq.getATGCNFromIUPAC('A'))
776
777
def test_getATGCNFromIUPAC_T(self):
    """getATGCNFromIUPAC('T') is expected to return 'T' unchanged."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('T', bioseq.getATGCNFromIUPAC('T'))
783
784
def test_getATGCNFromIUPAC_C(self):
    """getATGCNFromIUPAC('C') is expected to return 'C' unchanged."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('C', bioseq.getATGCNFromIUPAC('C'))
790
791
def test_getATGCNFromIUPAC_G(self):
    """getATGCNFromIUPAC('G') is expected to return 'G' unchanged."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('G', bioseq.getATGCNFromIUPAC('G'))
797
798
def test_getATGCNFromIUPAC_N(self):
    """getATGCNFromIUPAC('N') is expected to return 'N' unchanged."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('N', bioseq.getATGCNFromIUPAC('N'))
804
805
def test_getATGCNFromIUPAC_U(self):
    """getATGCNFromIUPAC('U') is expected to return 'T'."""
    bioseq = Bioseq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('T', bioseq.getATGCNFromIUPAC('U'))
811
812
def test_getATGCNFromIUPAC_R(self):
    """getATGCNFromIUPAC('R') may return either 'A' or 'G'."""
    accepted = ('A', 'G')
    nucleotide = Bioseq().getATGCNFromIUPAC('R')
    self.assertTrue(nucleotide in accepted)
819
820
def test_getATGCNFromIUPAC_Y(self):
    # 'Y' may resolve to either 'C' or 'T'; both outcomes are accepted.
    bioseq = Bioseq()
    expNucls = ('C', 'T')
    obsNucl = bioseq.getATGCNFromIUPAC('Y')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'Y'" % (obsNucl,))
827
828
def test_getATGCNFromIUPAC_M(self):
    # 'M' may resolve to either 'C' or 'A'; both outcomes are accepted.
    bioseq = Bioseq()
    expNucls = ('C', 'A')
    obsNucl = bioseq.getATGCNFromIUPAC('M')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'M'" % (obsNucl,))
835
836
def test_getATGCNFromIUPAC_K(self):
    # 'K' may resolve to either 'T' or 'G'; both outcomes are accepted.
    bioseq = Bioseq()
    expNucls = ('T', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('K')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'K'" % (obsNucl,))
843
844
def test_getATGCNFromIUPAC_W(self):
    # 'W' may resolve to either 'T' or 'A'; both outcomes are accepted.
    bioseq = Bioseq()
    expNucls = ('T', 'A')
    obsNucl = bioseq.getATGCNFromIUPAC('W')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'W'" % (obsNucl,))
851
852
def test_getATGCNFromIUPAC_S(self):
    # 'S' may resolve to either 'C' or 'G'; both outcomes are accepted.
    bioseq = Bioseq()
    expNucls = ('C', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('S')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'S'" % (obsNucl,))
859
860
def test_getATGCNFromIUPAC_B(self):
    # 'B' may resolve to 'C', 'T' or 'G'; any of the three is accepted.
    bioseq = Bioseq()
    expNucls = ('C', 'T', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('B')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'B'" % (obsNucl,))
868
869
def test_getATGCNFromIUPAC_D(self):
    # 'D' may resolve to 'A', 'T' or 'G'; any of the three is accepted.
    bioseq = Bioseq()
    expNucls = ('A', 'T', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('D')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'D'" % (obsNucl,))
877
878
def test_getATGCNFromIUPAC_H(self):
    # 'H' may resolve to 'C', 'T' or 'A'; any of the three is accepted.
    bioseq = Bioseq()
    expNucls = ('C', 'T', 'A')
    obsNucl = bioseq.getATGCNFromIUPAC('H')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'H'" % (obsNucl,))
886
887
def test_getATGCNFromIUPAC_V(self):
    # 'V' may resolve to 'C', 'A' or 'G'; any of the three is accepted.
    bioseq = Bioseq()
    expNucls = ('C', 'A', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('V')
    # The message exposes the observed value when the check fails.
    self.assertTrue(obsNucl in expNucls,
                    "unexpected nucleotide %r for IUPAC code 'V'" % (obsNucl,))
895
896
def test_getATGCNFromIUPAC_Z(self):
    # The letter 'Z' is expected to be mapped to 'N'.
    bioseq = Bioseq()
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('Z')
    self.assertEqual(expNucl, obsNucl)
902
903
def test_partialIUPAC(self):
    # After partialIUPAC() the single 'R' of the sequence must have become
    # 'A' or 'G', with every other character left as it was.
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN"
    expSequences = ("ATGCNAATGCN", "ATGCNGATGCN")
    bioseq.partialIUPAC()
    obsSequence = bioseq.sequence
    # The message exposes the observed sequence when the check fails.
    self.assertTrue(obsSequence in expSequences,
                    "unexpected sequence %r after partialIUPAC()" % (obsSequence,))
912
913
def test_checkEOF(self):
    # checkEOF() is expected to remove every carriage return ("\r")
    # embedded in the sequence, in place.
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN\rATGCAAT\rTATA\r"
    bioseq.checkEOF()
    obsSequence = bioseq.sequence
    expSequence = "ATGCNRATGCNATGCAATTATA"

    self.assertEqual(expSequence, obsSequence)
922
923
def test_getLMapWhithoutGap(self):
    # Two internal gaps ('-') split the sequence into three ungapped
    # stretches; one Map is expected per stretch, with 1-based inclusive
    # coordinates (e.g. "ATGC" -> 1..4).
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map( "header_subSeq1", "header", 1, 4 ),
        Map( "header_subSeq2", "header", 6, 7 ),
        Map( "header_subSeq3", "header", 9, 11 ),
    ]

    self.assertEqual(expLMap, obsLMap)
932
933
def test_getLMapWhithoutGap_seqStartsWithGap(self):
    # A leading gap must not produce an empty stretch: the first Map is
    # expected to start at position 2.
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "-TGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map( "header_subSeq1", "header", 2, 4 ),
        Map( "header_subSeq2", "header", 6, 7 ),
        Map( "header_subSeq3", "header", 9, 11 ),
    ]

    self.assertEqual(expLMap, obsLMap)
942
943
def test_getLMapWhithoutGap_seqEndsWithGap(self):
    # A trailing gap must not extend the last stretch: the last Map is
    # expected to end at position 10.
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GC-"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map( "header_subSeq1", "header", 1, 4 ),
        Map( "header_subSeq2", "header", 6, 7 ),
        Map( "header_subSeq3", "header", 9, 10 ),
    ]

    self.assertEqual(expLMap, obsLMap)
952
def test_getGCpercentage_onlyATGC( self ):
    # 4 of the 7 bases of "TGCAGCT" are G or C.
    iBs = Bioseq( "seq", "TGCAGCT" )
    exp = 100 * 4 / 7.0
    obs = iBs.getGCpercentage()
    # Floating-point result: compare with a tolerance rather than exactly,
    # so a different but equivalent order of operations does not fail.
    self.assertAlmostEqual( exp, obs )
958
def test_getGCpercentageInSequenceWithoutCountNInLength( self ):
    # The five trailing 'N' are expected to be left out of the length:
    # 4 G/C out of the 7 remaining bases.
    iBs = Bioseq( "seq", "TGCAGCTNNNNN" )
    exp = 100 * 4 / 7.0
    obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()
    # Floating-point result: compare with a tolerance rather than exactly,
    # so a different but equivalent order of operations does not fail.
    self.assertAlmostEqual( exp, obs )
964
def test_get5PrimeFlank(self):
    # The 3 bases immediately before position 7 (1-based) are "TTT".
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 3)
    expFlank = "TTT"
    self.assertEqual(expFlank, obsFlank)
971
def test_get5PrimeFlank_flank_length_truncated(self):
    # Only 6 bases precede position 7, so a requested flank of 15 is
    # expected to be cut down to those 6 bases.
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 15)
    expFlank = "AACTTT"
    self.assertEqual(expFlank, obsFlank)
978
def test_get5PrimeFlank_flank_of_first_base(self):
    # Nothing precedes position 1, so the flank is expected to be empty.
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 1
    obsFlank = bs.get5PrimeFlank(position, 15)
    expFlank = ""
    self.assertEqual(expFlank, obsFlank)
985
def test_get3PrimeFlank(self):
    # The 3 bases immediately after position 7 (1-based) are "CAG".
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3)
    expFlank = "CAG"
    self.assertEqual(expFlank, obsFlank)
992
def test_get3PrimeFlank_flank_length_truncated(self):
    # Only 5 bases follow position 7, so a requested flank of 15 is
    # expected to be cut down to those 5 bases.
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 15)
    expFlank = "CAGAA"
    self.assertEqual(expFlank, obsFlank)
999
def test_get3PrimeFlank_flank_of_last_base(self):
    # Position 12 is the last base, so the flank is expected to be empty.
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 12
    obsFlank = bs.get3PrimeFlank(position, 15)
    expFlank = ""
    self.assertEqual(expFlank, obsFlank)
1006
def test_get3PrimeFlank_polymLength_different_of_1(self):
    # With a third argument of 2 (the "polymLength" of the test name) the
    # flank is expected to start one base further: positions 9..11, "AGA".
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3, 2)
    expFlank = "AGA"
    self.assertEqual(expFlank, obsFlank)
1013
# Module-level suite holding every test of Test_Bioseq. It is built at import
# time so that other modules can pick up `test_suite`.
# TestLoader.loadTestsFromTestCase is used instead of unittest.makeSuite,
# which is deprecated and no longer available in recent Python versions.
test_suite = unittest.TestSuite()
test_suite.addTest( unittest.TestLoader().loadTestsFromTestCase( Test_Bioseq ) )
if __name__ == "__main__":
    # Run the suite with per-test output when the file is executed directly.
    unittest.TextTestRunner(verbosity=2).run( test_suite )