comparison commons/core/seq/test/Test_BioseqDB.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 import unittest
33 import os
34 import time
35 from commons.core.seq.BioseqDB import BioseqDB
36 from commons.core.seq.Bioseq import Bioseq
37 from commons.core.utils.FileUtils import FileUtils
38 from commons.core.coord.Map import Map
39
40
41 class Test_BioseqDB( unittest.TestCase ):
42
def setUp(self):
    """Store a per-run identifier made of a timestamp and the process id."""
    timestamp = time.strftime("%Y%m%d%H%M%S")
    processId = os.getpid()
    self._uniqId = "%s_%s" % (timestamp, processId)
45
46
def tearDown(self):
    """Remove the scratch FASTA file that a test may have left behind.

    The guard uses ``os.path.exists``. The previous ``os._exists`` is a
    private helper of the ``os`` module that looks up a *name* in that
    module's namespace, not a path on disk, so the file was never removed.
    """
    if os.path.exists("dummyBioseqDB.fa"):
        os.remove("dummyBioseqDB.fa")
50
51
def test__eq__(self):
    """Two databases holding equal headers and sequences compare equal."""
    iBioseq1 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq2 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq4 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])

    self.assertEqual(expBioseqDB, obsBioseqDB)
64
65
def test__eq__instances_with_different_header(self):
    """Databases with the same sequences but different headers differ."""
    iBioseq1 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq2 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq3", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq4 = Bioseq("seq4", "GCGATGCGATCGATGCGATAGCA")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])

    self.assertNotEqual(expBioseqDB, obsBioseqDB)
78
79
def test__eq__instances_with_different_sequences(self):
    """Databases with the same headers but one different sequence differ."""
    iBioseq1 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq2 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq4 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])

    self.assertNotEqual(expBioseqDB, obsBioseqDB)
92
93
def test__eq__instances_with_different_sequences_and_headers(self):
    """Databases differing in both headers and a sequence are not equal."""
    iBioseq1 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq2 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq3", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq4 = Bioseq("seq4", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])

    self.assertNotEqual(expBioseqDB, obsBioseqDB)
106
107
def test__eq__instances_with_different_sizeOfBioseq(self):
    """A two-entry database is not equal to a one-entry database."""
    iBioseq1 = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    iBioseq2 = Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq3", "AGCGGACGATGCAGCATGCGAATGACGAT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3])

    self.assertNotEqual(expBioseqDB, obsBioseqDB)
119
120
def test_setName(self):
    """A fresh database has an empty name; setName() stores the given one."""
    expName = "myDataBank"
    iBioseqDB = BioseqDB()
    self.assertEqual(iBioseqDB.name, "")

    iBioseqDB.setName(expName)
    obsName = iBioseqDB.name
    self.assertEqual(expName, obsName)
129
130
def test_read(self):
    """read() loads every record of an open two-record FASTA file."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGC")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    faFN = "dummyFaFile.fa"
    obsBioseqDB = BioseqDB()
    try:
        with open(faFN, "w") as faF:
            faF.write(">consensus1\n")
            faF.write("GAGATGGCTCATGGAGTACCTGCCT\n")
            faF.write(">consensus2\n")
            faF.write("GAGATGGCTCATGGAGTACCGC\n")

        with open(faFN, "r") as faF:
            obsBioseqDB.read(faF)
    finally:
        # Remove the scratch file even when reading raises.
        if os.path.exists(faFN):
            os.remove(faFN)

    self.assertEqual(expBioseqDB, obsBioseqDB)
151
152
def test_write(self):
    """write() emits FASTA records with sequence lines of 60 characters.

    The expected file is written by hand and compared with the file
    produced by ``write()`` on an open handle.
    """
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expFaFileName = "dummyFaFile.fa"
    obsFaFileName = "obsDummyFastaFile.fa"
    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">consensus1\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            expFaFile.write("ATGGAGTACCTGCCT\n")
            expFaFile.write(">consensus2\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        with open(obsFaFileName, "w") as obsFaFile:
            iBioseqDB.write(obsFaFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up both files even when the comparison fails.
        for fileName in (expFaFileName, obsFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
177
178
def test_save(self):
    """save() writes the database to the named file as 60-column FASTA."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expFaFileName = "dummyFaFile.fa"
    obsFaFileName = "obsDummyFastaFile.fa"
    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">consensus1\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            expFaFile.write("ATGGAGTACCTGCCT\n")
            expFaFile.write(">consensus2\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        iBioseqDB.save(obsFaFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up both files even when the comparison fails.
        for fileName in (expFaFileName, obsFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
201
202
def test_load(self):
    """load() rebuilds full sequences from a FASTA file with wrapped lines."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    FaFileName = "dummyFaFile.fa"
    try:
        with open(FaFileName, "w") as FaFile:
            FaFile.write(">consensus1\n")
            FaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            FaFile.write("ATGGAGTACCTGCCT\n")
            FaFile.write(">consensus2\n")
            FaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            FaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        obsBioseqDB = BioseqDB()
        obsBioseqDB.load(FaFileName)

        self.assertEqual(expBioseqDB, obsBioseqDB)
    finally:
        # Remove the scratch file even when the assertion fails.
        if os.path.exists(FaFileName):
            os.remove(FaFileName)
224
225
def test_reverse(self):
    """reverse() reverses each stored sequence (GTTA -> ATTG, TAAGC -> CGAAT)."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq1", "GTTA")
    iBioseq4 = Bioseq("seq2", "TAAGC")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])
    obsBioseqDB.reverse()

    self.assertEqual(expBioseqDB, obsBioseqDB)
238
239
def test_complement(self):
    """complement() complements each sequence (TAAC -> ATTG, GCTTA -> CGAAT)."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq1", "TAAC")
    iBioseq4 = Bioseq("seq2", "GCTTA")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])

    obsBioseqDB.complement()
    self.assertEqual(expBioseqDB, obsBioseqDB)
253
254
def test_reverseComplement(self):
    """reverseComplement() maps CAAT -> ATTG and ATTCG -> CGAAT."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq1", "CAAT")
    iBioseq4 = Bioseq("seq2", "ATTCG")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])

    obsBioseqDB.reverseComplement()
    self.assertEqual(expBioseqDB, obsBioseqDB)
268
269
def test_setData(self):
    """setData() yields a database equal to one whose ``db`` list is set directly."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    iBioseq3 = Bioseq("seq3", "CAAT")
    iBioseq4 = Bioseq("seq4", "ATTCG")

    lBioseq = [iBioseq1, iBioseq2, iBioseq3, iBioseq4]
    expBioseqDB = BioseqDB()
    # The expected instance bypasses setData() and assigns the list itself.
    expBioseqDB.db = lBioseq

    iBioseq5 = Bioseq("seq1", "ATTG")
    iBioseq6 = Bioseq("seq2", "CGAAT")
    iBioseq7 = Bioseq("seq3", "CAAT")
    iBioseq8 = Bioseq("seq4", "ATTCG")

    lBioseq2 = [iBioseq5, iBioseq6, iBioseq7, iBioseq8]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData(lBioseq2)

    self.assertEqual(expBioseqDB, obsBioseqDB)
290
291
def test_reset(self):
    """reset() on a populated database makes it equal to a fresh one."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    iBioseq3 = Bioseq("seq3", "CAAT")
    iBioseq4 = Bioseq("seq4", "ATTCG")

    lBioseq = [iBioseq1, iBioseq2, iBioseq3, iBioseq4]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData(lBioseq)
    obsBioseqDB.reset()

    expBioseqDB = BioseqDB()

    self.assertEqual(expBioseqDB, obsBioseqDB)
306
307
def testCleanGap(self):
    """cleanGap() strips the '-' characters from every stored sequence."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("seq1", "AT-----TG")
    iBioseq4 = Bioseq("seq2", "CGAA----T")

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq3, iBioseq4])
    obsBioseqDB.cleanGap()

    self.assertEqual(expBioseqDB, obsBioseqDB)
322
323
def testCleanGap_on_empty_db(self):
    """cleanGap() on an empty database leaves it equal to a fresh one."""
    expBioseqDB = BioseqDB()

    obsBioseqDB = BioseqDB()
    obsBioseqDB.cleanGap()

    self.assertEqual(expBioseqDB, obsBioseqDB)
331
332
def testCleanGap_on_size_one_db(self):
    """cleanGap() strips the '-' characters in a single-sequence database."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1])

    iBioseq2 = Bioseq("seq1", "AT-----TG")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq2])

    obsBioseqDB.cleanGap()

    self.assertEqual(expBioseqDB, obsBioseqDB)
345
346
def test_add_to_a_empty_bioseqDB_instance(self):
    """add() on an empty database indexes the new entry at position 0.

    Both the ``idx`` (original header) and ``idx_renamed`` (renamed header)
    dictionaries are checked.
    """
    sHeader = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    expDictIdx = {sHeader: 0}

    sHeaderRenamed = (
        "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    expDictIdxRenamed = {sHeaderRenamed: 0}

    iBioseq1 = Bioseq(sHeader, "ATTG")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.add(iBioseq1)

    obsDictIdx = obsBioseqDB.idx
    obsDictIdxRenamed = obsBioseqDB.idx_renamed

    self.assertEqual(expDictIdx, obsDictIdx)
    self.assertEqual(expDictIdxRenamed, obsDictIdxRenamed)
366
367
def test_add_to_a_size_one_bioseqDB_instance(self):
    """add() on a one-entry database indexes the new entry at position 1.

    Both the ``idx`` and ``idx_renamed`` dictionaries are checked.
    """
    sHeader1 = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeader2 = (
        "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    expDictIdx = {sHeader1: 0, sHeader2: 1}

    sHeaderRenamed1 = (
        "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    sHeaderRenamed2 = (
        "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    expDictIdxRenamed = {sHeaderRenamed1: 0, sHeaderRenamed2: 1}

    iBioseq1 = Bioseq(sHeader1, "ATTG")
    iBioseq2 = Bioseq(sHeader2, "ATTG")

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1])
    obsBioseqDB.add(iBioseq2)

    obsDictIdx = obsBioseqDB.idx
    obsDictIdxRenamed = obsBioseqDB.idx_renamed

    self.assertEqual(expDictIdx, obsDictIdx)
    self.assertEqual(expDictIdxRenamed, obsDictIdxRenamed)
397
398
def test_add_to_a_size_two_bioseqDB_instance(self):
    """add() on a two-entry database indexes the new entry at position 2.

    Both the ``idx`` and ``idx_renamed`` dictionaries are checked.
    """
    sHeader1 = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeader2 = (
        "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeader3 = (
        "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    expDictIdx = {sHeader1: 0, sHeader2: 1, sHeader3: 2}

    sHeaderRenamed1 = (
        "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    sHeaderRenamed2 = (
        "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    sHeaderRenamed3 = (
        "embl-AF332604-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    expDictIdxRenamed = {sHeaderRenamed1: 0, sHeaderRenamed2: 1, sHeaderRenamed3: 2}

    iBioseq1 = Bioseq(sHeader1, "ATTG")
    iBioseq2 = Bioseq(sHeader2, "ATTG")
    iBioseq3 = Bioseq(sHeader3, "ATTG")

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2])
    obsBioseqDB.add(iBioseq3)

    obsDictIdx = obsBioseqDB.idx
    obsDictIdxRenamed = obsBioseqDB.idx_renamed

    self.assertEqual(expDictIdx, obsDictIdx)
    self.assertEqual(expDictIdxRenamed, obsDictIdxRenamed)
433
434
def test__getitem__(self):
    """Indexing the database with 1 returns the second stored Bioseq."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])
    expBioseq = Bioseq("seq2", "CGAAT")
    obsBioseq = iBioseqDB[1]

    self.assertEqual(expBioseq, obsBioseq)
444
445
def test_getSize(self):
    """getSize() returns the number of stored sequences (4 here)."""
    expSize = 4

    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    iBioseq3 = Bioseq("seq3", "AT-----TG")
    iBioseq4 = Bioseq("seq4", "CGAA----T")

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])
    obsSize = obsBioseqDB.getSize()

    self.assertEqual(expSize, obsSize)
459
460
def test_getSize_emptyDB(self):
    """getSize() returns 0 for a freshly created database."""
    expSize = 0

    obsBioseqDB = BioseqDB()
    obsSize = obsBioseqDB.getSize()

    self.assertEqual(expSize, obsSize)
468
469
def test_getLength(self):
    """getLength() returns the summed length of all sequences (75 + 88 = 163)."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expLength = 163
    obsLength = iBioseqDB.getLength()

    self.assertEqual(expLength, obsLength)
480
def test_getListOfSequencesLength(self):
    """getListOfSequencesLength() returns each sequence length, in storage order."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])
    lLength = iBioseqDB.getListOfSequencesLength()

    expLLengh = [75, 88]
    self.assertEqual(expLLengh, lLength)
490
491
def test_getHeaderList(self):
    """getHeaderList() returns the headers in storage order."""
    lExpHeader = ["seq1", "seq2"]

    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2])

    lObsHeader = obsBioseqDB.getHeaderList()

    self.assertEqual(lExpHeader, lObsHeader)
504
505
def test_getSequencesList(self):
    """getSequencesList() returns the sequence strings in storage order."""
    lExpSeqs = ["ATGC", "AATTCCGG"]

    iBioseq1 = Bioseq("seq1", "ATGC")
    iBioseq2 = Bioseq("seq2", "AATTCCGG")

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2])

    lObsSeqs = obsBioseqDB.getSequencesList()

    self.assertEqual(lExpSeqs, lObsSeqs)
518
519
def test_fetch(self):
    """fetch() returns the Bioseq whose header matches the given string."""
    ibioseq1 = Bioseq("seq1", "ATTG")
    ibioseq2 = Bioseq("seq2", "CGAAT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([ibioseq1, ibioseq2])
    expBioseq = ibioseq1
    obsBioseq = iBioseqDB.fetch("seq1")

    self.assertEqual(expBioseq, obsBioseq)
528
529
def test_getBioseqByRenamedHeader(self):
    """getBioseqByRenamedHeader() finds an entry by its renamed header."""
    Header1 = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    Header2 = (
        "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    Header3 = (
        "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    HeaderRenamed2 = (
        "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )

    ibioseq1 = Bioseq(Header1, "ATTG")
    ibioseq2 = Bioseq(Header2, "CGAAT")
    ibioseq3 = Bioseq(Header3, "TGCGAAT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([ibioseq1, ibioseq2, ibioseq3])
    expBioseq = ibioseq2

    obsBioseq = iBioseqDB.getBioseqByRenamedHeader(HeaderRenamed2)

    self.assertEqual(expBioseq, obsBioseq)
553
554
def test_init_with_the_parm_name(self):
    """BioseqDB(fileName) is populated from the given FASTA file."""
    iBioseq1 = Bioseq("seq1", "ATTG")
    iBioseq2 = Bioseq("seq2", "CGAAT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    fastaFilename = "dummyBioseqDB.fa"
    try:
        with open(fastaFilename, "w") as f:
            f.write(">seq1\n")
            f.write("ATTG\n")
            f.write(">seq2\n")
            f.write("CGAAT\n")

        obsBioseqDB = BioseqDB(fastaFilename)
    finally:
        # Remove the scratch file even when the constructor raises.
        if os.path.exists(fastaFilename):
            os.remove(fastaFilename)

    self.assertEqual(expBioseqDB, obsBioseqDB)
571
572
def test_countNt(self):
    """countNt('N') totals the 'N' occurrences over all sequences (3 + 3 = 6)."""
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expCount = 6
    obsCount = iBioseqDB.countNt('N')
    self.assertEqual(expCount, obsCount)
585
def test_countNt_lowercase(self):
    """countNt('N') is expected to return 0 when sequences hold only lowercase 'n'."""
    sequence = "gcgncgctgctttattaagcgctagcatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcgatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcg"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expCount = 0
    obsCount = iBioseqDB.countNt('N')
    self.assertEqual(expCount, obsCount)
598
599
def test_countNt_withCharacterNotExisting(self):
    """countNt('W') returns 0 when no sequence contains 'W'."""
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expCount = 0
    obsCount = iBioseqDB.countNt('W')
    self.assertEqual(expCount, obsCount)
612
613
def test_countAllNt(self):
    """countAllNt() returns a per-character count dictionary over all sequences."""
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    dExpCount = {'A': 68, 'C': 62, 'T': 86, 'G': 70, 'N': 6}

    dObsCount = iBioseqDB.countAllNt()
    self.assertEqual(dExpCount, dObsCount)
628
629
def test_extractPart(self):
    """extractPart(1, 2) returns a database holding the 2nd and 3rd entries."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq3 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq4 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    iBioseq5 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")

    expSubBioseqDB = BioseqDB()
    expSubBioseqDB.setData([iBioseq5, iBioseq6])

    obsSubBioseqDB = iBioseqDB.extractPart(1, 2)

    self.assertEqual(expSubBioseqDB, obsSubBioseqDB)
648
649
def test_bestLength(self):
    """bestLength(4) keeps the four longest sequences, in their original order."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq3 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")
    iBioseq4 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq5 = Bioseq("consensus5", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq6 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq7 = Bioseq("consensus7", "TGCCTGATGGCTCATGGAGTACCTGCCT")

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4, iBioseq5, iBioseq6, iBioseq7])

    iBioseq8 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
    iBioseq9 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq10 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq11 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq8, iBioseq9, iBioseq10, iBioseq11])

    obsBioseqDB = iBioseqDB.bestLength(4)

    self.assertEqual(expBioseqDB, obsBioseqDB)
672
673
def test_bestLength_with_a_none_sequence_include(self):
    """bestLength(3) on three entries keeps all, including the None-sequence one."""
    iBioseq1 = Bioseq("consensus1", None)
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq3 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])

    iBioseq4 = Bioseq("consensus1", None)
    iBioseq5 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq4, iBioseq5, iBioseq6])

    obsBioseqDB = iBioseqDB.bestLength(3)

    self.assertEqual(expBioseqDB, obsBioseqDB)
691
692
def test_bestLength_with_a_none_sequence_not_include(self):
    """bestLength(2) on three entries drops the entry whose sequence is None."""
    iBioseq1 = Bioseq("consensus1", None)
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq3 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])

    iBioseq5 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq5, iBioseq6])

    obsBioseqDB = iBioseqDB.bestLength(2)

    self.assertEqual(expBioseqDB, obsBioseqDB)
709
710
def test_bestLength_number_of_bioseq_requiered_gt_BioseqDB_size(self):
    """bestLength(15) on a three-entry database returns all three entries."""
    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
    iBioseq2 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq3 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])

    iBioseq4 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
    iBioseq5 = Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus3", "GAGATGGCTCATGGAGTACC")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq4, iBioseq5, iBioseq6])

    obsBioseqDB = iBioseqDB.bestLength(15)

    self.assertEqual(expBioseqDB, obsBioseqDB)
728
729
def test_extractPatternOfFile(self):
    """extractPatternOfFile() loads only the records whose header matches.

    With the pattern "consensus1+", the expected records are consensus1
    and consensus11.
    """
    fastaFilename = "dummyBioseqDB.fa"
    obsBioseqDB = BioseqDB()
    try:
        with open(fastaFilename, "w") as f:
            f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")

        obsBioseqDB.extractPatternOfFile("consensus1+", fastaFilename)
    finally:
        # Remove the scratch file even when extraction raises.
        if os.path.exists(fastaFilename):
            os.remove(fastaFilename)

    iBioseq1 = Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
    iBioseq2 = Bioseq("consensus11", "TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq1, iBioseq2])

    self.assertEqual(expBioseqDB, obsBioseqDB)
749
750
def test_extractPatternOfFile_WithNoExistingPattern(self):
    """extractPatternOfFile() leaves the database empty when no header matches."""
    fastaFilename = "dummyBioseqDB.fa"
    obsBioseqDB = BioseqDB()
    try:
        with open(fastaFilename, "w") as f:
            f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")

        obsBioseqDB.extractPatternOfFile("NoExistingPattern", fastaFilename)
    finally:
        # Remove the scratch file even when extraction raises.
        if os.path.exists(fastaFilename):
            os.remove(fastaFilename)

    expBioseqDB = BioseqDB()

    self.assertEqual(expBioseqDB, obsBioseqDB)
767
768
def test_getByPattern(self):
    """getByPattern("consensus1+") returns the consensus1 and consensus11 entries."""
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    iBioseq5 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq6 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq5, iBioseq6])

    obsBioseqDB = iBioseqDB.getByPattern("consensus1+")
    self.assertEqual(expBioseqDB, obsBioseqDB)
784
785
def test_getByPattern_with_no_existing_pattern(self):
    """getByPattern() returns an empty database when no header matches."""
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    expBioseqDB = BioseqDB()

    obsBioseqDB = iBioseqDB.getByPattern("noExistingPattern+")
    self.assertEqual(expBioseqDB, obsBioseqDB)
798
799
def test_getDiffFromPattern(self):
    """getDiffFromPattern() returns the entries whose header does not match.

    With "consensus[4|6]", the expected survivors are consensus1 and
    consensus11.
    """
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    iBioseq5 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq6 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq5, iBioseq6])

    obsBioseqDB = iBioseqDB.getDiffFromPattern("consensus[4|6]")

    self.assertEqual(expBioseqDB, obsBioseqDB)
816
817
def test_getDiffFromPattern_with_no_existing_pattern(self):
    """getDiffFromPattern() returns every entry when no header matches."""
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    iBioseq5 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq7 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq8 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq5, iBioseq6, iBioseq7, iBioseq8])

    obsBioseqDB = iBioseqDB.getDiffFromPattern("noExistingPattern+")
    self.assertEqual(expBioseqDB, obsBioseqDB)
835
836
def test_rmByPattern(self):
    """Check rmByPattern() with a pattern matching some headers.

    rmByPattern("consensus1+") is called on a database holding
    consensus4, consensus1, consensus6 and consensus11. The same database
    object is then expected to equal one holding only consensus4 and
    consensus6.
    """
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    iBioseq5 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq5, iBioseq6])

    # The return value is ignored: the test inspects obsBioseqDB itself
    # after the call.
    obsBioseqDB.rmByPattern("consensus1+")

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expBioseqDB, obsBioseqDB)
852
853
def test_rmByPattern_with_no_existing_pattern(self):
    """Check rmByPattern() with a pattern that matches no header.

    After rmByPattern("noExistingPattern+"), the database is expected to
    still equal one holding the original four sequences in the same order.
    """
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    # Expected content is built from separate Bioseq instances so the
    # comparison does not rely on the two databases sharing objects.
    iBioseq5 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq7 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq8 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq5, iBioseq6, iBioseq7, iBioseq8])

    obsBioseqDB.rmByPattern("noExistingPattern+")

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expBioseqDB, obsBioseqDB)
870
871
def test_addBioseqFromABioseqDBIfHeaderContainPattern(self):
    """Check addBioseqFromABioseqDBIfHeaderContainPattern() with a match.

    The target database holds consensus4, consensus1, consensus7 and
    consensus11; the source database holds Sequence4 and consensus6.
    With the pattern "consensus.*", the target is expected to end up equal
    to a database holding its original four sequences followed by
    consensus6 (Sequence4 is not expected to be added).
    """
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    # Source database: one header that matches the pattern, one that does not.
    iBioseq5 = Bioseq("Sequence4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    inBioseqDB = BioseqDB()
    inBioseqDB.setData([iBioseq5, iBioseq6])

    iBioseq7 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq8 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq9 = Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq10 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    iBioseq11 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq7, iBioseq8, iBioseq9, iBioseq10, iBioseq11])

    obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expBioseqDB, obsBioseqDB)
896
897
def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern(self):
    """Check addBioseqFromABioseqDBIfHeaderContainPattern() without a match.

    The source database holds Sequence4 and consensus6. With the pattern
    "noExistingPattern", the target database is expected to still equal
    one holding its original four sequences in the same order.
    """
    iBioseq1 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq2 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq3 = Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq4 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])

    iBioseq5 = Bioseq("Sequence4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq6 = Bioseq("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    inBioseqDB = BioseqDB()
    inBioseqDB.setData([iBioseq5, iBioseq6])

    # Expected content is built from separate Bioseq instances so the
    # comparison does not rely on the two databases sharing objects.
    iBioseq7 = Bioseq("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
    iBioseq8 = Bioseq("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT")
    iBioseq9 = Bioseq("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
    iBioseq10 = Bioseq("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq7, iBioseq8, iBioseq9, iBioseq10])

    obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expBioseqDB, obsBioseqDB)
921
922
def test_upCase(self):
    """Check that upCase() upper-cases the sequences of a BioseqDB.

    Two mixed-case sequences are loaded, upCase() is called on the
    database, and the database is expected to equal one holding the same
    headers with fully upper-case sequences.
    """
    iBioseq1 = Bioseq("consensus4", "atgacGatgca")
    iBioseq2 = Bioseq("consensus1", "atgcgaT")
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([iBioseq1, iBioseq2])

    iBioseq3 = Bioseq("consensus4", "ATGACGATGCA")
    iBioseq4 = Bioseq("consensus1", "ATGCGAT")
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([iBioseq3, iBioseq4])

    # The return value is ignored: the test inspects obsBioseqDB itself
    # after the call.
    obsBioseqDB.upCase()

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expBioseqDB, obsBioseqDB)
934
935
def test_getMap(self):
    """Check getDictOfLMapsWithoutGaps() on three gapped sequences.

    The expected dictionary maps each header to a list of Map objects
    named "<header>_subSeq<i>". Their start/end values are the 1-based
    positions of each run of non-"-" characters in that sequence.
    """
    iBioseq1 = Bioseq("header1", "ATGC-RA-GCT")
    iBioseq2 = Bioseq("header2", "-TGC-RA-GCT")
    iBioseq3 = Bioseq("header3", "ATGC-RA-GC-")

    iAlignedBioseqDB = BioseqDB()
    iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])

    obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()

    # header1: no leading or trailing gap.
    expLMap1 = [
        Map("header1_subSeq1", "header1", 1, 4),
        Map("header1_subSeq2", "header1", 6, 7),
        Map("header1_subSeq3", "header1", 9, 11),
    ]
    # header2: leading gap, so the first stretch starts at position 2.
    expLMap2 = [
        Map("header2_subSeq1", "header2", 2, 4),
        Map("header2_subSeq2", "header2", 6, 7),
        Map("header2_subSeq3", "header2", 9, 11),
    ]
    # header3: trailing gap, so the last stretch ends at position 10.
    expLMap3 = [
        Map("header3_subSeq1", "header3", 1, 4),
        Map("header3_subSeq2", "header3", 6, 7),
        Map("header3_subSeq3", "header3", 9, 10),
    ]

    expDict = {
        "header1": expLMap1,
        "header2": expLMap2,
        "header3": expLMap3,
    }

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expDict, obsDict)
957
def test_getSeqLengthByListOfName(self):
    """Check getSeqLengthByListOfName() for a subset of the headers.

    Lengths are requested for "header1" and "header3" only. The expected
    list is [11, 4], i.e. the character counts of those two sequences
    (the "-" characters of header1 included), in the requested order.
    """
    iBioseq1 = Bioseq("header1", "ATGC-RA-GCT")
    iBioseq2 = Bioseq("header2", "-TGC-RAR")
    iBioseq3 = Bioseq("header3", "ATGC")

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])

    expList = [11, 4]
    obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in recent Python versions.
    self.assertEqual(expList, obsList)
970
# Module-level suite gathering the tests of Test_BioseqDB, so that other
# modules can import `test_suite` and run it as part of a larger suite.
# TestLoader.loadTestsFromTestCase is used instead of unittest.makeSuite,
# which is deprecated and absent from recent Python versions.
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(Test_BioseqDB))
if __name__ == "__main__":
    # Run the suite verbosely when the file is executed as a script.
    unittest.TextTestRunner(verbosity=2).run(test_suite)