6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 import unittest
|
|
33 import os
|
|
34 import time
|
|
35 from commons.core.seq.BioseqDB import BioseqDB
|
|
36 from commons.core.seq.Bioseq import Bioseq
|
|
37 from commons.core.utils.FileUtils import FileUtils
|
|
38 from commons.core.coord.Map import Map
|
|
39
|
|
40
|
|
41 class Test_BioseqDB( unittest.TestCase ):
|
|
42
|
|
def setUp(self):
    """Store a per-run identifier made of a timestamp and the process id."""
    timestamp = time.strftime("%Y%m%d%H%M%S")
    processId = os.getpid()
    self._uniqId = "%s_%s" % (timestamp, processId)
|
|
45
|
|
46
|
|
def tearDown(self):
    """Remove the scratch FASTA file a test may have left in the working directory."""
    # os._exists() is a private helper that looks a name up in the os module
    # globals; it never inspects the file system, so the clean-up was dead code.
    # os.path.exists() performs the intended file check.
    if os.path.exists("dummyBioseqDB.fa"):
        os.remove("dummyBioseqDB.fa")
|
|
50
|
|
51
|
|
def test__eq__(self):
    """Two databases built from identical headers and sequences compare equal."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA"),
    ])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA"),
    ])

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
64
|
|
65
|
|
def test__eq__instances_with_different_header(self):
    """Databases with the same sequences but different headers are not equal."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA"),
    ])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("seq3", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq4", "GCGATGCGATCGATGCGATAGCA"),
    ])

    # assertNotEqual replaces the deprecated alias assertNotEquals (removed in Python 3.12).
    self.assertNotEqual(expBioseqDB, obsBioseqDB)
|
|
78
|
|
79
|
|
def test__eq__instances_with_different_sequences(self):
    """Databases with the same headers but one differing sequence are not equal."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA"),
    ])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT"),
    ])

    # assertNotEqual replaces the deprecated alias assertNotEquals (removed in Python 3.12).
    self.assertNotEqual(expBioseqDB, obsBioseqDB)
|
|
92
|
|
93
|
|
def test__eq__instances_with_different_sequences_and_headers(self):
    """Databases differing in both headers and sequences are not equal."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA"),
    ])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("seq3", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq4", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT"),
    ])

    # assertNotEqual replaces the deprecated alias assertNotEquals (removed in Python 3.12).
    self.assertNotEqual(expBioseqDB, obsBioseqDB)
|
|
106
|
|
107
|
|
def test__eq__instances_with_different_sizeOfBioseq(self):
    """Databases holding a different number of Bioseq objects are not equal."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT"),
        Bioseq("seq2", "GCGATGCGATCGATGCGATAGCA"),
    ])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq3", "AGCGGACGATGCAGCATGCGAATGACGAT")])

    # assertNotEqual replaces the deprecated alias assertNotEquals (removed in Python 3.12).
    self.assertNotEqual(expBioseqDB, obsBioseqDB)
|
|
119
|
|
120
|
|
def test_setName(self):
    """A new database has an empty name; setName() stores the given name."""
    expName = "myDataBank"
    iBioseqDB = BioseqDB()
    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(iBioseqDB.name, "")

    iBioseqDB.setName(expName)
    self.assertEqual(expName, iBioseqDB.name)
|
|
129
|
|
130
|
|
def test_read(self):
    """read() on an open FASTA handle loads the records written to that file."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGC"),
    ])

    faFN = "dummyFaFile.fa"
    obsBioseqDB = BioseqDB()
    # try/finally guarantees the scratch file is removed even if read() raises;
    # the with-blocks close the handles on every path.
    try:
        with open(faFN, "w") as faF:
            faF.write(">consensus1\n")
            faF.write("GAGATGGCTCATGGAGTACCTGCCT\n")
            faF.write(">consensus2\n")
            faF.write("GAGATGGCTCATGGAGTACCGC\n")

        with open(faFN, "r") as faF:
            obsBioseqDB.read(faF)
    finally:
        if os.path.exists(faFN):
            os.remove(faFN)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
151
|
|
152
|
|
def test_write(self):
    """write() on an open handle produces the expected 60-column FASTA file."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
    ])

    expFaFileName = "dummyFaFile.fa"
    obsFaFileName = "obsDummyFastaFile.fa"
    # Previously both files were removed only after a passing assertion, so a
    # failure left them on disk; the finally clause now always cleans up.
    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">consensus1\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            expFaFile.write("ATGGAGTACCTGCCT\n")
            expFaFile.write(">consensus2\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        with open(obsFaFileName, "w") as obsFaFile:
            iBioseqDB.write(obsFaFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        for fileName in (expFaFileName, obsFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
177
|
|
178
|
|
def test_save(self):
    """save() given a file name produces the expected 60-column FASTA file."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
    ])

    expFaFileName = "dummyFaFile.fa"
    obsFaFileName = "obsDummyFastaFile.fa"
    # Previously both files were removed only after a passing assertion, so a
    # failure left them on disk; the finally clause now always cleans up.
    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">consensus1\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            expFaFile.write("ATGGAGTACCTGCCT\n")
            expFaFile.write(">consensus2\n")
            expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        iBioseqDB.save(obsFaFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        for fileName in (expFaFileName, obsFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
201
|
|
202
|
|
def test_load(self):
    """load() given a file name rebuilds the records from a wrapped FASTA file."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
    ])

    FaFileName = "dummyFaFile.fa"
    # Previously the file was removed only after a passing assertion; the
    # finally clause now removes it on every path.
    try:
        with open(FaFileName, "w") as FaFile:
            FaFile.write(">consensus1\n")
            FaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
            FaFile.write("ATGGAGTACCTGCCT\n")
            FaFile.write(">consensus2\n")
            FaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
            FaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

        obsBioseqDB = BioseqDB()
        obsBioseqDB.load(FaFileName)

        # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
        self.assertEqual(expBioseqDB, obsBioseqDB)
    finally:
        if os.path.exists(FaFileName):
            os.remove(FaFileName)
|
|
224
|
|
225
|
|
def test_reverse(self):
    """reverse() turns each stored sequence into its reversed string."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq1", "GTTA"), Bioseq("seq2", "TAAGC")])
    obsBioseqDB.reverse()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
238
|
|
239
|
|
def test_complement(self):
    """complement() replaces each stored sequence by its base-wise complement."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq1", "TAAC"), Bioseq("seq2", "GCTTA")])
    obsBioseqDB.complement()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
253
|
|
254
|
|
def test_reverseComplement(self):
    """reverseComplement() replaces each stored sequence by its reverse complement."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq1", "CAAT"), Bioseq("seq2", "ATTCG")])
    obsBioseqDB.reverseComplement()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
268
|
|
269
|
|
def test_setData(self):
    """setData() yields a database equal to one whose db attribute was assigned directly."""
    lBioseq = [
        Bioseq("seq1", "ATTG"),
        Bioseq("seq2", "CGAAT"),
        Bioseq("seq3", "CAAT"),
        Bioseq("seq4", "ATTCG"),
    ]
    expBioseqDB = BioseqDB()
    # The expected instance is filled through the raw attribute on purpose,
    # so that setData() is only exercised on the observed side.
    expBioseqDB.db = lBioseq

    lBioseq2 = [
        Bioseq("seq1", "ATTG"),
        Bioseq("seq2", "CGAAT"),
        Bioseq("seq3", "CAAT"),
        Bioseq("seq4", "ATTCG"),
    ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData(lBioseq2)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
290
|
|
291
|
|
def test_reset(self):
    """reset() on a filled database makes it equal to a freshly created one."""
    lBioseq = [
        Bioseq("seq1", "ATTG"),
        Bioseq("seq2", "CGAAT"),
        Bioseq("seq3", "CAAT"),
        Bioseq("seq4", "ATTCG"),
    ]
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData(lBioseq)
    obsBioseqDB.reset()

    expBioseqDB = BioseqDB()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
306
|
|
307
|
|
def testCleanGap(self):
    """cleanGap() strips the '-' characters from every stored sequence."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("seq1", "AT-----TG"),
        Bioseq("seq2", "CGAA----T"),
    ])
    obsBioseqDB.cleanGap()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
322
|
|
323
|
|
def testCleanGap_on_empty_db(self):
    """cleanGap() on an empty database leaves it equal to an empty database."""
    expBioseqDB = BioseqDB()

    obsBioseqDB = BioseqDB()
    obsBioseqDB.cleanGap()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
331
|
|
332
|
|
def testCleanGap_on_size_one_db(self):
    """cleanGap() strips the '-' characters in a database holding a single sequence."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([Bioseq("seq1", "ATTG")])

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq1", "AT-----TG")])
    obsBioseqDB.cleanGap()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
345
|
|
346
|
|
def test_add_to_a_empty_bioseqDB_instance(self):
    """add() on an empty database indexes the header and its renamed form at position 0."""
    sHeader = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeaderRenamed = (
        "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    expDictIdx = {sHeader: 0}
    expDictIdxRenamed = {sHeaderRenamed: 0}

    obsBioseqDB = BioseqDB()
    obsBioseqDB.add(Bioseq(sHeader, "ATTG"))

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expDictIdx, obsBioseqDB.idx)
    self.assertEqual(expDictIdxRenamed, obsBioseqDB.idx_renamed)
|
|
366
|
|
367
|
|
def test_add_to_a_size_one_bioseqDB_instance(self):
    """add() on a one-sequence database indexes the new header at position 1."""
    sHeader1 = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeader2 = (
        "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    expDictIdx = {sHeader1: 0, sHeader2: 1}

    sHeaderRenamed1 = (
        "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    sHeaderRenamed2 = (
        "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    expDictIdxRenamed = {sHeaderRenamed1: 0, sHeaderRenamed2: 1}

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq(sHeader1, "ATTG")])
    obsBioseqDB.add(Bioseq(sHeader2, "ATTG"))

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expDictIdx, obsBioseqDB.idx)
    self.assertEqual(expDictIdxRenamed, obsBioseqDB.idx_renamed)
|
|
397
|
|
398
|
|
def test_add_to_a_size_two_bioseqDB_instance(self):
    """add() on a two-sequence database indexes the new header at position 2."""
    sHeader1 = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeader2 = (
        "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    sHeader3 = (
        "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    expDictIdx = {sHeader1: 0, sHeader2: 1, sHeader3: 2}

    sHeaderRenamed1 = (
        "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    sHeaderRenamed2 = (
        "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    sHeaderRenamed3 = (
        "embl-AF332604-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )
    expDictIdxRenamed = {sHeaderRenamed1: 0, sHeaderRenamed2: 1, sHeaderRenamed3: 2}

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq(sHeader1, "ATTG"), Bioseq(sHeader2, "ATTG")])
    obsBioseqDB.add(Bioseq(sHeader3, "ATTG"))

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expDictIdx, obsBioseqDB.idx)
    self.assertEqual(expDictIdxRenamed, obsBioseqDB.idx_renamed)
|
|
433
|
|
434
|
|
def test__getitem__(self):
    """Indexing the database with 1 returns the second stored Bioseq."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])
    expBioseq = Bioseq("seq2", "CGAAT")

    obsBioseq = iBioseqDB[1]

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseq, obsBioseq)
|
|
444
|
|
445
|
|
def test_getSize(self):
    """getSize() returns the number of Bioseq objects stored (4 here)."""
    expSize = 4

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("seq1", "ATTG"),
        Bioseq("seq2", "CGAAT"),
        Bioseq("seq3", "AT-----TG"),
        Bioseq("seq4", "CGAA----T"),
    ])
    obsSize = obsBioseqDB.getSize()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expSize, obsSize)
|
|
459
|
|
460
|
|
def test_getSize_emptyDB(self):
    """getSize() returns 0 for a freshly created database."""
    expSize = 0

    obsSize = BioseqDB().getSize()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expSize, obsSize)
|
|
468
|
|
469
|
|
def test_getLength(self):
    """getLength() returns the cumulated length of all sequences (75 + 88 = 163)."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
    ])

    expLength = 163
    obsLength = iBioseqDB.getLength()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expLength, obsLength)
|
|
480
|
|
def test_getListOfSequencesLength(self):
    """getListOfSequencesLength() returns each sequence length in storage order."""
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus2", "GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
    ])
    lLength = iBioseqDB.getListOfSequencesLength()

    expLLengh = [75, 88]
    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expLLengh, lLength)
|
|
490
|
|
491
|
|
def test_getHeaderList(self):
    """getHeaderList() returns the stored headers in storage order."""
    lExpHeader = ["seq1", "seq2"]

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])

    lObsHeader = obsBioseqDB.getHeaderList()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(lExpHeader, lObsHeader)
|
|
504
|
|
505
|
|
def test_getSequencesList(self):
    """getSequencesList() returns the stored sequences in storage order."""
    lExpSeqs = ["ATGC", "AATTCCGG"]

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([Bioseq("seq1", "ATGC"), Bioseq("seq2", "AATTCCGG")])

    lObsSeqs = obsBioseqDB.getSequencesList()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(lExpSeqs, lObsSeqs)
|
|
518
|
|
519
|
|
def test_fetch(self):
    """fetch() given a header returns the Bioseq stored under that header."""
    ibioseq1 = Bioseq("seq1", "ATTG")
    ibioseq2 = Bioseq("seq2", "CGAAT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([ibioseq1, ibioseq2])

    expBioseq = ibioseq1
    obsBioseq = iBioseqDB.fetch("seq1")

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseq, obsBioseq)
|
|
528
|
|
529
|
|
def test_getBioseqByRenamedHeader(self):
    """getBioseqByRenamedHeader() returns the Bioseq whose renamed header matches."""
    Header1 = (
        "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    Header2 = (
        "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    Header3 = (
        "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
        "(At4g29080) mRNA, complete cds."
    )
    HeaderRenamed2 = (
        "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
        "(At4g29080)_mRNA-_complete_cds."
    )

    ibioseq1 = Bioseq(Header1, "ATTG")
    ibioseq2 = Bioseq(Header2, "CGAAT")
    ibioseq3 = Bioseq(Header3, "TGCGAAT")
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([ibioseq1, ibioseq2, ibioseq3])
    expBioseq = ibioseq2

    obsBioseq = iBioseqDB.getBioseqByRenamedHeader(HeaderRenamed2)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseq, obsBioseq)
|
|
553
|
|
554
|
|
def test_init_with_the_parm_name(self):
    """Constructing BioseqDB with a FASTA file name loads the records of that file."""
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([Bioseq("seq1", "ATTG"), Bioseq("seq2", "CGAAT")])

    fastaFilename = "dummyBioseqDB.fa"
    # try/finally removes the scratch file even when the constructor raises.
    try:
        with open(fastaFilename, "w") as f:
            f.write(">seq1\n")
            f.write("ATTG\n")
            f.write(">seq2\n")
            f.write("CGAAT\n")

        obsBioseqDB = BioseqDB(fastaFilename)
    finally:
        if os.path.exists(fastaFilename):
            os.remove(fastaFilename)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
571
|
|
572
|
|
def test_countNt(self):
    """countNt('N') sums the 'N' occurrences over all sequences (3 per sequence here)."""
    # Both records carry the same sequence, which contains three 'N'.
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expCount = 6
    obsCount = iBioseqDB.countNt('N')

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expCount, obsCount)
|
|
585
|
|
def test_countNt_lowercase(self):
    """countNt('N') is expected to return 0 when the sequences only hold lowercase 'n'."""
    # Both records carry the same all-lowercase sequence.
    sequence = "gcgncgctgctttattaagcgctagcatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcgatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcg"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expCount = 0
    obsCount = iBioseqDB.countNt('N')

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expCount, obsCount)
|
|
598
|
|
599
|
|
def test_countNt_withCharacterNotExisting(self):
    """countNt('W') is expected to return 0 when no sequence contains 'W'."""
    # Both records carry the same sequence, which has no 'W'.
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    expCount = 0
    obsCount = iBioseqDB.countNt('W')

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expCount, obsCount)
|
|
612
|
|
613
|
|
def test_countAllNt(self):
    """countAllNt() returns a dict of per-character counts over all sequences."""
    # Both records carry the same sequence, so every expected count is even.
    sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    iBioseq1 = Bioseq()
    iBioseq1.header = "seq1 description1"
    iBioseq1.sequence = sequence
    iBioseq2 = Bioseq()
    iBioseq2.header = "seq2 description2"
    iBioseq2.sequence = sequence
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([iBioseq1, iBioseq2])

    dExpCount = {'A': 68, 'C': 62, 'T': 86, 'G': 70, 'N': 6}

    dObsCount = iBioseqDB.countAllNt()

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(dExpCount, dObsCount)
|
|
628
|
|
629
|
|
def test_extractPart(self):
    """extractPart(1, 2) on four sequences is expected to return the second and third."""
    seq1 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
        Bioseq("consensus4", seq4),
    ])

    # Expected records are fresh instances, independent from the input ones.
    expSubBioseqDB = BioseqDB()
    expSubBioseqDB.setData([
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    obsSubBioseqDB = iBioseqDB.extractPart(1, 2)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expSubBioseqDB, obsSubBioseqDB)
|
|
648
|
|
649
|
|
def test_bestLength(self):
    """bestLength(4) is expected to keep the four longest sequences, in input order."""
    seq1 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC"
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq5 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq7 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
        Bioseq("consensus4", seq4),
        Bioseq("consensus5", seq5),
        Bioseq("consensus6", seq6),
        Bioseq("consensus7", seq7),
    ])

    # Expected records are fresh instances, independent from the input ones.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus2", seq2),
        Bioseq("consensus4", seq4),
        Bioseq("consensus6", seq6),
    ])

    obsBioseqDB = iBioseqDB.bestLength(4)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
672
|
|
673
|
|
def test_bestLength_with_a_none_sequence_include(self):
    """bestLength(3) on three records is expected to keep all of them, including a None sequence."""
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", None),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    # Expected records are fresh instances, independent from the input ones.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", None),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    obsBioseqDB = iBioseqDB.bestLength(3)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
691
|
|
692
|
|
def test_bestLength_with_a_none_sequence_not_include(self):
    """bestLength(2) is expected to drop the record whose sequence is None."""
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", None),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    # Expected records are fresh instances, independent from the input ones.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    obsBioseqDB = iBioseqDB.bestLength(2)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
709
|
|
710
|
|
def test_bestLength_number_of_bioseq_requiered_gt_BioseqDB_size(self):
    """bestLength(15) on three records is expected to return all three records."""
    seq1 = "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC"
    seq2 = "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq3 = "GAGATGGCTCATGGAGTACC"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    # Expected records are fresh instances, independent from the input ones.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus2", seq2),
        Bioseq("consensus3", seq3),
    ])

    obsBioseqDB = iBioseqDB.bestLength(15)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
728
|
|
729
|
|
def test_extractPatternOfFile(self):
    """extractPatternOfFile('consensus1+', file) is expected to load consensus1 and consensus11."""
    fastaFilename = "dummyBioseqDB.fa"

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC"),
        Bioseq("consensus11", "TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    obsBioseqDB = BioseqDB()
    # try/finally removes the scratch file even when the extraction raises.
    try:
        with open(fastaFilename, "w") as f:
            f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")

        obsBioseqDB.extractPatternOfFile("consensus1+", fastaFilename)
    finally:
        if os.path.exists(fastaFilename):
            os.remove(fastaFilename)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
749
|
|
750
|
|
def test_extractPatternOfFile_WithNoExistingPattern(self):
    """extractPatternOfFile() with a pattern matching no header leaves the database empty."""
    fastaFilename = "dummyBioseqDB.fa"

    expBioseqDB = BioseqDB()

    obsBioseqDB = BioseqDB()
    # try/finally removes the scratch file even when the extraction raises.
    try:
        with open(fastaFilename, "w") as f:
            f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
            f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
            f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")

        obsBioseqDB.extractPatternOfFile("NoExistingPattern", fastaFilename)
    finally:
        if os.path.exists(fastaFilename):
            os.remove(fastaFilename)

    # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
767
|
|
768
|
|
def test_getByPattern (self):
    """Check that getByPattern("consensus1+") returns consensus1 and consensus11.

    The source database holds consensus4, consensus1, consensus6 and
    consensus11; the returned database is compared with one built from
    independent Bioseq instances carrying the two expected records.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    # Expected records are separate objects so the comparison relies on
    # equality of content, not on object identity.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus11", seq11),
    ])

    obsBioseqDB = iBioseqDB.getByPattern("consensus1+")

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
784
|
|
785
|
|
def test_getByPattern_with_no_existing_pattern (self):
    """Check that getByPattern() with an unmatched pattern gives an empty result.

    No header in the source database contains "noExistingPattern", so the
    returned database is expected to equal a freshly created BioseqDB.
    """
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
        Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
    ])

    expBioseqDB = BioseqDB()

    obsBioseqDB = iBioseqDB.getByPattern("noExistingPattern+")

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
798
|
|
799
|
|
def test_getDiffFromPattern (self):
    """Check that getDiffFromPattern() keeps the records not matching the pattern.

    With the pattern "consensus[4|6]" applied to consensus4, consensus1,
    consensus6 and consensus11, the expected result holds consensus1 and
    consensus11 only.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus1", seq1),
        Bioseq("consensus11", seq11),
    ])

    # NOTE(review): if the pattern is handled as a regular expression,
    # "[4|6]" is a character class that also accepts a literal "|";
    # "[46]" may have been intended -- confirm before changing it.
    obsBioseqDB = iBioseqDB.getDiffFromPattern("consensus[4|6]")

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
816
|
|
817
|
|
def test_getDiffFromPattern_with_no_existing_pattern (self):
    """Check that getDiffFromPattern() with an unmatched pattern keeps everything.

    No header contains "noExistingPattern", so the result is expected to
    hold the same four records, in the same order, as the source database.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    # Expected records are rebuilt from scratch so the comparison relies on
    # equality of content, not on object identity.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    obsBioseqDB = iBioseqDB.getDiffFromPattern("noExistingPattern+")

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
835
|
|
836
|
|
def test_rmByPattern (self):
    """Check that rmByPattern("consensus1+") removes consensus1 and consensus11.

    rmByPattern() is called on the observed database itself, which is
    afterwards expected to hold consensus4 and consensus6 only.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    # Expected records are separate objects: the observed database is
    # modified by the call under test and must not share instances with it.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus6", seq6),
    ])

    obsBioseqDB.rmByPattern("consensus1+")

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
852
|
|
853
|
|
def test_rmByPattern_with_no_existing_pattern (self):
    """Check that rmByPattern() with an unmatched pattern removes nothing.

    No header contains "noExistingPattern", so the observed database is
    expected to still hold its four original records in the same order.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    # Expected records are separate objects: the observed database is
    # passed through the call under test and must not share instances.
    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus6", seq6),
        Bioseq("consensus11", seq11),
    ])

    obsBioseqDB.rmByPattern("noExistingPattern+")

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
870
|
|
871
|
|
def test_addBioseqFromABioseqDBIfHeaderContainPattern (self):
    """Check that only pattern-matching records of another database are added.

    The input database holds "Sequence4" and "consensus6"; with the
    pattern "consensus.*" the observed database is expected to keep its
    four original records and gain consensus6 at the end.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq7 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"
    seq6 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus7", seq7),
        Bioseq("consensus11", seq11),
    ])

    # Database the records are taken from; "Sequence4" carries the same
    # sequence as consensus4 but a header outside the pattern.
    inBioseqDB = BioseqDB()
    inBioseqDB.setData([
        Bioseq("Sequence4", seq4),
        Bioseq("consensus6", seq6),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus7", seq7),
        Bioseq("consensus11", seq11),
        Bioseq("consensus6", seq6),
    ])

    obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
896
|
|
897
|
|
def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):
    """Check that nothing is added when no input header matches the pattern.

    Neither "Sequence4" nor "consensus6" contains "noExistingPattern", so
    the observed database is expected to keep exactly its four original
    records.
    """
    seq4 = "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"
    seq1 = "TGCCTGAGATGGCTCATGGAGTACCTGCCT"
    seq7 = "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"
    seq11 = "TGCCTGATGGCTCATGGAGTACCTGCCT"

    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus7", seq7),
        Bioseq("consensus11", seq11),
    ])

    # Database the records would be taken from.
    inBioseqDB = BioseqDB()
    inBioseqDB.setData([
        Bioseq("Sequence4", seq4),
        Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4", seq4),
        Bioseq("consensus1", seq1),
        Bioseq("consensus7", seq7),
        Bioseq("consensus11", seq11),
    ])

    obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
921
|
|
922
|
|
def test_upCase (self):
    """Check that upCase() converts every stored sequence to upper case.

    Two mixed-case sequences are loaded; after the call the database is
    expected to equal one holding the same headers with fully upper-case
    sequences.
    """
    obsBioseqDB = BioseqDB()
    obsBioseqDB.setData([
        Bioseq("consensus4","atgacGatgca"),
        Bioseq("consensus1","atgcgaT"),
    ])

    expBioseqDB = BioseqDB()
    expBioseqDB.setData([
        Bioseq("consensus4","ATGACGATGCA"),
        Bioseq("consensus1","ATGCGAT"),
    ])

    obsBioseqDB.upCase()

    self.assertEqual(expBioseqDB, obsBioseqDB)
|
|
934
|
|
935
|
|
def test_getMap(self):
    """Check getDictOfLMapsWithoutGaps() on three gapped sequences.

    For each header the expected value is a list of Map objects named
    "<header>_subSeq<N>", one per run of characters between "-" symbols,
    with 1-based inclusive start and end positions in the gapped sequence.
    """
    iAlignedBioseqDB = BioseqDB()
    iAlignedBioseqDB.setData([
        Bioseq("header1","ATGC-RA-GCT"),
        Bioseq("header2","-TGC-RA-GCT"),
        Bioseq("header3","ATGC-RA-GC-"),
    ])

    obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()

    expDict = {
        "header1": [
            Map( "header1_subSeq1", "header1", 1, 4 ),
            Map( "header1_subSeq2", "header1", 6, 7 ),
            Map( "header1_subSeq3", "header1", 9, 11 ),
        ],
        # Leading "-" in header2: the first sub-sequence starts at 2.
        "header2": [
            Map( "header2_subSeq1", "header2", 2, 4 ),
            Map( "header2_subSeq2", "header2", 6, 7 ),
            Map( "header2_subSeq3", "header2", 9, 11 ),
        ],
        # Trailing "-" in header3: the last sub-sequence ends at 10.
        "header3": [
            Map( "header3_subSeq1", "header3", 1, 4 ),
            Map( "header3_subSeq2", "header3", 6, 7 ),
            Map( "header3_subSeq3", "header3", 9, 10 ),
        ],
    }

    self.assertEqual(expDict, obsDict)
|
|
957
|
|
def test_getSeqLengthByListOfName(self):
    """Check getSeqLengthByListOfName() for two of three stored sequences.

    Lengths are requested for "header1" and "header3"; the expected list
    [11, 4] follows the order of the requested names and counts every
    character of the stored sequence, "-" included.
    """
    iBioseqDB = BioseqDB()
    iBioseqDB.setData([
        Bioseq("header1","ATGC-RA-GCT"),
        Bioseq("header2","-TGC-RAR"),
        Bioseq("header3","ATGC"),
    ])

    expList = [11, 4]
    obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])

    self.assertEqual( expList, obsList )
|
|
970
|
|
# Module-level suite gathering every test method of Test_BioseqDB.
# loadTestsFromTestCase() replaces unittest.makeSuite(), which is deprecated
# since Python 3.11 and removed in 3.13; both collect the "test*" methods.
test_suite = unittest.TestSuite()
test_suite.addTest( unittest.defaultTestLoader.loadTestsFromTestCase( Test_BioseqDB ) )

# Run the suite verbosely when the file is executed as a script.
if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run( test_suite )
|