6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 import unittest
|
|
33 import os
|
|
34 import sys
|
|
35 from commons.core.seq.Bioseq import Bioseq
|
|
36 from commons.core.utils.FileUtils import FileUtils
|
|
37 from commons.core.coord.Map import Map
|
18
|
38 from commons.core.checker.RepetException import RepetException
|
6
|
39
|
|
40
|
|
41 class Test_Bioseq( unittest.TestCase ):
|
|
42
|
|
def setUp(self):
    """Give every test a fresh Bioseq built with no constructor arguments."""
    self._bs = Bioseq()
|
|
45
|
|
46
|
|
def test_isEmpty_True(self):
    """isEmpty() is expected to be True when header and sequence are both ""."""
    self._bs.setHeader("")
    self._bs.setSequence("")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(True, self._bs.isEmpty())
|
|
53
|
|
54
|
|
def test_isEmpty_False(self):
    """isEmpty() is expected to be False once a header and a sequence are set."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(False, self._bs.isEmpty())
|
|
61
|
|
62
|
|
def test___eq__(self):
    """Two Bioseq with the same header and sequence must compare equal."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, obs)
|
|
68
|
|
69
|
|
def test___ne__Header(self):
    """Two Bioseq differing only by header must compare unequal."""
    self._bs.setHeader("seq2")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, obs)
|
|
75
|
|
76
|
|
def test___ne__Sequence(self):
    """Two Bioseq differing only by sequence must compare unequal."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("GGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, obs)
|
|
82
|
|
83
|
|
def test_reverse(self):
    """After reverse(), the stored sequence "TGCGGA" should read "AGGCGT"."""
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.reverse()
    self.assertEqual(seq.sequence, "AGGCGT")
|
|
91
|
|
92
|
|
def test_complement(self):
    """After complement(), the stored sequence "TGCGGA" should read "ACGCCT"."""
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.complement()
    self.assertEqual(seq.sequence, "ACGCCT")
|
|
100
|
|
101
|
|
def test_complement_with_unknown_symbol(self):
    """complement() on a sequence mixing ambiguity codes and the unknown symbol "F".

    The expected string has "N" at the position where the input has "F".
    """
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGAFMRWTYSKVHDBN")
    seq.complement()
    self.assertEqual(seq.sequence, "ACGCCTNKYWARSMBDHVN")
|
|
109
|
|
110
|
|
def test_reverseComplement(self):
    """After reverseComplement(), "TGCGGA" should have become "TCCGCA"."""
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.reverseComplement()
    self.assertEqual(seq.sequence, "TCCGCA")
|
|
118
|
|
119
|
|
def test_cleanGap(self):
    """cleanGap() is expected to strip every "-" from the stored sequence."""
    self._bs.setSequence("-ATTTTGC-AGTC--TTATTCGAG-----GCCATTGCT-")
    exp = "ATTTTGCAGTCTTATTCGAGGCCATTGCT"
    self._bs.cleanGap()
    obs = self._bs.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(obs, exp)
|
|
126
|
|
127
|
|
def test_copyBioseqInstance(self):
    """copyBioseqInstance() must return an object equal to the original."""
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    obsBioseq = self._bs.copyBioseqInstance()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, obsBioseq)
|
|
133
|
|
134
|
|
def test_setFrameInfoOnHeader_without_description(self):
    """With a one-word header, phase -1 is expected to yield "seq_-1"."""
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    phase = -1
    expHeader = "seq_-1"
    self._bs.setFrameInfoOnHeader(phase)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
|
|
142
|
|
143
|
|
def test_setFrameInfoOnHeader_with_description(self):
    """With header "seq description", phase -1 is expected to yield "seq_-1 description"."""
    self._bs.setHeader("seq description")
    self._bs.setSequence("TGCGGA")
    phase = -1
    expHeader = "seq_-1 description"
    self._bs.setFrameInfoOnHeader(phase)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
|
|
151
|
|
152
|
|
def test_read(self):
    """read() on a one-record FASTA file whose sequence spans two lines.

    The expected sequence is the concatenation of both sequence lines.
    """
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write(">seq1 description1\n")
            faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
168
|
|
169
|
|
def test_read_WithEmptyFile(self):
    """read() on an empty file is expected to leave header and sequence as None."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = None
    expBioseq.sequence = None
    obsBioseq = Bioseq()
    try:
        # Opening in "w" mode and closing immediately creates the empty fixture.
        with open(faFileName, "w"):
            pass
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
182
|
|
183
|
|
def test_read_without_header(self):
    """read() on a file with no ">" line is expected to give an empty header and sequence."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = ""
    expBioseq.sequence = ""
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write("seq1 description1\n")
            faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
198
|
|
199
|
|
def test_read_with_two_consecutive_headers(self):
    """read() on two consecutive ">" lines: first header kept, sequence empty."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = ""
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write(">seq1 description1\n")
            faFile.write(">ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
214
|
|
215
|
|
def test_read_withEmptyLines(self):
    """read() on a record surrounded by blank lines; the blank lines must not affect the result."""
    faFileName = "dummyFaFile.fa"
    exp = Bioseq("seq1 description1", "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG")
    obs = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write("\n")
            faFile.write(">seq1 description1\n")
            faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            faFile.write("\n")
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(exp, obs)
|
|
235
|
|
def test_read_with_70nt_by_line(self):
    """read() on a record whose sequence lines are 70 nucleotides long."""
    faFileName = "dummyFaFile.fa"
    exp = Bioseq("seq1 description1", "TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAATGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT")
    obs = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write("\n")
            faFile.write(">seq1 description1\n")
            faFile.write("TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAA\n")
            faFile.write("TGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT\n")
            faFile.write("\n")
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(exp, obs)
|
|
255
|
|
def test_appendBioseqInFile(self):
    """appendBioseqInFile() must add the record after the existing file content.

    The expected file shows the appended sequence wrapped at 60 characters per line.
    """
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">seq2 description2\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        bioseq.appendBioseqInFile(obsFaFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up even when the assertion fails, so no fixture leaks into other tests.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
281
|
|
282
|
|
def test_writeABioseqInAFastaFile(self):
    """writeABioseqInAFastaFile() must write the record to an already-open handle.

    The record is written after the content the test itself put in the file.
    """
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">seq2 description2\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFile(obsFaFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # The expected file used to be left behind; remove both fixtures,
        # including when the assertion fails.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
307
|
|
308
|
|
def test_writeABioseqInAFastaFileWithOtherHeader(self):
    """writeABioseqInAFastaFileWithOtherHeader() must write the given header.

    The expected file carries `newHeader` instead of the Bioseq's own header.
    """
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"
    newHeader = "seq2 New header2"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">" + newHeader + "\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFileWithOtherHeader(obsFaFile, newHeader)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up even when the assertion fails, so no fixture leaks into other tests.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
335
|
|
336
|
|
def test_writeSeqInFasta(self):
    """writeSeqInFasta() is expected to write only the sequence lines.

    The expected file contains no header line and wraps at 60 characters.
    """
    iBs = Bioseq()
    iBs.header = "dummySeq"
    iBs.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    expFaFile = "dummyExpFile.fa"
    obsFaFile = "dummyObsFile.fa"
    try:
        with open(expFaFile, "w") as expFaFileHandler:
            expFaFileHandler.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFaFile, "w") as obsFaFileHandler:
            iBs.writeSeqInFasta(obsFaFileHandler)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFile, obsFaFile))
    finally:
        # Clean up even when the assertion fails, so no fixture leaks into other tests.
        for fileName in (obsFaFile, expFaFile):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
359
|
|
360
|
|
def test_subseq(self):
    """subseq(10, 30) is expected to return positions 10..30 (1-based, inclusive).

    The expected header is the original one suffixed with " fragment 10..30".
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    end = 30
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    expSubBioseq.sequence = bioseq.sequence[(start - 1) : end]
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
372
|
|
373
|
|
def test_subseq_no_end(self):
    """subseq(start) with no end is expected to run to the last position of the sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    expEnd = len(bioseq.sequence)
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(expEnd)
    expSubBioseq.sequence = bioseq.sequence[(start - 1) : expEnd]
    obsBioseq = bioseq.subseq(start)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
385
|
|
386
|
|
def test_subseq_start_gt_end(self):
    """subseq() with start greater than end is expected to return None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 30
    end = 10
    expSubBioseq = None
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
396
|
|
397
|
|
def test_subseq_start_eq_end(self):
    """subseq() with start equal to end is expected to return a one-nucleotide fragment."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    end = 10
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    expSubBioseq.sequence = bioseq.sequence[(start - 1) : end]
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
409
|
|
410
|
|
def test_subseq_negative_start(self):
    """subseq() with a negative start is expected to return None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = -10
    end = 10
    expSubBioseq = None
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
420
|
|
421
|
|
def test_getNtFromPosition_1(self):
    """getNtFromPosition(1) is expected to return the first nucleotide, "G"."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "G"
    obsNt = bioseq.getNtFromPosition(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
429
|
|
430
|
|
def test_getNtFromPosition_10(self):
    """getNtFromPosition(10) is expected to return the tenth nucleotide, "C"."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "C"
    obsNt = bioseq.getNtFromPosition(10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
438
|
|
439
|
|
def test_getNtFromPosition_last(self):
    """getNtFromPosition(146) is expected to return "G" for this fixture sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "G"
    obsNt = bioseq.getNtFromPosition(146)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
447
|
|
448
|
|
def test_getNtFromPosition_position_outside_range_0(self):
    """getNtFromPosition(0) is expected to return None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
456
|
|
457
|
|
def test_getNtFromPosition_position_outside_range_negative(self):
    """getNtFromPosition(-10) is expected to return None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(-10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
465
|
|
466
|
|
def test_getNtFromPosition_position_outside_range_positive(self):
    """getNtFromPosition(147) is expected to return None for this fixture sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(147)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
474
|
|
475
|
|
def test_view(self):
    """view() is expected to print the header and the sequence in 60-character lines.

    Standard output is captured into a file and compared with an expected file.
    """
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"

    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    stdoutRef = sys.stdout
    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view()
            finally:
                # Restore stdout before the capture file is closed, whatever
                # view() does, so the runner never writes to a closed file.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the assertion fails, so no fixture leaks into other tests.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
501
|
|
502
|
|
def test_view_with_l(self):
    """view(120) is expected to print the header followed by two 60-character lines.

    Standard output is captured into a file and compared with an expected file.
    """
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"

    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    stdoutRef = sys.stdout
    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")

        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view(120)
            finally:
                # Restore stdout before the capture file is closed, whatever
                # view() does, so the runner never writes to a closed file.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the assertion fails, so no fixture leaks into other tests.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
523
|
|
524
|
|
def test_getLength(self):
    """getLength() with no argument is expected to return 146 for this fixture sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expLength = 146
    obsLength = bioseq.getLength()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
532
|
|
533
|
|
def test_getLength_empty_seq(self):
    """getLength() on a freshly built Bioseq is expected to return 0."""
    bioseq = Bioseq()
    expLength = 0
    obsLength = bioseq.getLength()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
539
|
|
540
|
|
def test_getLength_WithoutN(self):
    """getLength(False) is expected to return 146 for a sequence containing runs of N.

    NOTE(review): presumably the False flag excludes N from the count - confirm
    against Bioseq.getLength.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGANCGCTGCTTTATTAAGCGCTAGATGNNNNNNNNNNNNNNNCGACGCTGCATTTATTAAGCGCTAGCGATTATANNNNNNNNNTAGCAGACGCATATTATATTGCGCGATGCGACGCTGCTTTATTANAGCGCTAGCGNNATTATATAGCANGACGCATATTATATTGCGCG"
    expLength = 146
    obsLength = bioseq.getLength(False)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
548
|
|
549
|
|
def test_getLength_WithoutN_empty_seq(self):
    """getLength(False) on a freshly built Bioseq is expected to return 0."""
    bioseq = Bioseq()
    expLength = 0
    obsLength = bioseq.getLength(False)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
555
|
|
556
|
|
def test_countNt(self):
    """countNt('N') is expected to return 3 for this fixture sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expCount = 3
    obsCount = bioseq.countNt('N')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expCount, obsCount)
|
|
564
|
|
565
|
|
def test_countNt_withCharacterNotExisting(self):
    """countNt('W') is expected to return 0 when the sequence contains no W."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expCount = 0
    obsCount = bioseq.countNt('W')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expCount, obsCount)
|
|
573
|
|
574
|
|
def test_countAllNt(self):
    """countAllNt() is expected to return a per-symbol count dict covering A, C, T, G and N."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpCount = {'A': 34, 'C': 31, 'T': 43, 'G': 35, 'N': 3}
    dObsCount = bioseq.countAllNt()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpCount, dObsCount)
|
|
582
|
|
583
|
|
def test_occ_word_size_1(self):
    """occ_word(1) is expected to return per-nucleotide counts and a word total.

    The expected dict has no 'N' key and the expected total is 143.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'A': 34, 'C': 31, 'T': 43, 'G': 35}
    ExpNbWord = 143
    dObsOccWord, ObsNbWord = bioseq.occ_word(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
|
|
593
|
|
594
|
|
def test_occ_word_size_0(self):
    """occ_word(0) is expected to return an empty dict and a word total of 0."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {}
    ExpNbWord = 0
    dObsOccWord, ObsNbWord = bioseq.occ_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
|
|
604
|
|
605
|
|
def test_occ_word_size_n(self):
    """occ_word(3) is expected to return counts for all 64 A/C/G/T 3-mers and a total of 135."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'ACC': 0, 'ATG': 2, 'AAG': 3, 'AAA': 0, 'ATC': 0, 'AAC': 0, 'ATA': 8, 'AGG': 0, 'CCT': 0, 'CTC': 0, 'AGC': 8, 'ACA': 0, 'AGA': 2, 'CAT': 3, 'AAT': 0, 'ATT': 9, 'CTG': 3, 'CTA': 3, 'ACT': 0, 'CAC': 0, 'ACG': 2, 'CAA': 0, 'AGT': 0, 'CAG': 2, 'CCG': 0, 'CCC': 0, 'CTT': 3, 'TAT': 13, 'GGT': 0, 'TGT': 0, 'CGA': 3, 'CCA': 0, 'TCT': 0, 'GAT': 3, 'CGG': 0, 'TTT': 3, 'TGC': 7, 'GGG': 0, 'TAG': 5, 'GGA': 0, 'TAA': 3, 'GGC': 0, 'TAC': 0, 'TTC': 0, 'TCG': 0, 'TTA': 10, 'TTG': 2, 'TCC': 0, 'GAA': 0, 'TGG': 0, 'GCA': 5, 'GTA': 0, 'GCC': 0, 'GTC': 0, 'GCG': 12, 'GTG': 0, 'GAG': 0, 'GTT': 0, 'GCT': 9, 'TGA': 0, 'GAC': 2, 'CGT': 0, 'TCA': 0, 'CGC': 10}
    ExpNbWord = 135
    dObsOccWord, ObsNbWord = bioseq.occ_word(3)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
|
|
615
|
|
616
|
|
def test_freq_word_size_1(self):
    """freq_word(1) is expected to return the frequency of each of A, C, T and G."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'A': 0.23776223776223776, 'C': 0.21678321678321677, 'T': 0.30069930069930068, 'G': 0.24475524475524477}
    dObsFreqWord = bioseq.freq_word(1)
    # Frequencies are floats: the key sets must match exactly, but values are
    # compared with a tolerance so the last binary digit cannot fail the test.
    self.assertEqual(sorted(dExpFreqWord.keys()), sorted(dObsFreqWord.keys()))
    for word, expFreq in dExpFreqWord.items():
        self.assertAlmostEqual(expFreq, dObsFreqWord[word])
|
|
624
|
|
625
|
|
def test_freq_word_size_0(self):
    """freq_word(0) is expected to return an empty dict."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {}
    dObsFreqWord = bioseq.freq_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
|
|
633
|
|
634
|
|
def test_freq_word_size_n(self):
    """freq_word(3) is expected to return a frequency for each of the 64 A/C/G/T 3-mers."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'ACC': 0.0, 'ATG': 0.014814814814814815, 'AAG': 0.022222222222222223, 'AAA': 0.0, 'ATC': 0.0, 'AAC': 0.0, 'ATA': 0.059259259259259262, 'AGG': 0.0, 'CCT': 0.0, 'CTC': 0.0, 'AGC': 0.059259259259259262, 'ACA': 0.0, 'AGA': 0.014814814814814815, 'CAT': 0.022222222222222223, 'AAT': 0.0, 'ATT': 0.066666666666666666, 'CTG': 0.022222222222222223, 'CTA': 0.022222222222222223, 'ACT': 0.0, 'CAC': 0.0, 'ACG': 0.014814814814814815, 'CAA': 0.0, 'AGT': 0.0, 'CAG': 0.014814814814814815, 'CCG': 0.0, 'CCC': 0.0, 'TAT': 0.096296296296296297, 'GGT': 0.0, 'TGT': 0.0, 'CGA': 0.022222222222222223, 'CCA': 0.0, 'TCT': 0.0, 'GAT': 0.022222222222222223, 'CGG': 0.0, 'CTT': 0.022222222222222223, 'TGC': 0.05185185185185185, 'GGG': 0.0, 'TAG': 0.037037037037037035, 'GGA': 0.0, 'TAA': 0.022222222222222223, 'GGC': 0.0, 'TAC': 0.0, 'TTC': 0.0, 'TCG': 0.0, 'TTT': 0.022222222222222223, 'TTG': 0.014814814814814815, 'TCC': 0.0, 'GAA': 0.0, 'TGG': 0.0, 'GCA': 0.037037037037037035, 'GTA': 0.0, 'GCC': 0.0, 'GTC': 0.0, 'TGA': 0.0, 'GCG': 0.088888888888888892, 'GTG': 0.0, 'GAG': 0.0, 'GTT': 0.0, 'GCT': 0.066666666666666666, 'TTA': 0.07407407407407407, 'GAC': 0.014814814814814815, 'CGT': 0.0, 'TCA': 0.0, 'CGC': 0.07407407407407407}
    dObsFreqWord = bioseq.freq_word(3)
    # Frequencies are floats: the key sets must match exactly, but values are
    # compared with a tolerance so the last binary digit cannot fail the test.
    self.assertEqual(sorted(dExpFreqWord.keys()), sorted(dObsFreqWord.keys()))
    for word, expFreq in dExpFreqWord.items():
        self.assertAlmostEqual(expFreq, dObsFreqWord[word])
|
|
642
|
|
643
|
|
def test_findORF_no_ORF_in_sequence(self):
    """findORF() is expected to return empty lists for keys 0, 1 and 2 on this sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATT"
    expORF = {0: [], 1: [], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
651
|
|
652
|
|
def test_findORF_one_ORF_in_first_phase(self):
    """findORF() is expected to report position 0 under key 0 and nothing under keys 1 and 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGNCGCTGCTTTATT"
    expORF = {0: [0], 1: [], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
660
|
|
661
|
|
def test_findORF_three_ORF_in_first_phase(self):
    """findORF() is expected to report positions 0, 6 and 12 under key 0 only."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGTAGNCGTGACTGCTTTATT"
    expORF = {0: [0, 6, 12], 1: [], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
669
|
|
670
|
|
def test_findORF_two_ORF_in_first_phase_one_ORF_in_second_phase(self):
    """findORF() is expected to report [0, 12] under key 0 and [4] under key 1."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGCTTTATT"
    expORF = {0: [0, 12], 1: [4], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
678
|
|
679
|
|
def test_findORF_two_ORF_in_first_phase_three_ORF_in_second_phase(self):
    """findORF() is expected to report [0, 12] under key 0 and [4, 16, 19] under key 1."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGATAGTATT"
    expORF = {0: [0, 12], 1: [4, 16, 19], 2: []}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
687
|
|
688
|
|
def test_findORF_one_ORF_in_second_phase_three_ORF_in_third_phase(self):
    """findORF() is expected to report [13] under key 1 and [5, 17, 20] under key 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "AATATTAGTGGAGTAGTTGATGATTTT"
    expORF = {0: [], 1: [13], 2: [5, 17, 20]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
696
|
|
697
|
|
def test_findORF_three_ORF_in_second_phase_one_ORF_in_third_phase(self):
    """findORF() is expected to report [16, 19, 25] under key 1 and [2] under key 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TTTGAAGTGGAGGAGTTGATGATTTTAAT"
    expORF = {0: [], 1: [16, 19, 25], 2: [2]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
705
|
|
706
|
|
def test_upCase(self):
    """upCase() is expected to upper-case the stored sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "taattcggcct"
    expSeq = "TAATTCGGCCT"
    bioseq.upCase()
    obsSeq = bioseq.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, obsSeq)
|
|
715
|
|
716
|
|
def test_lowCase(self):
    """lowCase() is expected to lower-case the stored sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "TAATTCGGCCT"
    expSeq = "taattcggcct"
    bioseq.lowCase()
    obsSeq = bioseq.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, obsSeq)
|
|
725
|
|
726
|
|
def test_getClusterID(self):
    """getClusterID() must return "0" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # NOTE(review): presumably the digits following "Cl" in the header - confirm.
    expID = "0"
    obsID = bioseq.getClusterID()
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expID, obsID)
|
|
734
|
|
735
|
|
def test_getGroupID(self):
    """getGroupID() must return "2" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # NOTE(review): presumably the digits following "Gr" in the header - confirm.
    expID = "2"
    obsID = bioseq.getGroupID()
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expID, obsID)
|
|
743
|
|
744
|
|
def test_getHeaderFullSeq(self):
    """getHeaderFullSeq() must return "chunk1" for the sample fragment header."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    # "chunk1" is the second whitespace-separated field of the header above.
    expHeader = "chunk1"
    obsHeader = bioseq.getHeaderFullSeq()
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expHeader, obsHeader)
|
|
752
|
|
753
|
|
def test_getFragStrand_plus_strand(self):
    """getFragStrand() must return '+' when the header range is 74091..74624."""
    bioseq = Bioseq()
    # The range in the header is written in ascending order.
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expStrand = '+'
    obsStrand = bioseq.getFragStrand()
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expStrand, obsStrand)
|
|
761
|
|
762
|
|
def test_getFragStrand_minus_strand(self):
    """getFragStrand() must return '-' when the header range is 74624..74091."""
    bioseq = Bioseq()
    # The range in the header is written in descending order.
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74624..74091"
    bioseq.sequence = "TAATTCGGCCT"
    expStrand = '-'
    obsStrand = bioseq.getFragStrand()
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expStrand, obsStrand)
|
|
770
|
|
771
|
|
def test_getATGCNFromIUPAC_A(self):
    """getATGCNFromIUPAC('A') must return 'A'."""
    bioseq = Bioseq()
    expNucl = 'A'
    obsNucl = bioseq.getATGCNFromIUPAC('A')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
777
|
|
778
|
|
def test_getATGCNFromIUPAC_T(self):
    """getATGCNFromIUPAC('T') must return 'T'."""
    bioseq = Bioseq()
    expNucl = 'T'
    obsNucl = bioseq.getATGCNFromIUPAC('T')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
784
|
|
785
|
|
def test_getATGCNFromIUPAC_C(self):
    """getATGCNFromIUPAC('C') must return 'C'."""
    bioseq = Bioseq()
    expNucl = 'C'
    obsNucl = bioseq.getATGCNFromIUPAC('C')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
791
|
|
792
|
|
def test_getATGCNFromIUPAC_G(self):
    """getATGCNFromIUPAC('G') must return 'G'."""
    bioseq = Bioseq()
    expNucl = 'G'
    obsNucl = bioseq.getATGCNFromIUPAC('G')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
798
|
|
799
|
|
def test_getATGCNFromIUPAC_N(self):
    """getATGCNFromIUPAC('N') must return 'N'."""
    bioseq = Bioseq()
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('N')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
805
|
|
806
|
|
def test_getATGCNFromIUPAC_U(self):
    """getATGCNFromIUPAC('U') must return 'T'."""
    bioseq = Bioseq()
    # Input and expected output differ here: 'U' is expected to map to 'T'.
    expNucl = 'T'
    obsNucl = bioseq.getATGCNFromIUPAC('U')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
812
|
|
813
|
|
def test_getATGCNFromIUPAC_R(self):
    """getATGCNFromIUPAC('R') must return either 'A' or 'G'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['A', 'G']
    obsNucl = bioseq.getATGCNFromIUPAC('R')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
820
|
|
821
|
|
def test_getATGCNFromIUPAC_Y(self):
    """getATGCNFromIUPAC('Y') must return either 'C' or 'T'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['C', 'T']
    obsNucl = bioseq.getATGCNFromIUPAC('Y')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
828
|
|
829
|
|
def test_getATGCNFromIUPAC_M(self):
    """getATGCNFromIUPAC('M') must return either 'C' or 'A'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['C', 'A']
    obsNucl = bioseq.getATGCNFromIUPAC('M')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
836
|
|
837
|
|
def test_getATGCNFromIUPAC_K(self):
    """getATGCNFromIUPAC('K') must return either 'T' or 'G'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['T', 'G']
    obsNucl = bioseq.getATGCNFromIUPAC('K')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
844
|
|
845
|
|
def test_getATGCNFromIUPAC_W(self):
    """getATGCNFromIUPAC('W') must return either 'T' or 'A'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['T', 'A']
    obsNucl = bioseq.getATGCNFromIUPAC('W')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
852
|
|
853
|
|
def test_getATGCNFromIUPAC_S(self):
    """getATGCNFromIUPAC('S') must return either 'C' or 'G'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['C', 'G']
    obsNucl = bioseq.getATGCNFromIUPAC('S')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
860
|
|
861
|
|
def test_getATGCNFromIUPAC_B(self):
    """getATGCNFromIUPAC('B') must return one of 'C', 'T' or 'G'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['C', 'T', 'G']
    obsNucl = bioseq.getATGCNFromIUPAC('B')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
869
|
|
870
|
|
def test_getATGCNFromIUPAC_D(self):
    """getATGCNFromIUPAC('D') must return one of 'A', 'T' or 'G'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['A', 'T', 'G']
    obsNucl = bioseq.getATGCNFromIUPAC('D')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
878
|
|
879
|
|
def test_getATGCNFromIUPAC_H(self):
    """getATGCNFromIUPAC('H') must return one of 'C', 'T' or 'A'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['C', 'T', 'A']
    obsNucl = bioseq.getATGCNFromIUPAC('H')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
887
|
|
888
|
|
def test_getATGCNFromIUPAC_V(self):
    """getATGCNFromIUPAC('V') must return one of 'C', 'A' or 'G'."""
    bioseq = Bioseq()
    # More than one answer is accepted for this code.
    lExpNucl = ['C', 'A', 'G']
    obsNucl = bioseq.getATGCNFromIUPAC('V')
    # assertIn reports the observed value on failure, unlike assertTrue(a or b).
    self.assertIn(obsNucl, lExpNucl)
|
|
896
|
|
897
|
|
def test_getATGCNFromIUPAC_Z(self):
    """getATGCNFromIUPAC('Z') must return 'N'."""
    bioseq = Bioseq()
    # 'Z' is not among the codes exercised by the sibling tests; 'N' is the
    # expected answer for it.
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('Z')
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expNucl, obsNucl)
|
|
903
|
|
904
|
18
|
def test_getATGCNFromIUPACandATGCN(self):
    """Check getATGCNFromIUPACandATGCN() on twelve (code, nucleotide) pairs.

    Each row gives the IUPAC code, the nucleotide passed with it, and the
    nucleotide the method is expected to return.
    """
    lCases = [
        ('R', 'A', "G"),
        ('R', 'G', "A"),
        ('Y', 'T', "C"),
        ('Y', 'C', "T"),
        ('M', 'A', "C"),
        ('M', 'C', "A"),
        ('K', 'G', "T"),
        ('K', 'T', "G"),
        ('W', 'T', "A"),
        ('W', 'A', "T"),
        ('S', 'C', "G"),
        ('S', 'G', "C"),
    ]
    for iupac, nucl, expNucl in lCases:
        # A fresh Bioseq per case, as in the original one-assert-per-line form.
        obsNucl = Bioseq().getATGCNFromIUPACandATGCN(iupac, nucl)
        # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
        self.assertEqual(expNucl, obsNucl)
|
|
918
|
|
def test_getATGCNFromIUPACandATGCN_invalid_combination(self):
    """Passing ('R', 'T') must raise a RepetException with the expected message."""
    expMsg = "IUPAC code 'R' and nucleotide 'T' are not compatible"
    # Stays empty if no RepetException is raised, so the final check fails.
    caughtMsg = ""
    try:
        Bioseq().getATGCNFromIUPACandATGCN('R', 'T')
    except RepetException as exc:
        caughtMsg = exc.getMessage()
    self.assertEqual(expMsg, caughtMsg)
|
|
927
|
|
def test_getATGCNFromIUPACandATGCN_invalid_IUPAC(self):
    """Passing ('B', 'T') must raise a RepetException with the expected message."""
    expMsg = "Can't retrieve the third nucleotide from IUPAC code 'B' and nucleotide 'T'"
    iBioseq = Bioseq()
    # Stays empty if no RepetException is raised, so the final check fails.
    caughtMsg = ""
    try:
        iBioseq.getATGCNFromIUPACandATGCN('B', 'T')
    except RepetException as exc:
        caughtMsg = exc.getMessage()
    self.assertEqual(expMsg, caughtMsg)
|
|
937
|
6
|
def test_partialIUPAC(self):
    """After partialIUPAC(), the 'R' in the sequence must be replaced by 'A' or 'G'."""
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN"
    # Both accepted outcomes differ from the input only at the 'R' position.
    lExpSequences = ["ATGCNAATGCN", "ATGCNGATGCN"]
    bioseq.partialIUPAC()
    obsSequence = bioseq.sequence
    # assertIn reports the observed sequence on failure, unlike assertTrue(a or b).
    self.assertIn(obsSequence, lExpSequences)
|
|
946
|
|
947
|
|
def test_checkEOF(self):
    """After checkEOF(), the sequence must no longer contain the '\\r' characters."""
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN\rATGCAAT\rTATA\r"
    bioseq.checkEOF()
    obsSequence = bioseq.sequence
    # Same bases as the input, with the three carriage returns removed.
    expSequence = "ATGCNRATGCNATGCAATTATA"

    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expSequence, obsSequence)
|
|
956
|
|
957
|
|
def test_getLMapWhithoutGap(self):
    """getLMapWhithoutGap() must return one Map per run of bases between '-' gaps."""
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    # The start/end values are the 1-based positions of each gap-free run.
    expLMap = [
        Map("header_subSeq1", "header", 1, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 11),
    ]

    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expLMap, obsLMap)
|
|
966
|
|
967
|
|
def test_getLMapWhithoutGap_seqStartsWithGap(self):
    """Same check as the plain case, with a '-' as the first character."""
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "-TGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    # The first Map starts at position 2 because position 1 is a gap.
    expLMap = [
        Map("header_subSeq1", "header", 2, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 11),
    ]

    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expLMap, obsLMap)
|
|
976
|
|
977
|
|
def test_getLMapWhithoutGap_seqEndsWithGap(self):
    """Same check as the plain case, with a '-' as the last character."""
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GC-"
    obsLMap = iBioseq.getLMapWhithoutGap()
    # The last Map ends at position 10 because position 11 is a gap.
    expLMap = [
        Map("header_subSeq1", "header", 1, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 10),
    ]

    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expLMap, obsLMap)
|
|
986
|
|
def test_getGCpercentage_onlyATGC(self):
    """getGCpercentage() on "TGCAGCT" must equal 100 * 4 / 7.0."""
    iBioseq = Bioseq("seq", "TGCAGCT")
    obsPercent = iBioseq.getGCpercentage()
    # 4 of the 7 bases are G or C.
    expPercent = 100 * 4 / 7.0
    self.assertEqual(expPercent, obsPercent)
|
|
992
|
|
def test_getGCpercentageInSequenceWithoutCountNInLength(self):
    """The N-excluding GC percentage of "TGCAGCTNNNNN" must equal 100 * 4 / 7.0."""
    iBioseq = Bioseq("seq", "TGCAGCTNNNNN")
    obsPercent = iBioseq.getGCpercentageInSequenceWithoutCountNInLength()
    # 4 G/C bases over the 7 non-N bases; the 5 trailing N are not counted.
    expPercent = 100 * 4 / 7.0
    self.assertEqual(expPercent, obsPercent)
|
|
998
|
|
def test_get5PrimeFlank(self):
    """get5PrimeFlank(7, 3) on "AACTTTCCAGAA" must return "TTT"."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 3)
    # "TTT" are the three bases immediately before 1-based position 7.
    expFlank = "TTT"
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1005
|
|
def test_get5PrimeFlank_flank_length_truncated(self):
    """A requested flank of 15 before position 7 must be cut down to "AACTTT"."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 15)
    # Only six bases exist before position 7, so all of them are expected.
    expFlank = "AACTTT"
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1012
|
|
def test_get5PrimeFlank_flank_of_first_base(self):
    """get5PrimeFlank() at position 1 must return an empty string."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 1
    obsFlank = bs.get5PrimeFlank(position, 15)
    # Nothing precedes the first base.
    expFlank = ""
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1019
|
|
def test_get3PrimeFlank(self):
    """get3PrimeFlank(7, 3) on "AACTTTCCAGAA" must return "CAG"."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3)
    # "CAG" are the three bases immediately after 1-based position 7.
    expFlank = "CAG"
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1026
|
|
def test_get3PrimeFlank_flank_length_truncated(self):
    """A requested flank of 15 after position 7 must be cut down to "CAGAA"."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 15)
    # Only five bases exist after position 7, so all of them are expected.
    expFlank = "CAGAA"
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1033
|
|
def test_get3PrimeFlank_flank_of_last_base(self):
    """get3PrimeFlank() at the last position (12) must return an empty string."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 12
    obsFlank = bs.get3PrimeFlank(position, 15)
    # Nothing follows the last base of the 12-base sequence.
    expFlank = ""
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1040
|
|
def test_get3PrimeFlank_polymLength_different_of_1(self):
    """get3PrimeFlank(7, 3, 2) on "AACTTTCCAGAA" must return "AGA"."""
    bs = Bioseq("line1", "AACTTTCCAGAA")
    position = 7
    # NOTE(review): the third argument is presumably the polymorphism length
    # (per the test name); with 2 the flank starts one base further - confirm.
    obsFlank = bs.get3PrimeFlank(position, 3, 2)
    expFlank = "AGA"
    # assertEqual replaces the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(expFlank, obsFlank)
|
|
1047
|
|
# Module-level suite holding every test method of Test_Bioseq.
# TestLoader.loadTestsFromTestCase replaces unittest.makeSuite, which was
# deprecated in Python 3.11 and removed in Python 3.13.
test_suite = unittest.TestSuite()
test_suite.addTest(unittest.TestLoader().loadTestsFromTestCase(Test_Bioseq))

if __name__ == "__main__":
    # Run the suite with per-test output when executed as a script.
    unittest.TextTestRunner(verbosity=2).run(test_suite)
|