6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 import unittest
|
|
33 import os
|
|
34 import sys
|
|
35 from commons.core.seq.Bioseq import Bioseq
|
|
36 from commons.core.utils.FileUtils import FileUtils
|
|
37 from commons.core.coord.Map import Map
|
|
38
|
|
39
|
|
40 class Test_Bioseq( unittest.TestCase ):
|
|
41
|
|
def setUp(self):
    """Give every test a freshly constructed Bioseq in ``self._bs``."""
    fresh = Bioseq()
    self._bs = fresh
|
|
44
|
|
45
|
|
def test_isEmpty_True(self):
    """isEmpty() must return True when header and sequence are both empty strings."""
    self._bs.setHeader("")
    self._bs.setSequence("")
    exp = True
    obs = self._bs.isEmpty()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(exp, obs)
|
|
52
|
|
53
|
|
def test_isEmpty_False(self):
    """isEmpty() must return False once a header and a sequence are set."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    exp = False
    obs = self._bs.isEmpty()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(exp, obs)
|
|
60
|
|
61
|
|
def test___eq__(self):
    """Two Bioseq instances with the same header and sequence must compare equal."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, obs)
|
|
67
|
|
68
|
|
def test___ne__Header(self):
    """Bioseq instances that differ only by header must compare unequal."""
    self._bs.setHeader("seq2")
    self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, obs)
|
|
74
|
|
75
|
|
def test___ne__Sequence(self):
    """Bioseq instances that differ only by sequence must compare unequal."""
    self._bs.setHeader("seq1")
    self._bs.setSequence("GGACGATGCAGCATGCGAATGACGAT")
    obs = Bioseq("seq1", "AGCGGACGATGCAGCATGCGAATGACGAT")
    # assertNotEqual replaces the deprecated assertNotEquals alias.
    self.assertNotEqual(self._bs, obs)
|
|
81
|
|
82
|
|
def test_reverse(self):
    """reverse() must turn the stored sequence "TGCGGA" into "AGGCGT"."""
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.reverse()
    self.assertEqual(seq.sequence, "AGGCGT")
|
|
90
|
|
91
|
|
def test_complement(self):
    """complement() must turn the stored sequence "TGCGGA" into "ACGCCT"."""
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.complement()
    self.assertEqual(seq.sequence, "ACGCCT")
|
|
99
|
|
100
|
|
def test_complement_with_unknown_symbol(self):
    """complement() on a sequence mixing A/C/G/T with other letters.

    The expected string maps the unrecognised "F" to "N" and the other
    ambiguity letters to their listed counterparts.
    """
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGAFMRWTYSKVHDBN")
    seq.complement()
    self.assertEqual(seq.sequence, "ACGCCTNKYWARSMBDHVN")
|
|
108
|
|
109
|
|
def test_reverseComplement(self):
    """reverseComplement() must turn the stored sequence "TGCGGA" into "TCCGCA"."""
    seq = self._bs
    seq.setHeader("seq1")
    seq.setSequence("TGCGGA")
    seq.reverseComplement()
    self.assertEqual(seq.sequence, "TCCGCA")
|
|
117
|
|
118
|
|
def test_cleanGap(self):
    """cleanGap() must strip every "-" character from the stored sequence."""
    self._bs.setSequence("-ATTTTGC-AGTC--TTATTCGAG-----GCCATTGCT-")
    exp = "ATTTTGCAGTCTTATTCGAGGCCATTGCT"
    self._bs.cleanGap()
    obs = self._bs.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(obs, exp)
|
|
125
|
|
126
|
|
def test_copyBioseqInstance(self):
    """copyBioseqInstance() must return an object equal to the original."""
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    obsBioseq = self._bs.copyBioseqInstance()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self._bs, obsBioseq)
|
|
132
|
|
133
|
|
def test_setFrameInfoOnHeader_without_description(self):
    """setFrameInfoOnHeader(-1) must turn the header "seq" into "seq_-1"."""
    self._bs.setHeader("seq")
    self._bs.setSequence("TGCGGA")
    phase = -1
    expHeader = "seq_-1"
    self._bs.setFrameInfoOnHeader(phase)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
|
|
141
|
|
142
|
|
def test_setFrameInfoOnHeader_with_description(self):
    """setFrameInfoOnHeader(-1) must insert "_-1" after the first word of the header."""
    self._bs.setHeader("seq description")
    self._bs.setSequence("TGCGGA")
    phase = -1
    expHeader = "seq_-1 description"
    self._bs.setFrameInfoOnHeader(phase)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, self._bs.header)
|
|
150
|
|
151
|
|
def test_read(self):
    """read() must load the header and join the two sequence lines of a fasta file."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write(">seq1 description1\n")
            faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises, so later tests start clean.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
167
|
|
168
|
|
def test_read_WithEmptyFile(self):
    """read() on an empty file must leave header and sequence set to None."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = None
    expBioseq.sequence = None
    obsBioseq = Bioseq()
    try:
        # Opening in "w" mode and closing immediately creates the empty fixture.
        with open(faFileName, "w"):
            pass
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
181
|
|
182
|
|
def test_read_without_header(self):
    """read() on a file with no ">" header line must yield empty header and sequence."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = ""
    expBioseq.sequence = ""
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write("seq1 description1\n")
            faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
197
|
|
198
|
|
def test_read_with_two_consecutive_headers(self):
    """read() must keep the first header and an empty sequence when a second ">" line follows."""
    faFileName = "dummyFaFile.fa"
    expBioseq = Bioseq()
    expBioseq.header = "seq1 description1"
    expBioseq.sequence = ""
    obsBioseq = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write(">seq1 description1\n")
            faFile.write(">ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
        with open(faFileName, "r") as faFile:
            obsBioseq.read(faFile)
    finally:
        # Remove the fixture even when read() raises.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(expBioseq, obsBioseq)
|
|
213
|
|
214
|
|
def test_read_withEmptyLines(self):
    """read() must ignore the blank lines placed before the header and after the sequence."""
    faFileName = "dummyFaFile.fa"
    exp = Bioseq("seq1 description1", "ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG")
    obs = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write("\n")
            faFile.write(">seq1 description1\n")
            faFile.write("ATGCGTCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            faFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            faFile.write("\n")
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Remove the fixture even when read() raises.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(exp, obs)
|
|
234
|
|
def test_read_with_70nt_by_line(self):
    """read() must join sequence lines that are 70 characters long."""
    faFileName = "dummyFaFile.fa"
    exp = Bioseq("seq1 description1", "TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAATGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT")
    obs = Bioseq()
    try:
        with open(faFileName, "w") as faFile:
            faFile.write("\n")
            faFile.write(">seq1 description1\n")
            faFile.write("TGTCACATCCTGATTTTCGTTTCAGGATTTATAAATTATTTAATAAATTAATAATAGAATTTATATTAAA\n")
            faFile.write("TGTTTTTTAATTTACAAGTGAAGTTAAATGTGGGAAATAAAATTTCTTAAATCTAAAGCATGGATGGATT\n")
            faFile.write("\n")
        with open(faFileName, "r") as faFile:
            obs.read(faFile)
    finally:
        # Remove the fixture even when read() raises.
        if os.path.exists(faFileName):
            os.remove(faFileName)
    self.assertEqual(exp, obs)
|
|
254
|
|
def test_appendBioseqInFile(self):
    """appendBioseqInFile() must add the record after the existing one, wrapped at 60 characters."""
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")

        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">seq2 description2\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        bioseq.appendBioseqInFile(obsFaFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up both fixtures even when the assertion fails.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
280
|
|
281
|
|
def test_writeABioseqInAFastaFile(self):
    """writeABioseqInAFastaFile() must write the record to an open handle, wrapped at 60 characters."""
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">seq2 description2\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        # The observed file keeps the same handle open for the pre-existing
        # record and for the record written by the method under test.
        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFile(obsFaFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # The expected file used to be left behind; remove both fixtures,
        # even when the assertion fails.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
306
|
|
307
|
|
def test_writeABioseqInAFastaFileWithOtherHeader(self):
    """writeABioseqInAFastaFileWithOtherHeader() must write the sequence under the supplied header."""
    obsFaFileName = "dummyFaFile.fa"
    expFaFileName = "dummyFaFile2.fa"
    newHeader = "seq2 New header2"

    bioseq = Bioseq()
    bioseq.header = "seq2 description2"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFaFileName, "w") as expFaFile:
            expFaFile.write(">seq1 description1\n")
            expFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            expFaFile.write(">" + newHeader + "\n")
            expFaFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFaFileName, "w") as obsFaFile:
            obsFaFile.write(">seq1 description1\n")
            obsFaFile.write("ATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG\n")
            bioseq.writeABioseqInAFastaFileWithOtherHeader(obsFaFile, newHeader)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName))
    finally:
        # Clean up both fixtures even when the assertion fails.
        for fileName in (obsFaFileName, expFaFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
334
|
|
335
|
|
def test_writeSeqInFasta(self):
    """writeSeqInFasta() must write only the sequence, wrapped at 60 characters, with no header line."""
    iBs = Bioseq()
    iBs.header = "dummySeq"
    iBs.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    expFaFile = "dummyExpFile.fa"
    obsFaFile = "dummyObsFile.fa"
    try:
        with open(expFaFile, "w") as expFaFileHandler:
            expFaFileHandler.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFaFileHandler.write("ATAGCAGACGCATATTATATTGCGCG\n")

        with open(obsFaFile, "w") as obsFaFileHandler:
            iBs.writeSeqInFasta(obsFaFileHandler)

        self.assertTrue(FileUtils.are2FilesIdentical(expFaFile, obsFaFile))
    finally:
        # Clean up both fixtures even when the assertion fails.
        for fileName in (obsFaFile, expFaFile):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
358
|
|
359
|
|
def test_subseq(self):
    """subseq(10, 30) must return the 1-based inclusive slice with a "fragment 10..30" header suffix."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    end = 30
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    # 1-based inclusive coordinates map to the Python slice [start - 1:end].
    expSubBioseq.sequence = bioseq.sequence[(start - 1) : end]
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
371
|
|
372
|
|
def test_subseq_no_end(self):
    """subseq(10) with no end must run to the last position of the sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    expSubBioseq = Bioseq()
    expEnd = len(bioseq.sequence)
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(expEnd)
    expSubBioseq.sequence = bioseq.sequence[(start - 1) : expEnd]
    obsBioseq = bioseq.subseq(start)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
384
|
|
385
|
|
def test_subseq_start_gt_end(self):
    """subseq() must return None when start (30) is greater than end (10)."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 30
    end = 10
    expSubBioseq = None
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
395
|
|
396
|
|
def test_subseq_start_eq_end(self):
    """subseq(10, 10) must return a one-character fragment."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = 10
    end = 10
    expSubBioseq = Bioseq()
    expSubBioseq.header = "seq1 description1 fragment " + str(start) + ".." + str(end)
    expSubBioseq.sequence = bioseq.sequence[(start - 1) : end]
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
408
|
|
409
|
|
def test_subseq_negative_start(self):
    """subseq() must return None when start is negative."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    start = -10
    end = 10
    expSubBioseq = None
    obsBioseq = bioseq.subseq(start, end)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSubBioseq, obsBioseq)
|
|
419
|
|
420
|
|
def test_getNtFromPosition_1(self):
    """getNtFromPosition(1) must return the first character of the sequence ("G")."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "G"
    obsNt = bioseq.getNtFromPosition(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
428
|
|
429
|
|
def test_getNtFromPosition_10(self):
    """getNtFromPosition(10) must return the tenth character of the sequence ("C")."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "C"
    obsNt = bioseq.getNtFromPosition(10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
437
|
|
438
|
|
def test_getNtFromPosition_last(self):
    """getNtFromPosition(146) must return the last character of the 146-nt sequence ("G")."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = "G"
    obsNt = bioseq.getNtFromPosition(146)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
446
|
|
447
|
|
def test_getNtFromPosition_position_outside_range_0(self):
    """getNtFromPosition(0) must return None, since positions are 1-based."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
455
|
|
456
|
|
def test_getNtFromPosition_position_outside_range_negative(self):
    """getNtFromPosition(-10) must return None."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(-10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
464
|
|
465
|
|
def test_getNtFromPosition_position_outside_range_positive(self):
    """getNtFromPosition(147) must return None, one past the end of the 146-nt sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expNt = None
    obsNt = bioseq.getNtFromPosition(147)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNt, obsNt)
|
|
473
|
|
474
|
|
def test_view(self):
    """view() must print the header and the sequence wrapped at 60 characters to stdout."""
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"

    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCG\n")

        stdoutRef = sys.stdout
        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view()
            finally:
                # Restore stdout before the capture file closes, so a failure
                # in view() cannot leave stdout bound to a closed file.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up both fixtures even when the assertion fails.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
500
|
|
501
|
|
def test_view_with_l(self):
    """view(120) must print the header and only the first two 60-character lines of the sequence."""
    obsFileName = "obsdummy_Bioseq_view"
    expFileName = "expDummy_Bioseq_View"
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

    try:
        with open(expFileName, "w") as expFile:
            expFile.write(">seq1 description1\n")
            expFile.write("GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")
            expFile.write("ATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTAT\n")

        stdoutRef = sys.stdout
        with open(obsFileName, "w") as obsFile:
            sys.stdout = obsFile
            try:
                bioseq.view(120)
            finally:
                # Restore stdout before the capture file closes, so a failure
                # in view() cannot leave stdout bound to a closed file.
                sys.stdout = stdoutRef

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up both fixtures even when the assertion fails.
        for fileName in (obsFileName, expFileName):
            if os.path.exists(fileName):
                os.remove(fileName)
|
|
522
|
|
523
|
|
def test_getLength(self):
    """getLength() must return 146 for the 146-character sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expLength = 146
    obsLength = bioseq.getLength()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
531
|
|
532
|
|
def test_getLength_empty_seq(self):
    """getLength() must return 0 on a freshly constructed Bioseq."""
    bioseq = Bioseq()
    expLength = 0
    obsLength = bioseq.getLength()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
538
|
|
539
|
|
def test_getLength_WithoutN(self):
    """getLength(False) must return 146 for this N-containing sequence.

    NOTE(review): the False argument presumably excludes "N" characters
    from the count -- confirm against Bioseq.getLength.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGANCGCTGCTTTATTAAGCGCTAGATGNNNNNNNNNNNNNNNCGACGCTGCATTTATTAAGCGCTAGCGATTATANNNNNNNNNTAGCAGACGCATATTATATTGCGCGATGCGACGCTGCTTTATTANAGCGCTAGCGNNATTATATAGCANGACGCATATTATATTGCGCG"
    expLength = 146
    obsLength = bioseq.getLength(False)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
547
|
|
548
|
|
def test_getLength_WithoutN_empty_seq(self):
    """getLength(False) must return 0 on a freshly constructed Bioseq."""
    bioseq = Bioseq()
    expLength = 0
    obsLength = bioseq.getLength(False)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLength, obsLength)
|
|
554
|
|
555
|
|
def test_countNt(self):
    """countNt('N') must return 3 for a sequence holding three "N" characters."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expCount = 3
    obsCount = bioseq.countNt('N')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expCount, obsCount)
|
|
563
|
|
564
|
|
def test_countNt_withCharacterNotExisting(self):
    """countNt('W') must return 0 when the character is absent from the sequence."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    expCount = 0
    obsCount = bioseq.countNt('W')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expCount, obsCount)
|
|
572
|
|
573
|
|
def test_countAllNt(self):
    """countAllNt() must return the per-character counts for A, C, T, G and N."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpCount = {'A': 34, 'C': 31, 'T': 43, 'G': 35, 'N': 3}
    dObsCount = bioseq.countAllNt()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpCount, dObsCount)
|
|
581
|
|
582
|
|
def test_occ_word_size_1(self):
    """occ_word(1) must return single-letter counts without "N" and a word total of 143."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'A': 34, 'C': 31, 'T': 43, 'G': 35}
    ExpNbWord = 143
    dObsOccWord, ObsNbWord = bioseq.occ_word(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
|
|
592
|
|
593
|
|
def test_occ_word_size_0(self):
    """occ_word(0) must return an empty dict and a word total of 0."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {}
    ExpNbWord = 0
    dObsOccWord, ObsNbWord = bioseq.occ_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
|
|
603
|
|
604
|
|
def test_occ_word_size_n(self):
    """occ_word(3) must return a count for all 64 A/C/G/T triplets and a word total of 135."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpOccWord = {'ACC': 0, 'ATG': 2, 'AAG': 3, 'AAA': 0, 'ATC': 0, 'AAC': 0, 'ATA': 8, 'AGG': 0, 'CCT': 0, 'CTC': 0, 'AGC': 8, 'ACA': 0, 'AGA': 2, 'CAT': 3, 'AAT': 0, 'ATT': 9, 'CTG': 3, 'CTA': 3, 'ACT': 0, 'CAC': 0, 'ACG': 2, 'CAA': 0, 'AGT': 0, 'CAG': 2, 'CCG': 0, 'CCC': 0, 'CTT': 3, 'TAT': 13, 'GGT': 0, 'TGT': 0, 'CGA': 3, 'CCA': 0, 'TCT': 0, 'GAT': 3, 'CGG': 0, 'TTT': 3, 'TGC': 7, 'GGG': 0, 'TAG': 5, 'GGA': 0, 'TAA': 3, 'GGC': 0, 'TAC': 0, 'TTC': 0, 'TCG': 0, 'TTA': 10, 'TTG': 2, 'TCC': 0, 'GAA': 0, 'TGG': 0, 'GCA': 5, 'GTA': 0, 'GCC': 0, 'GTC': 0, 'GCG': 12, 'GTG': 0, 'GAG': 0, 'GTT': 0, 'GCT': 9, 'TGA': 0, 'GAC': 2, 'CGT': 0, 'TCA': 0, 'CGC': 10}
    ExpNbWord = 135
    dObsOccWord, ObsNbWord = bioseq.occ_word(3)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpOccWord, dObsOccWord)
    self.assertEqual(ExpNbWord, ObsNbWord)
|
|
614
|
|
615
|
|
def test_freq_word_size_1(self):
    """freq_word(1) must return the frequency of each of A, C, T and G.

    The expected values are the occ_word(1) counts divided by 143 and are
    compared exactly, as the original test does.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'A': 0.23776223776223776, 'C': 0.21678321678321677, 'T': 0.30069930069930068, 'G': 0.24475524475524477}
    dObsFreqWord = bioseq.freq_word(1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
|
|
623
|
|
624
|
|
def test_freq_word_size_0(self):
    """freq_word(0) must return an empty dict."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {}
    dObsFreqWord = bioseq.freq_word(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
|
|
632
|
|
633
|
|
def test_freq_word_size_n(self):
    """freq_word(3) must return a frequency for all 64 A/C/G/T triplets.

    Values are compared exactly, as the original test does.
    """
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
    dExpFreqWord = {'ACC': 0.0, 'ATG': 0.014814814814814815, 'AAG': 0.022222222222222223, 'AAA': 0.0, 'ATC': 0.0, 'AAC': 0.0, 'ATA': 0.059259259259259262, 'AGG': 0.0, 'CCT': 0.0, 'CTC': 0.0, 'AGC': 0.059259259259259262, 'ACA': 0.0, 'AGA': 0.014814814814814815, 'CAT': 0.022222222222222223, 'AAT': 0.0, 'ATT': 0.066666666666666666, 'CTG': 0.022222222222222223, 'CTA': 0.022222222222222223, 'ACT': 0.0, 'CAC': 0.0, 'ACG': 0.014814814814814815, 'CAA': 0.0, 'AGT': 0.0, 'CAG': 0.014814814814814815, 'CCG': 0.0, 'CCC': 0.0, 'TAT': 0.096296296296296297, 'GGT': 0.0, 'TGT': 0.0, 'CGA': 0.022222222222222223, 'CCA': 0.0, 'TCT': 0.0, 'GAT': 0.022222222222222223, 'CGG': 0.0, 'CTT': 0.022222222222222223, 'TGC': 0.05185185185185185, 'GGG': 0.0, 'TAG': 0.037037037037037035, 'GGA': 0.0, 'TAA': 0.022222222222222223, 'GGC': 0.0, 'TAC': 0.0, 'TTC': 0.0, 'TCG': 0.0, 'TTT': 0.022222222222222223, 'TTG': 0.014814814814814815, 'TCC': 0.0, 'GAA': 0.0, 'TGG': 0.0, 'GCA': 0.037037037037037035, 'GTA': 0.0, 'GCC': 0.0, 'GTC': 0.0, 'TGA': 0.0, 'GCG': 0.088888888888888892, 'GTG': 0.0, 'GAG': 0.0, 'GTT': 0.0, 'GCT': 0.066666666666666666, 'TTA': 0.07407407407407407, 'GAC': 0.014814814814814815, 'CGT': 0.0, 'TCA': 0.0, 'CGC': 0.07407407407407407}
    dObsFreqWord = bioseq.freq_word(3)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dExpFreqWord, dObsFreqWord)
|
|
641
|
|
642
|
|
def test_findORF_no_ORF_in_sequence(self):
    """findORF() must return an empty position list for each of the keys 0, 1 and 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "GCGNCGCTGCTTTATT"
    expORF = {0:[],1:[],2:[]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
650
|
|
651
|
|
def test_findORF_one_ORF_in_first_phase(self):
    """findORF() must report position 0 under key 0 and nothing under keys 1 and 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGNCGCTGCTTTATT"
    expORF = {0:[0],1:[],2:[]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
659
|
|
660
|
|
def test_findORF_three_ORF_in_first_phase(self):
    """findORF() must report positions 0, 6 and 12 under key 0 only."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGCGTAGNCGTGACTGCTTTATT"
    expORF = {0:[0,6,12],1:[],2:[]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
668
|
|
669
|
|
def test_findORF_two_ORF_in_first_phase_one_ORF_in_second_phase(self):
    """findORF() must report positions 0 and 12 under key 0 and position 4 under key 1."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGCTTTATT"
    expORF = {0:[0,12],1:[4],2:[]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
677
|
|
678
|
|
def test_findORF_two_ORF_in_first_phase_three_ORF_in_second_phase(self):
    """findORF() must report positions 0 and 12 under key 0 and 4, 16 and 19 under key 1."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TAAGTAGAGNCGTGACTGATAGTATT"
    expORF = {0:[0,12],1:[4,16,19],2:[]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
686
|
|
687
|
|
def test_findORF_one_ORF_in_second_phase_three_ORF_in_third_phase(self):
    """findORF() must report position 13 under key 1 and 5, 17 and 20 under key 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "AATATTAGTGGAGTAGTTGATGATTTT"
    expORF = {0:[], 1:[13], 2:[5,17,20]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
695
|
|
696
|
|
def test_findORF_three_ORF_in_second_phase_one_ORF_in_third_phase(self):
    """findORF() must report positions 16, 19 and 25 under key 1 and position 2 under key 2."""
    bioseq = Bioseq()
    bioseq.header = "seq1 description1"
    bioseq.sequence = "TTTGAAGTGGAGGAGTTGATGATTTTAAT"
    expORF = {0:[], 1:[16, 19, 25], 2:[2]}
    obsORF = bioseq.findORF()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expORF, obsORF)
|
|
704
|
|
705
|
|
def test_upCase(self):
    """upCase() must convert the stored sequence to upper case."""
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "taattcggcct"
    expSeq = "TAATTCGGCCT"
    bioseq.upCase()
    obsSeq = bioseq.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, obsSeq)
|
|
714
|
|
715
|
|
def test_lowCase(self):
    """lowCase() must convert the stored sequence to lower case."""
    bioseq = Bioseq()
    bioseq.header = "seq description"
    bioseq.sequence = "TAATTCGGCCT"
    expSeq = "taattcggcct"
    bioseq.lowCase()
    obsSeq = bioseq.sequence
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSeq, obsSeq)
|
|
724
|
|
725
|
|
def test_getClusterID(self):
    """getClusterID() returns "0" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expID = "0"
    obsID = bioseq.getClusterID()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expID, obsID)
|
|
733
|
|
734
|
|
def test_getGroupID(self):
    """getGroupID() returns "2" for a header starting with "MbQ58Gr2Cl0"."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expID = "2"
    obsID = bioseq.getGroupID()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expID, obsID)
|
|
742
|
|
743
|
|
def test_getHeaderFullSeq(self):
    """getHeaderFullSeq() returns "chunk1", the second field of the header."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expHeader = "chunk1"
    obsHeader = bioseq.getHeaderFullSeq()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expHeader, obsHeader)
|
|
751
|
|
752
|
|
def test_getFragStrand_plus_strand(self):
    """getFragStrand() returns '+' when the header range is ascending."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74091..74624"
    bioseq.sequence = "TAATTCGGCCT"
    expStrand = '+'
    obsStrand = bioseq.getFragStrand()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expStrand, obsStrand)
|
|
760
|
|
761
|
|
def test_getFragStrand_minus_strand(self):
    """getFragStrand() returns '-' when the header range is descending."""
    bioseq = Bioseq()
    bioseq.header = "MbQ58Gr2Cl0 chunk1 {Fragment} 74624..74091"
    bioseq.sequence = "TAATTCGGCCT"
    expStrand = '-'
    obsStrand = bioseq.getFragStrand()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expStrand, obsStrand)
|
|
769
|
|
770
|
|
def test_getATGCNFromIUPAC_A(self):
    """getATGCNFromIUPAC('A') returns 'A' unchanged."""
    bioseq = Bioseq()
    expNucl = 'A'
    obsNucl = bioseq.getATGCNFromIUPAC('A')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
776
|
|
777
|
|
def test_getATGCNFromIUPAC_T(self):
    """getATGCNFromIUPAC('T') returns 'T' unchanged."""
    bioseq = Bioseq()
    expNucl = 'T'
    obsNucl = bioseq.getATGCNFromIUPAC('T')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
783
|
|
784
|
|
def test_getATGCNFromIUPAC_C(self):
    """getATGCNFromIUPAC('C') returns 'C' unchanged."""
    bioseq = Bioseq()
    expNucl = 'C'
    obsNucl = bioseq.getATGCNFromIUPAC('C')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
790
|
|
791
|
|
def test_getATGCNFromIUPAC_G(self):
    """getATGCNFromIUPAC('G') returns 'G' unchanged."""
    bioseq = Bioseq()
    expNucl = 'G'
    obsNucl = bioseq.getATGCNFromIUPAC('G')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
797
|
|
798
|
|
def test_getATGCNFromIUPAC_N(self):
    """getATGCNFromIUPAC('N') returns 'N' unchanged."""
    bioseq = Bioseq()
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('N')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
804
|
|
805
|
|
def test_getATGCNFromIUPAC_U(self):
    """getATGCNFromIUPAC('U') maps uracil to 'T'."""
    bioseq = Bioseq()
    expNucl = 'T'
    obsNucl = bioseq.getATGCNFromIUPAC('U')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
811
|
|
812
|
|
def test_getATGCNFromIUPAC_R(self):
    """getATGCNFromIUPAC('R') returns one of 'A' or 'G'."""
    bioseq = Bioseq()
    expNucleotides = ('A', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('R')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
819
|
|
820
|
|
def test_getATGCNFromIUPAC_Y(self):
    """getATGCNFromIUPAC('Y') returns one of 'C' or 'T'."""
    bioseq = Bioseq()
    expNucleotides = ('C', 'T')
    obsNucl = bioseq.getATGCNFromIUPAC('Y')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
827
|
|
828
|
|
def test_getATGCNFromIUPAC_M(self):
    """getATGCNFromIUPAC('M') returns one of 'C' or 'A'."""
    bioseq = Bioseq()
    expNucleotides = ('C', 'A')
    obsNucl = bioseq.getATGCNFromIUPAC('M')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
835
|
|
836
|
|
def test_getATGCNFromIUPAC_K(self):
    """getATGCNFromIUPAC('K') returns one of 'T' or 'G'."""
    bioseq = Bioseq()
    expNucleotides = ('T', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('K')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
843
|
|
844
|
|
def test_getATGCNFromIUPAC_W(self):
    """getATGCNFromIUPAC('W') returns one of 'T' or 'A'."""
    bioseq = Bioseq()
    expNucleotides = ('T', 'A')
    obsNucl = bioseq.getATGCNFromIUPAC('W')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
851
|
|
852
|
|
def test_getATGCNFromIUPAC_S(self):
    """getATGCNFromIUPAC('S') returns one of 'C' or 'G'."""
    bioseq = Bioseq()
    expNucleotides = ('C', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('S')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
859
|
|
860
|
|
def test_getATGCNFromIUPAC_B(self):
    """getATGCNFromIUPAC('B') returns one of 'C', 'T' or 'G'."""
    bioseq = Bioseq()
    expNucleotides = ('C', 'T', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('B')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
868
|
|
869
|
|
def test_getATGCNFromIUPAC_D(self):
    """getATGCNFromIUPAC('D') returns one of 'A', 'T' or 'G'."""
    bioseq = Bioseq()
    expNucleotides = ('A', 'T', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('D')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
877
|
|
878
|
|
def test_getATGCNFromIUPAC_H(self):
    """getATGCNFromIUPAC('H') returns one of 'C', 'T' or 'A'."""
    bioseq = Bioseq()
    expNucleotides = ('C', 'T', 'A')
    obsNucl = bioseq.getATGCNFromIUPAC('H')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
886
|
|
887
|
|
def test_getATGCNFromIUPAC_V(self):
    """getATGCNFromIUPAC('V') returns one of 'C', 'A' or 'G'."""
    bioseq = Bioseq()
    expNucleotides = ('C', 'A', 'G')
    obsNucl = bioseq.getATGCNFromIUPAC('V')
    # assertIn reports the unexpected value on failure, unlike assertTrue.
    self.assertIn(obsNucl, expNucleotides)
|
|
895
|
|
896
|
|
def test_getATGCNFromIUPAC_Z(self):
    """getATGCNFromIUPAC('Z') returns 'N' for this non-nucleotide letter."""
    bioseq = Bioseq()
    expNucl = 'N'
    obsNucl = bioseq.getATGCNFromIUPAC('Z')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expNucl, obsNucl)
|
|
902
|
|
903
|
|
def test_partialIUPAC(self):
    """partialIUPAC() replaces the 'R' in the sequence by 'A' or 'G'.

    Either replacement is accepted; all other letters (A, T, G, C, N)
    are expected to stay unchanged.
    """
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN"
    expSequences = ("ATGCNAATGCN", "ATGCNGATGCN")
    bioseq.partialIUPAC()
    obsSequence = bioseq.sequence
    # assertIn reports the unexpected sequence on failure, unlike assertTrue.
    self.assertIn(obsSequence, expSequences)
|
|
912
|
|
913
|
|
def test_checkEOF(self):
    """checkEOF() strips the carriage returns embedded in the sequence."""
    bioseq = Bioseq()
    bioseq.sequence = "ATGCNRATGCN\rATGCAAT\rTATA\r"
    bioseq.checkEOF()
    obsSequence = bioseq.sequence
    expSequence = "ATGCNRATGCNATGCAATTATA"

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expSequence, obsSequence)
|
|
922
|
|
923
|
|
def test_getLMapWhithoutGap(self):
    """getLMapWhithoutGap() returns one Map per gap-free stretch.

    The expected coordinates (1-4, 6-7, 9-11) are the 1-based bounds of
    the stretches separated by '-' in the test sequence.
    """
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map("header_subSeq1", "header", 1, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 11),
    ]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLMap, obsLMap)
|
|
932
|
|
933
|
|
def test_getLMapWhithoutGap_seqStartsWithGap(self):
    """getLMapWhithoutGap() skips a gap at the very start of the sequence.

    The first expected Map therefore starts at position 2 instead of 1.
    """
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "-TGC-RA-GCT"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map("header_subSeq1", "header", 2, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 11),
    ]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLMap, obsLMap)
|
|
942
|
|
943
|
|
def test_getLMapWhithoutGap_seqEndsWithGap(self):
    """getLMapWhithoutGap() skips a gap at the very end of the sequence.

    The last expected Map therefore ends at position 10 instead of 11.
    """
    iBioseq = Bioseq()
    iBioseq.header = "header"
    iBioseq.sequence = "ATGC-RA-GC-"
    obsLMap = iBioseq.getLMapWhithoutGap()
    expLMap = [
        Map("header_subSeq1", "header", 1, 4),
        Map("header_subSeq2", "header", 6, 7),
        Map("header_subSeq3", "header", 9, 10),
    ]

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expLMap, obsLMap)
|
|
952
|
|
def test_getGCpercentage_onlyATGC( self ):
    """getGCpercentage() returns 100 * 4 / 7 for "TGCAGCT" (4 G/C out of 7)."""
    iBs = Bioseq( "seq", "TGCAGCT" )
    exp = 100 * 4 / 7.0
    obs = iBs.getGCpercentage()
    # Compare floats with a tolerance: an exact match would depend on the
    # order of the floating-point operations inside the implementation.
    self.assertAlmostEqual( exp, obs )
|
|
958
|
|
def test_getGCpercentageInSequenceWithoutCountNInLength( self ):
    """The GC percentage is 100 * 4 / 7 when the five trailing N are ignored.

    The test sequence holds 4 G/C among 7 non-N letters, followed by 5 N.
    """
    iBs = Bioseq( "seq", "TGCAGCTNNNNN" )
    exp = 100 * 4 / 7.0
    obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()
    # Compare floats with a tolerance: an exact match would depend on the
    # order of the floating-point operations inside the implementation.
    self.assertAlmostEqual( exp, obs )
|
|
964
|
|
def test_get5PrimeFlank(self):
    """get5PrimeFlank(7, 3) returns "TTT", the 3 bases preceding position 7."""
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 3)
    expFlank = "TTT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
971
|
|
def test_get5PrimeFlank_flank_length_truncated(self):
    """A 15-base 5' flank of position 7 is cut down to the 6 available bases."""
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get5PrimeFlank(position, 15)
    expFlank = "AACTTT"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
978
|
|
def test_get5PrimeFlank_flank_of_first_base(self):
    """The 5' flank of position 1 is empty: nothing precedes the first base."""
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 1
    obsFlank = bs.get5PrimeFlank(position, 15)
    expFlank = ""
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
985
|
|
def test_get3PrimeFlank(self):
    """get3PrimeFlank(7, 3) returns "CAG", the 3 bases following position 7."""
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3)
    expFlank = "CAG"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
992
|
|
def test_get3PrimeFlank_flank_length_truncated(self):
    """A 15-base 3' flank of position 7 is cut down to the 5 available bases."""
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 15)
    expFlank = "CAGAA"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
999
|
|
def test_get3PrimeFlank_flank_of_last_base(self):
    """The 3' flank of position 12 is empty: nothing follows the last base."""
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 12
    obsFlank = bs.get3PrimeFlank(position, 15)
    expFlank = ""
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
1006
|
|
def test_get3PrimeFlank_polymLength_different_of_1(self):
    """With a third argument of 2, the 3' flank of position 7 is "AGA".

    The expected flank starts one base later than with the default call
    (presumably because the third argument is the polymorphism length,
    as the test name suggests -- confirm against Bioseq.get3PrimeFlank).
    """
    bs = Bioseq( "line1", "AACTTTCCAGAA" )
    position = 7
    obsFlank = bs.get3PrimeFlank(position, 3, 2)
    expFlank = "AGA"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expFlank, obsFlank)
|
|
1013
|
|
# Suite holding every test of Test_Bioseq. It is built at module level, so it
# exists as soon as the module is loaded, not only when run as a script.
# TestLoader.loadTestsFromTestCase replaces the deprecated unittest.makeSuite.
test_suite = unittest.TestSuite()
test_suite.addTest( unittest.TestLoader().loadTestsFromTestCase( Test_Bioseq ) )

if __name__ == "__main__":
    # Run the suite with one verbose line per test.
    unittest.TextTestRunner(verbosity=2).run( test_suite )
|