6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 from commons.core.seq.FastaUtils import FastaUtils
|
|
33 from commons.core.seq.test.Utils_for_T_FastaUtils import Utils_for_T_FastaUtils
|
|
34 from commons.core.utils.FileUtils import FileUtils
|
|
35 import glob
|
|
36 import os
|
|
37 import shutil
|
|
38 import unittest
|
|
39
|
|
40
|
|
41 class Test_FastaUtils( unittest.TestCase ):
|
|
42
|
|
43
|
|
def test_dbSize_for_empty_file(self):
    """dbSize must report 0 for the file written by _createFastaFile_for_empty_file."""
    fileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        obsNb = FastaUtils.dbSize(fileName)
        self.assertEqual(0, obsNb)
    finally:
        # Remove the fixture even when dbSize raises, so the shared file
        # name cannot leak into the next test.
        if os.path.exists(fileName):
            os.remove(fileName)
|
|
53
|
|
54
|
|
def test_dbSize_one_sequence(self):
    """dbSize must report 1 for the file written by _createFastaFile_one_sequence."""
    fileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)
        obsNb = FastaUtils.dbSize(fileName)
        self.assertEqual(1, obsNb)
    finally:
        # Remove the fixture even when dbSize raises.
        if os.path.exists(fileName):
            os.remove(fileName)
|
|
64
|
|
65
|
|
def test_dbSize_four_sequences(self):
    """dbSize must report 4 for the file written by _createFastaFile_four_sequences."""
    fileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        obsNb = FastaUtils.dbSize(fileName)
        self.assertEqual(4, obsNb)
    finally:
        # Remove the fixture even when dbSize raises.
        if os.path.exists(fileName):
            os.remove(fileName)
|
|
75
|
|
76
|
|
def test_dbChunks(self):
    """Compare the four files produced by dbChunks with their expected counterparts.

    The observed files are named after the input file with the suffixes
    '_chunks.fa', '_chunks.map', '_cut' and '.Nstretch.map'; the expected
    files use the same names prefixed with 'exp'.
    """
    inFileName = "dummyBigSeqFastaFile.fa"
    lSuffixes = ['_chunks.fa', '_chunks.map', '_cut', '.Nstretch.map']
    lExpFiles = ['exp' + inFileName + suffix for suffix in lSuffixes]
    lObsFiles = [inFileName + suffix for suffix in lSuffixes]
    try:
        Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_Chunks(lExpFiles[0])
        Utils_for_T_FastaUtils._createMapFile_of_Chunks(lExpFiles[1])
        Utils_for_T_FastaUtils._createFastaFile_of_cut(lExpFiles[2])
        Utils_for_T_FastaUtils._createFastaFile_of_Nstretch(lExpFiles[3])

        FastaUtils.dbChunks(inFileName, '60', '10', '11', '', False, 0)

        for expFile, obsFile in zip(lExpFiles, lObsFiles):
            self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Clean up whatever exists, so a failed comparison does not leave
        # stale files for the next test.
        for f in [inFileName] + lExpFiles + lObsFiles:
            if os.path.exists(f):
                os.remove(f)
|
|
110
|
|
111
|
|
def test_dbChunks_with_clean_and_prefix(self):
    """Run dbChunks with the 'outFile_chunks' prefix and the boolean flag set to True.

    Only 'outFile_chunks.fa' and 'outFile_chunks.map' are compared with the
    expected chunk and map files.
    """
    inFileName = "dummyBigSeqFastaFile.fa"
    expChunksFileName = 'exp' + inFileName + '_chunks.fa'
    expChunksMapFileName = 'exp' + inFileName + '_chunks.map'
    obsChunksFileName = "outFile_chunks.fa"
    obsChunksMapFileName = "outFile_chunks.map"
    try:
        Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)
        Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)

        FastaUtils.dbChunks(inFileName, '60', '10', '11', 'outFile_chunks', True, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expChunksFileName, obsChunksFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expChunksMapFileName, obsChunksMapFileName))
    finally:
        # Clean up even when an assertion fails.
        for f in [inFileName, expChunksFileName, expChunksMapFileName,
                  obsChunksFileName, obsChunksMapFileName]:
            if os.path.exists(f):
                os.remove(f)
|
|
133
|
|
134
|
|
def test_dbCumLength_with_empty_file(self):
    """dbCumLength, given an open handle on the empty fixture, must return 0."""
    inFileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFileName)
        # The context manager closes the handle even if dbCumLength raises.
        with open(inFileName, "r") as inFileHandler:
            obsCumulLength = FastaUtils.dbCumLength(inFileHandler)
        self.assertEqual(0, obsCumulLength)
    finally:
        if os.path.exists(inFileName):
            os.remove(inFileName)
|
|
147
|
|
def test_dbCumLength_four_sequences(self):
    """dbCumLength must return 1168 for the four-sequence fixture."""
    inFileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFileName)
        # The context manager closes the handle even if dbCumLength raises.
        with open(inFileName, "r") as inFileHandler:
            obsCumulLength = FastaUtils.dbCumLength(inFileHandler)
        self.assertEqual(1168, obsCumulLength)
    finally:
        if os.path.exists(inFileName):
            os.remove(inFileName)
|
|
160
|
|
161
|
|
def test_dbLengths(self):
    """dbLengths must return [7, 12] for two sequences of 7 and 12 bases."""
    inFileName = "dummyFastaFile.fa"
    try:
        with open(inFileName, "w") as inF:
            inF.write(">seq1\nATGACGT\n")
            inF.write(">seq2\nATGGCGAGACGT\n")
        lExp = [7, 12]
        lObs = FastaUtils.dbLengths(inFileName)
        self.assertEqual(lExp, lObs)
    finally:
        # The original removed the file only after a passing assertion.
        if os.path.exists(inFileName):
            os.remove(inFileName)
|
|
172
|
|
173
|
|
def test_dbHeaders_with_empty_file(self):
    """dbHeaders must return an empty list for the empty fixture."""
    inFile = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFile)
        lObs = FastaUtils.dbHeaders(inFile)
        self.assertEqual([], lObs)
    finally:
        # Remove the fixture even when the assertion fails.
        if os.path.exists(inFile):
            os.remove(inFile)
|
|
181
|
|
182
|
|
def test_dbHeaders_with_one_sequence_without_header(self):
    """dbHeaders must return an empty list for the header-less fixture."""
    inFile = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_sequence_without_header(inFile)
        lObs = FastaUtils.dbHeaders(inFile)
        self.assertEqual([], lObs)
    finally:
        # Remove the fixture even when the assertion fails.
        if os.path.exists(inFile):
            os.remove(inFile)
|
|
190
|
|
191
|
|
def test_dbHeaders_four_sequences(self):
    """dbHeaders must return 'seq 1'..'seq 4', in order, for the four-sequence fixture."""
    inFile = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        lExp = ["seq 1", "seq 2", "seq 3", "seq 4"]
        lObs = FastaUtils.dbHeaders(inFile)
        self.assertEqual(lExp, lObs)
    finally:
        # Remove the fixture even when the assertion fails.
        if os.path.exists(inFile):
            os.remove(inFile)
|
|
199
|
|
200
|
|
def test_dbSplit_no_in_file(self):
    """dbSplit must raise SystemExit for an input file this test never creates."""
    missingFile = "dummyFastaFile.fa"
    try:
        FastaUtils.dbSplit(missingFile, 1, False)
    except SystemExit:
        hasExited = True
    else:
        hasExited = False
    self.assertTrue(hasExited)
|
|
209
|
|
210
|
|
def test_dbSplit_emptyFile(self):
    """dbSplit on the empty fixture must not create 'batch_1.fa'."""
    inFile = "dummyFastaFile.fa"
    unexpectedBatch = "batch_1.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFile)
        FastaUtils.dbSplit(inFile, 10, False, 1)
        self.assertFalse(os.path.exists(unexpectedBatch))
    finally:
        # Also drop an unexpected batch file, so a failure here cannot
        # disturb the other dbSplit tests that read 'batch_1.fa'.
        for f in [inFile, unexpectedBatch]:
            if os.path.exists(f):
                os.remove(f)
|
|
217
|
|
218
|
|
def test_dbSplit_oneSequence_tenSequencesPerBatch(self):
    """Splitting a one-sequence file by batches of 10 must yield 'batch_1.fa' equal to the input fixture."""
    inFile = "dummyFastaFile.fa"
    expBatchFile = "dummyExpBatch_1.fa"
    obsBatchFile = "batch_1.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(inFile)
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(expBatchFile)

        FastaUtils.dbSplit(inFile, 10, False)

        self.assertTrue(FileUtils.are2FilesIdentical(expBatchFile, obsBatchFile))
    finally:
        # Clean up even when the comparison fails.
        for f in [inFile, expBatchFile, obsBatchFile]:
            if os.path.exists(f):
                os.remove(f)
|
|
234
|
|
235
|
|
def test_dbSplit_fourSequences_threeSequencesPerBatch(self):
    """Splitting four sequences by batches of 3 must yield 'batch_1.fa' and 'batch_2.fa'."""
    inFile = "dummyFastaFile.fa"
    expBatch1File = "dummyExpBatch_1.fa"
    expBatch2File = "dummyExpBatch_2.fa"
    obsBatch1File = "batch_1.fa"
    obsBatch2File = "batch_2.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        Utils_for_T_FastaUtils._createBatch1_three_sequences(expBatch1File)
        Utils_for_T_FastaUtils._createBatch2_one_sequence(expBatch2File)

        FastaUtils.dbSplit(inFile, 3, False)

        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1File, obsBatch1File))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch2File, obsBatch2File))
    finally:
        # Clean up even when a comparison fails.
        for f in [inFile, expBatch1File, expBatch2File, obsBatch1File, obsBatch2File]:
            if os.path.exists(f):
                os.remove(f)
|
|
255
|
|
256
|
|
def test_dbSplit_fourSequences_twoSequencesPerBatch_inBatchDirectory(self):
    """Splitting four sequences by batches of 2 into a directory must yield two files under 'batches/'."""
    inFile = "dummyFastaFile.fa"
    expBatch1File = "dummyExp_batch_1.fa"
    expBatch2File = "dummyExp_batch_2.fa"
    obsBatch1File = "batches/batch_1.fa"
    obsBatch2File = "batches/batch_2.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        Utils_for_T_FastaUtils._createBatch1_two_sequences(expBatch1File)
        Utils_for_T_FastaUtils._createBatch2_two_sequences(expBatch2File)

        FastaUtils.dbSplit(inFile, 2, True, 1)

        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1File, obsBatch1File))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch2File, obsBatch2File))
    finally:
        for f in [inFile, expBatch1File, expBatch2File]:
            if os.path.exists(f):
                os.remove(f)
        # The original left the 'batches' directory behind; remove it together
        # with the observed batch files it contains.
        if os.path.isdir("batches"):
            shutil.rmtree("batches")
|
|
276
|
|
277
|
|
def test_dbSplit_tenSequences_oneSequencePerBatch_inBatchDirectory(self):
    """Splitting ten sequences one per batch must yield 'batches/batch_01.fa' .. 'batches/batch_10.fa'.

    Each observed batch is compared with a one-sequence file whose header
    is 'seq <n>'. The final os.rmdir only succeeds if 'batches' is empty.
    """
    inFile = "dummyFastaFile.fa"
    Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inFile)

    FastaUtils.dbSplit(inFile, 1, True)

    for seqIndex in range(1, 11):
        # Batch labels are zero-padded to two digits: '01' .. '10'.
        batchLabel = "%02d" % seqIndex
        expBatchFile = "exp_batch_%s.fa" % (batchLabel)
        Utils_for_T_FastaUtils._createBatch_one_small_sequence(expBatchFile, "seq " + str(seqIndex))
        obsBatchFile = "batches/batch_%s.fa" % (batchLabel)
        self.assertTrue(FileUtils.are2FilesIdentical(expBatchFile, obsBatchFile))
        os.remove(expBatchFile)
        os.remove(obsBatchFile)

    os.remove(inFile)
    os.rmdir("batches")
|
|
296
|
|
297
|
|
def test_dbSplit_twoSequences_oneSequencePerBatch_useSeqHeader(self):
    """Splitting two sequences with the fourth argument True must yield 'seq_1.fa' and 'seq_2.fa'."""
    inFile = "dummyFastaFile.fa"
    lExpFileNames = ["seq_1.fa", "seq_2.fa"]
    lExpFiles = ["dummyExp_seq_1.fa", "dummyExp_seq_2.fa"]
    try:
        Utils_for_T_FastaUtils.createFastaFile_twoSequences(inFile)
        Utils_for_T_FastaUtils.createFastaFile_seq_1(lExpFiles[0])
        Utils_for_T_FastaUtils.createFastaFile_seq_2(lExpFiles[1])

        FastaUtils.dbSplit(inFile, 1, False, True)

        lObsFiles = sorted(glob.glob("seq*.fa"))
        # Fail as a test failure, not an IndexError, when too few files were produced.
        self.assertTrue(len(lObsFiles) >= len(lExpFileNames))
        for i, expName in enumerate(lExpFileNames):
            self.assertEqual(expName, lObsFiles[i])
            self.assertTrue(FileUtils.are2FilesIdentical(lExpFiles[i], lObsFiles[i]))
    finally:
        # Glob again so produced files are removed even if the test failed
        # before the observed list was built.
        for f in [inFile] + lExpFiles + glob.glob("seq*.fa"):
            if os.path.exists(f):
                os.remove(f)
|
|
317
|
|
318
|
|
def test_dbSplit_twoSequences_otherPrefix(self):
    """Splitting two sequences with the 'query' prefix must yield 'query_1.fa' and 'query_2.fa'."""
    inFile = "dummyFastaFile.fa"
    lExpFileNames = ["query_1.fa", "query_2.fa"]
    lExpFiles = ["dummyExp_seq_1.fa", "dummyExp_seq_2.fa"]
    try:
        Utils_for_T_FastaUtils.createFastaFile_twoSequences(inFile)
        Utils_for_T_FastaUtils.createFastaFile_seq_1(lExpFiles[0])
        Utils_for_T_FastaUtils.createFastaFile_seq_2(lExpFiles[1])

        FastaUtils.dbSplit(inFile, 1, False, False, "query")

        lObsFiles = sorted(glob.glob("query_*.fa"))
        # Fail as a test failure, not an IndexError, when too few files were produced.
        self.assertTrue(len(lObsFiles) >= len(lExpFileNames))
        for i, expName in enumerate(lExpFileNames):
            self.assertEqual(expName, lObsFiles[i])
            self.assertTrue(FileUtils.are2FilesIdentical(lExpFiles[i], lObsFiles[i]))
    finally:
        # Glob again so produced files are removed even if the test failed
        # before the observed list was built.
        for f in [inFile] + lExpFiles + glob.glob("query_*.fa"):
            if os.path.exists(f):
                os.remove(f)
|
|
338
|
|
339
|
|
def test_splitFastaFileInBatches(self):
    """splitFastaFileInBatches with a second argument of 60 must produce three batches under 'batches/'.

    Expected content: batch 1 holds seq6 (rewritten on 60-character lines),
    batch 2 holds seq1, batch 3 holds seq5, seq2 and seq3 in that order.
    """
    inFileName = "dummyFastaFile.fa"
    expBatch1 = "expBatch_1.fa"
    expBatch2 = "expBatch_2.fa"
    expBatch3 = "expBatch_3.fa"
    try:
        with open(inFileName, "w") as f:
            f.write(">seq1\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
            f.write(">seq2\n")
            f.write("ATCGCTAGCTAGCTCG\n")
            f.write(">seq3\n")
            f.write("GTTTGGATCGCT\n")
            f.write(">seq6\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCCTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
            f.write(">seq5\n")
            f.write("TTGGATCGCTCTCTGCTCGGAAATCCCGTC\n")
        with open(expBatch1, "w") as f:
            f.write(">seq6\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCCTCT\n")
            f.write("GTTTGGATCGCTCTCTGCTCGGAAATCC\n")
        with open(expBatch2, "w") as f:
            f.write(">seq1\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
        with open(expBatch3, "w") as f:
            f.write(">seq5\n")
            f.write("TTGGATCGCTCTCTGCTCGGAAATCCCGTC\n")
            f.write(">seq2\n")
            f.write("ATCGCTAGCTAGCTCG\n")
            f.write(">seq3\n")
            f.write("GTTTGGATCGCT\n")

        FastaUtils.splitFastaFileInBatches(inFileName, 60)

        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1, "batches/batch_1.fa"))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch2, "batches/batch_2.fa"))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch3, "batches/batch_3.fa"))
    finally:
        # Clean up even when a comparison fails.
        for path in [inFileName, expBatch1, expBatch2, expBatch3]:
            if os.path.exists(path):
                os.remove(path)
        if os.path.isdir("batches"):
            shutil.rmtree("batches")
|
|
386
|
|
387
|
|
def test_splitFastaFileInBatches_one_seq(self):
    """A single 16-base sequence must end up unchanged in 'batches/batch_1.fa'."""
    inFileName = "dummyFastaFile.fa"
    expBatch1 = "expBatch_1.fa"
    try:
        # The input and the expected batch have identical content.
        for path in [inFileName, expBatch1]:
            with open(path, "w") as f:
                f.write(">seq2\n")
                f.write("ATCGCTAGCTAGCTCG\n")

        FastaUtils.splitFastaFileInBatches(inFileName, 60)

        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1, "batches/batch_1.fa"))
    finally:
        # Clean up even when the comparison fails.
        for path in [inFileName, expBatch1]:
            if os.path.exists(path):
                os.remove(path)
        if os.path.isdir("batches"):
            shutil.rmtree("batches")
|
|
407
|
|
408
|
|
def test_splitSeqPerCluster_no_in_file(self):
    """splitSeqPerCluster must raise SystemExit for an input file this test never creates."""
    missingFile = "dummyFastaFile.fa"
    try:
        FastaUtils.splitSeqPerCluster(missingFile, "Piler", False, False, "seqCluster")
    except SystemExit:
        hasExited = True
    else:
        hasExited = False
    self.assertTrue(hasExited)
|
|
417
|
|
418
|
|
def test_splitSeqPerCluster_in_file_empty(self):
    """splitSeqPerCluster on a zero-byte input must create no 'seqCluster*.fa' file."""
    inFileName = "dummyFastaFile.fa"
    try:
        # Opening for writing and closing immediately creates an empty file.
        with open(inFileName, 'w'):
            pass

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        self.assertEqual(glob.glob("seqCluster*.fa"), [])
    finally:
        # Remove the fixture even when the assertion fails.
        if os.path.exists(inFileName):
            os.remove(inFileName)
|
|
429
|
|
430
|
|
def test_splitSeqPerCluster_four_sequences_without_dir(self):
    """Four sequences must be split into 'seqCluster1.fa', 'seqCluster2.fa' and 'seqCluster3.574.fa'."""
    inFileName = "dummyFastaFile.fa"
    lExpFiles = ["exp_seqCluster1.fa", "exp_seqCluster2.fa", "exp_seqCluster3.574.fa"]
    lObsFiles = ["seqCluster1.fa", "seqCluster2.fa", "seqCluster3.574.fa"]
    try:
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result(lExpFiles[0])
        Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result(lExpFiles[1])
        Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result(lExpFiles[2])

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        for expFile, obsFile in zip(lExpFiles, lObsFiles):
            self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Clean up even when a comparison fails.
        for f in [inFileName] + lExpFiles + lObsFiles:
            if os.path.exists(f):
                os.remove(f)
|
|
459
|
|
460
|
|
def test_splitSeqPerCluster_four_sequences_without_dir_no_split(self):
    """When all four sequences share a cluster, 'seqCluster1.fa' must equal the input fixture."""
    inFileName = "dummyFastaFile.fa"
    expClusterFileName = "exp_seqCluster.fa"
    obsClusterFileName = "seqCluster1.fa"
    try:
        # The input and the expected output come from the same fixture helper.
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_in_same_cluster(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_in_same_cluster(expClusterFileName)

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))
    finally:
        # Clean up even when the comparison fails.
        for f in [inFileName, expClusterFileName, obsClusterFileName]:
            if os.path.exists(f):
                os.remove(f)
|
|
477
|
|
478
|
|
def test_splitSeqPerCluster_four_sequences_without_dir_shuffle(self):
    """The 'shuffle' input fixture must give the same three cluster files as the unshuffled test expects."""
    inFileName = "dummyFastaFile.fa"
    lExpFiles = ["exp_seqCluster1.fa", "exp_seqCluster2.fa", "exp_seqCluster3.574.fa"]
    lObsFiles = ["seqCluster1.fa", "seqCluster2.fa", "seqCluster3.574.fa"]
    try:
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_shuffle(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result(lExpFiles[0])
        Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result(lExpFiles[1])
        Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result(lExpFiles[2])

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        for expFile, obsFile in zip(lExpFiles, lObsFiles):
            self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Clean up even when a comparison fails.
        for f in [inFileName] + lExpFiles + lObsFiles:
            if os.path.exists(f):
                os.remove(f)
|
|
507
|
|
508
|
|
def test_splitSeqPerCluster_four_sequences_simplify_header(self):
    """With the third argument True, the cluster files must match the '..._with_simplify_header' fixtures."""
    inFileName = "dummyFastaFile.fa"
    lExpFiles = ["exp_seqCluster1.fa", "exp_seqCluster2.fa", "exp_seqCluster3.574.fa"]
    lObsFiles = ["seqCluster1.fa", "seqCluster2.fa", "seqCluster3.574.fa"]
    try:
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result_with_simplify_header(lExpFiles[0])
        Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result_with_simplify_header(lExpFiles[1])
        Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result_with_simplify_header(lExpFiles[2])

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", True, False, "seqCluster")

        for expFile, obsFile in zip(lExpFiles, lObsFiles):
            self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Clean up even when a comparison fails.
        for f in [inFileName] + lExpFiles + lObsFiles:
            if os.path.exists(f):
                os.remove(f)
|
|
537
|
|
538
|
|
def test_splitSeqPerCluster_four_sequences_with_dir(self):
    """With the fourth argument True, each cluster file must sit in '<input>_cluster_<id>/'.

    For every cluster id the expected file is generated, compared with
    '<input>_cluster_<id>/seqCluster<id>.fa', and then both files and the
    cluster directory are removed.
    """
    inFileName = "dummyFastaFile.fa"
    Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
    FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, True, "seqCluster")
    os.remove(inFileName)

    # Cluster id paired with the helper that writes its expected content.
    lClusters = [
        ('1', Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result),
        ('2', Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result),
        ('3.574', Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result),
    ]
    for clusterId, createExpFile in lClusters:
        expClusterFileName = "exp_cluster" + clusterId + ".fa"
        createExpFile(expClusterFileName)

        clusterDir = inFileName + "_cluster_" + clusterId
        obsClusterFileName = clusterDir + "/seqCluster" + clusterId + ".fa"
        self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))
        os.remove(expClusterFileName)
        os.remove(obsClusterFileName)
        os.rmdir(clusterDir)
|
|
559
|
|
560
|
|
def test_dbLengthFilter_with_one_sequence(self):
    """dbLengthFilter(12, ...) on one sequence must give an empty '.Inf12' file and a '.Sup12' file equal to the input fixture."""
    fileName = "dummyFastaFile.fa"
    expFileNameInf = "exp_dummyFastaFile.fa.Inf12"
    expFileNameSup = "exp_dummyFastaFile.fa.Sup12"
    obsFileNameInf = "dummyFastaFile.fa.Inf12"
    obsFileNameSup = "dummyFastaFile.fa.Sup12"
    try:
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(expFileNameInf)
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(expFileNameSup)

        FastaUtils.dbLengthFilter(12, fileName, verbose=0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameInf, obsFileNameInf))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameSup, obsFileNameSup))
    finally:
        # Clean up even when a comparison fails.
        for f in [fileName, expFileNameInf, expFileNameSup, obsFileNameInf, obsFileNameSup]:
            if os.path.exists(f):
                os.remove(f)
|
|
583
|
|
def test_dbLengthFilter_with_four_sequence(self):
    """dbLengthFilter(130, ...) on four sequences must give the expected '.Inf130' and '.Sup130' files."""
    fileName = "dummyFastaFile.fa"
    expFileNameInf = "exp_dummyFastaFile.fa.Inf130"
    expFileNameSup = "exp_dummyFastaFile.fa.Sup130"
    obsFileNameInf = "dummyFastaFile.fa.Inf130"
    obsFileNameSup = "dummyFastaFile.fa.Sup130"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(expFileNameInf)
        Utils_for_T_FastaUtils._createResult_of_dbLengthFilter_sup(expFileNameSup)

        FastaUtils.dbLengthFilter(130, fileName, verbose=0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameInf, obsFileNameInf))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameSup, obsFileNameSup))
    finally:
        # Clean up even when a comparison fails.
        for f in [fileName, expFileNameInf, expFileNameSup, obsFileNameInf, obsFileNameSup]:
            if os.path.exists(f):
                os.remove(f)
|
|
606
|
|
def test_dbLongestSequences_with_empty_file(self):
    """dbLongestSequences(1, ...) must return 0 for the empty fixture."""
    fileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        obsResult = FastaUtils.dbLongestSequences(1, fileName)
        self.assertEqual(0, obsResult)
    finally:
        # Remove the fixture even when the assertion fails.
        if os.path.exists(fileName):
            os.remove(fileName)
|
|
618
|
|
def test_dbLongestSequences_with_one_longest_sequence(self):
    """Asking for the single longest of four sequences must write 'seq 3' to '<input>.best1'."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa.best1"
    obsFileName = "dummyFastaFile.fa.best1"
    fullLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    try:
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        # Expected record: header, seven full 60-base lines, one 7-base line.
        with open(expFileName, 'w') as f:
            f.write(">seq 3\n")
            f.write(fullLine * 7)
            f.write("ATATTCG\n")

        FastaUtils.dbLongestSequences(1, fileName, outFileName="", verbose=0, minThresh=0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
645
|
|
def test_dbLongestSequences_with_two_longest_sequence(self):
    """Asking for the two longest of three sequences must write 'seq 2' and 'seq 4' to '<input>.best2'."""
    fileName = "dummyFastaFile.fa"
    # NOTE: the expected file is named '...best1' although two sequences are
    # requested; only its content is compared, so the name is kept as is.
    expFileName = "exp_dummyFastaFile.fa.best1"
    obsFileName = "dummyFastaFile.fa.best2"
    fullLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    try:
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        # Both expected records have five full 60-base lines and one 7-base line.
        with open(expFileName, 'w') as f:
            for header in [">seq 2\n", ">seq 4\n"]:
                f.write(header)
                f.write(fullLine * 5)
                f.write("ATATTCG\n")

        FastaUtils.dbLongestSequences(2, fileName, outFileName="", verbose=0, minThresh=0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
675
|
|
def test_dbExtractSeqHeaders(self):
    """dbExtractSeqHeaders must write 'seq 1', 'seq 2', 'seq 4' (one per line) to '<input>.headers'."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.headers"
    try:
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        with open(expFileName, 'w') as f:
            f.write("seq 1\n")
            f.write("seq 2\n")
            f.write("seq 4\n")

        FastaUtils.dbExtractSeqHeaders(fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
694
|
|
def test_dbExtractSeqHeaders_with_empty_file(self):
    """dbExtractSeqHeaders on the empty fixture must write an empty '<input>.headers' file."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.headers"
    try:
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        # The expected file is empty.
        with open(expFileName, 'w') as f:
            f.write("")

        FastaUtils.dbExtractSeqHeaders(fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
711
|
|
def test_dbExtractSeqHeaders_without_header(self):
    """dbExtractSeqHeaders on the header-less fixture must write an empty '<input>.headers' file."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.headers"
    try:
        Utils_for_T_FastaUtils._createFastaFile_sequence_without_header(fileName)
        # The expected file is empty.
        with open(expFileName, 'w') as f:
            f.write("")

        FastaUtils.dbExtractSeqHeaders(fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
728
|
|
def test_dbExtractByPattern_without_pattern(self):
    """dbExtractByPattern called with an empty pattern must return None."""
    fileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        obsResult = FastaUtils.dbExtractByPattern("", fileName)
        self.assertEqual(None, obsResult)
    finally:
        # Remove the fixture even when the assertion fails.
        if os.path.exists(fileName):
            os.remove(fileName)
|
|
740
|
|
def test_dbExtractByPattern(self):
    """With pattern 'seq', '<input>.extracted' must equal the three-sequence input fixture."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.extracted"
    try:
        # The input and the expected output come from the same fixture helper.
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(expFileName)

        FastaUtils.dbExtractByPattern('seq', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
756
|
|
def test_dbExtractByPattern_with_2_as_pattern(self):
    """With pattern ' 2', '<input>.extracted' must contain only the 'seq 2' record."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.extracted"
    fullLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    try:
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        # Expected record: header, five full 60-base lines, one 7-base line.
        with open(expFileName, 'w') as f:
            f.write(">seq 2\n")
            f.write(fullLine * 5)
            f.write("ATATTCG\n")

        FastaUtils.dbExtractByPattern(' 2', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even when the comparison fails.
        for path in [fileName, expFileName, obsFileName]:
            if os.path.exists(path):
                os.remove(path)
|
|
780
|
|
def test_dbExtractByPattern_with_sandie_as_pattern(self):
    """With pattern 'sandie', the '.extracted' output must equal the 'empty file' fixture."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.extracted"
    try:
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        # NOTE(review): presumably this helper writes an empty file - confirm in the helper.
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(expFileName)

        FastaUtils.dbExtractByPattern('sandie', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: other tests reuse these file names.
        for path in (fileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
796
|
|
def test_dbExtractByFilePattern_empty_pattern_filename(self):
    """dbExtractByFilePattern must raise SystemExit when the pattern file name is empty."""
    emptyPatternFileName = ""
    self.assertRaises(SystemExit,
                      FastaUtils.dbExtractByFilePattern,
                      emptyPatternFileName, None, "")
|
|
805
|
|
def test_dbExtractByFilePattern(self):
    """Extraction driven by the pattern-file fixture must keep seq 1, 3, 8 and 10 of the ten-sequence input."""
    fileName = "dummyFastaFile.fa"
    patternFileName = "dummyPatternFile.txt"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.extracted"
    seqLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    try:
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)
        Utils_for_T_FastaUtils._createPatternFile(patternFileName)

        with open(expFileName, 'w') as f:
            for seqNb in (1, 3, 8, 10):
                f.write(">seq %d\n" % seqNb)
                f.write(seqLine)

        FastaUtils.dbExtractByFilePattern(patternFileName, fileName, "")

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: other tests reuse these file names.
        for path in (fileName, patternFileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
834
|
|
def test_dbCleanByPattern_without_pattern(self):
    """dbCleanByPattern must return None when the pattern is the empty string."""
    fileName = "dummyFastaFile.fa"
    try:
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)

        obsResult = FastaUtils.dbCleanByPattern("", fileName)

        expResult = None
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expResult, obsResult)
    finally:
        # Remove the fixture even if the call or the assertion fails.
        if os.path.exists(fileName):
            os.remove(fileName)
|
|
846
|
|
def test_dbCleanByPattern(self):
    """With pattern '2', the '.cleaned' output must hold every record of the ten-sequence input except 'seq 2'."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.cleaned"
    seqLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    try:
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)

        with open(expFileName, 'w') as f:
            # seq 1, then seq 3 to seq 10; seq 2 is the one removed.
            for seqNb in (1, 3, 4, 5, 6, 7, 8, 9, 10):
                f.write(">seq %d\n" % seqNb)
                f.write(seqLine)

        FastaUtils.dbCleanByPattern('2', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: other tests reuse these file names.
        for path in (fileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
881
|
|
def test_dbCleanByPattern_with_expectedFile_empty(self):
    """With pattern 'seq', the '.cleaned' output of the ten-sequence input must be empty."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.cleaned"
    try:
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)

        # The expected cleaned file is empty.
        with open(expFileName, 'w') as f:
            f.write("")

        FastaUtils.dbCleanByPattern('seq', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: other tests reuse these file names.
        for path in (fileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
899
|
|
def test_dbCleanByFilePattern_empty_pattern_filename(self):
    """dbCleanByFilePattern must raise SystemExit when the pattern file name is empty."""
    emptyPatternFileName = ""
    self.assertRaises(SystemExit,
                      FastaUtils.dbCleanByFilePattern,
                      emptyPatternFileName, None, "")
|
|
908
|
|
def test_dbCleanByFilePattern(self):
    """Cleaning driven by the pattern-file fixture must leave seq 2, 4, 5, 6, 7 and 9 of the ten-sequence input."""
    fileName = "dummyFastaFile.fa"
    patternFileName = "dummyPatternFile.txt"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "dummyFastaFile.fa.cleaned"
    seqLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    try:
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)
        Utils_for_T_FastaUtils._createPatternFile(patternFileName)

        with open(expFileName, 'w') as f:
            for seqNb in (2, 4, 5, 6, 7, 9):
                f.write(">seq %d\n" % seqNb)
                f.write(seqLine)

        FastaUtils.dbCleanByFilePattern(patternFileName, fileName, "")

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: other tests reuse these file names.
        for path in (fileName, patternFileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
941
|
|
def test_dbORF_without_ORF(self):
    """dbORF(fileName, 0, 0) must write an empty '.ORF.map' file for the GGGTT-repeat sequence."""
    fileName = "dummy.fa"
    expFileName = "exp.ORF.map"
    obsFileName = "%s.ORF.map" % fileName
    try:
        with open(fileName, "w") as f:
            f.write(">dummy\n")
            f.write("GGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTT\n")

        # The expected map file is empty.
        with open(expFileName, "w") as f:
            f.write("")

        FastaUtils.dbORF(fileName, 0, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: another dbORF test reuses these file names.
        for path in (fileName, obsFileName, expFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
960
|
|
def test_dbORF_with_one_ORF(self):
    """dbORF(fileName, 0, 0) must report the single expected map line for 'seq1'."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyORFFile.ORF.map"
    obsFileName = fileName + ".ORF.map"
    try:
        with open(fileName, 'w') as f:
            f.write(">seq1\n")
            f.write("GAAAATATGGGGTAGATAAGGGATCTGGGTTAATTTTTT\n")

        with open(expFileName, 'w') as f:
            f.write("ORF|1|17\tseq1\t16\t33\n")

        FastaUtils.dbORF(fileName, 0, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: other tests reuse 'dummyFastaFile.fa'.
        for path in (fileName, obsFileName, expFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
981
|
|
def test_dbORF_with_real_ORF(self):
    """dbORF(fileName, 10, 30) on three DmelChr4 sequences must produce the expected '.ORF.map' lines."""
    fileName = "dummy.fa"
    expFileName = "exp.ORF.map"
    obsFileName = "%s.ORF.map" % fileName
    try:
        with open(fileName, "w") as f:
            f.write(">DmelChr4_Blaster_Recon_13_Map_4\n")
            f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n")
            f.write("TTGCGGATCATTTTGTTTGAACAACCGACAATGCGACCAATTTCAGCGTAGGTTTTACCT\n")
            f.write("TCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAATGCTTTCCGCGACCC\n")
            f.write("ATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAAAACCTTTAATACAAC\n")
            f.write("TCCTTTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACTCCTATTAATTTTATTCA\n")
            f.write("GCAAATACGTGTTCAGTGCTATTTTTGTTACCGCCTCATTTCGCGCACTTTTGCAGCAAG\n")
            f.write("TGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAATTTCTTGCTCAGAGAGCC\n")
            f.write("AACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAATATAAACATTTAATAATT\n")
            f.write("TTTTTTAGGAAATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATA\n")
            f.write(">DmelChr4_Blaster_Piler_3.5_Map_7\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n")
            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n")
            f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n")
            f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n")
            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n")
            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n")
            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n")
            f.write(">DmelChr4_Blaster_Grouper_10_Map_13\n")
            f.write("GCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGCACTATGCAGC\n")
            f.write("CACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTGAGAGCGTAAG\n")
            f.write("AAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGAGAACGCGTAT\n")
            f.write("AAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCTGATCGAAGAA\n")
            f.write("ACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAATATGATAAAA\n")
            f.write("TAAAAAAATTTTTAAAAATTCGCGCCCTGACTATTATAATTTTAAAGCTTTTTAAAATTT\n")
            f.write("GTTTGTTAAAATCGCCGCTCGAATTAGCTACCGTTTACACATTTATATTTATGTTTAATT\n")
            f.write("CTAATTTGTCTCTCATCTGACAATTTTTTAAGAAAGCGAAATATTTTTTTTTTGAAACAC\n")
            f.write("TTTTAATGTTAATGTTACATCATATTAAGTCAAATGATTTAATAAATATACTAAATAATT\n")
            f.write("AAATATGATAACTGTTTATTGCAAAAGTAATATCAAAGACACTAGAATTATTCTAGTGTC\n")
            f.write("TTTGCTTTGTTCATATCTTGAGGCACGAAGTGCGGACACAAGCACTCAACAATCATTGCC\n")
            f.write("TTATTAATTTTTCACACGCCGCAAGATGAATACTCTAATGACAAATATTCTTATATAAAG\n")
            f.write("TCATTTTTGAAATTTATTTTTGTGATAATATGTACATAGATTTGGCTATTTCTAATCTAT\n")
            f.write("TTTCAAATAATAATAACGTTAAGGCAATGCAAAACAAGAATTTTTTTAGTCGCATGGTGC\n")
            f.write("CAATTGATCAAAAATAATATAGATTTAAAGTCTAAGAACTTCTAAGGTGAAGGGCATATT\n")
            f.write("TTGTCAAATTTACAATGCATGAGCGAGCATACGTGTGCACACATACAGTTGTCTGCTATC\n")
            f.write("ACTTTGTGCGTTGAAAA\n")

        with open(expFileName, "w") as f:
            f.write("ORF|3|263\tDmelChr4_Blaster_Recon_13_Map_4\t189\t452\n")
            f.write("ORF|2|206\tDmelChr4_Blaster_Recon_13_Map_4\t185\t391\n")
            f.write("ORF|-3|164\tDmelChr4_Blaster_Recon_13_Map_4\t382\t218\n")
            f.write("ORF|-1|161\tDmelChr4_Blaster_Recon_13_Map_4\t297\t136\n")
            f.write("ORF|1|113\tDmelChr4_Blaster_Recon_13_Map_4\t400\t513\n")
            f.write("ORF|1|113\tDmelChr4_Blaster_Recon_13_Map_4\t112\t225\n")
            f.write("ORF|3|107\tDmelChr4_Blaster_Recon_13_Map_4\t81\t188\n")
            f.write("ORF|1|107\tDmelChr4_Blaster_Recon_13_Map_4\t292\t399\n")
            f.write("ORF|-1|104\tDmelChr4_Blaster_Recon_13_Map_4\t432\t328\n")
            f.write("ORF|-2|104\tDmelChr4_Blaster_Recon_13_Map_4\t515\t411\n")
            f.write("ORF|3|116\tDmelChr4_Blaster_Piler_3.5_Map_7\t393\t509\n")
            f.write("ORF|-3|116\tDmelChr4_Blaster_Piler_3.5_Map_7\t505\t389\n")
            f.write("ORF|-2|86\tDmelChr4_Blaster_Piler_3.5_Map_7\t518\t432\n")
            f.write("ORF|1|80\tDmelChr4_Blaster_Piler_3.5_Map_7\t436\t516\n")
            f.write("ORF|-3|170\tDmelChr4_Blaster_Grouper_10_Map_13\t222\t52\n")
            f.write("ORF|-1|161\tDmelChr4_Blaster_Grouper_10_Map_13\t260\t99\n")
            f.write("ORF|3|155\tDmelChr4_Blaster_Grouper_10_Map_13\t702\t857\n")
            f.write("ORF|3|152\tDmelChr4_Blaster_Grouper_10_Map_13\t288\t440\n")
            f.write("ORF|1|137\tDmelChr4_Blaster_Grouper_10_Map_13\t622\t759\n")
            f.write("ORF|2|128\tDmelChr4_Blaster_Grouper_10_Map_13\t539\t667\n")
            f.write("ORF|1|125\tDmelChr4_Blaster_Grouper_10_Map_13\t760\t885\n")
            f.write("ORF|2|122\tDmelChr4_Blaster_Grouper_10_Map_13\t14\t136\n")
            f.write("ORF|-2|113\tDmelChr4_Blaster_Grouper_10_Map_13\t847\t734\n")
            f.write("ORF|1|110\tDmelChr4_Blaster_Grouper_10_Map_13\t154\t264\n")

        FastaUtils.dbORF(fileName, 10, 30)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: another dbORF test reuses these file names.
        for path in (fileName, obsFileName, expFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
1062
|
|
def test_sortSequencesByIncreasingLength(self):
    """Sequences of length 60, 120 and 32 must be written back in the order 32, 60, 120."""
    fileName = "dummyFastaFile.fa"
    expFileName = "exp_dummyFastaFile.fa"
    obsFileName = "obs_dummyFastaFile.fa"
    line60 = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    line32 = "ATATTCGCGCATCGATCGATCGGCGGCTATAT\n"
    try:
        with open(fileName, 'w') as f:
            f.write(">seq1_length_60\n")
            f.write(line60)
            f.write(">seq2_length_120\n")
            f.write(line60 * 2)
            f.write(">seq3_length_32\n")
            f.write(line32)

        with open(expFileName, 'w') as f:
            f.write(">seq3_length_32\n")
            f.write(line32)
            f.write(">seq1_length_60\n")
            f.write(line60)
            f.write(">seq2_length_120\n")
            f.write(line60 * 2)

        FastaUtils.sortSequencesByIncreasingLength(fileName, obsFileName, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # The input file is removed as well; it used to be left behind on disk.
        for path in (fileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
1095
|
|
def test_sortSequencesByIncreasingLength_in_file_do_not_exists(self):
    """sortSequencesByIncreasingLength must raise SystemExit for 'dummyFile.fa', which this test never creates."""
    absentFileName = "dummyFile.fa"
    self.assertRaises(SystemExit,
                      FastaUtils.sortSequencesByIncreasingLength,
                      absentFileName, "", 0)
|
|
1105
|
|
def test_sortSequencesByHeader(self):
    """Records headed 'seq1::test-test', 'seq3', 'seq2' must come out ordered seq1::test-test, seq2, seq3."""
    fileName = "dummyFastaFile.fa"
    expFileName = "expFastaFile.fa"
    obsFileName = "obsFastaFile.fa"
    line60 = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    line32 = "ATATTCGCGCATCGATCGATCGGCGGCTATAT\n"
    try:
        with open(fileName, "w") as f:
            f.write(">seq1::test-test\n")
            f.write(line60)
            f.write(">seq3\n")
            f.write(line32)
            f.write(">seq2\n")
            f.write(line60 * 2)

        with open(expFileName, "w") as f:
            f.write(">seq1::test-test\n")
            f.write(line60)
            f.write(">seq2\n")
            f.write(line60 * 2)
            f.write(">seq3\n")
            f.write(line32)

        FastaUtils.sortSequencesByHeader(fileName, obsFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # The input file is removed as well; it used to be left behind on disk.
        for path in (fileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
1134
|
|
def test_sortSequencesByHeader_no_outFileName(self):
    """Without an output name, the result is expected in 'dummyFastaFile_sortByHeaders.fa', ordered seq1, seq12, seq2."""
    fileName = "dummyFastaFile.fa"
    expFileName = "expFastaFile.fa"
    obsFileName = "dummyFastaFile_sortByHeaders.fa"
    line60 = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
    line32 = "ATATTCGCGCATCGATCGATCGGCGGCTATAT\n"
    try:
        with open(fileName, "w") as f:
            f.write(">seq12\n")
            f.write(line60)
            f.write(">seq1\n")
            f.write(line32)
            f.write(">seq2\n")
            f.write(line60 * 2)

        with open(expFileName, "w") as f:
            f.write(">seq1\n")
            f.write(line32)
            f.write(">seq12\n")
            f.write(line60)
            f.write(">seq2\n")
            f.write(line60 * 2)

        FastaUtils.sortSequencesByHeader(fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # The input file is removed as well; it used to be left behind on disk.
        for path in (fileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
1163
|
|
def test_getLengthPerHeader(self):
    """getLengthPerHeader must map each header to its total sequence length, summing multi-line sequences."""
    inFile = "dummyFile.fa"
    try:
        with open(inFile, "w") as inFileHandler:
            inFileHandler.write(">seq1\nAGCGATGCGT\n")
            inFileHandler.write(">seq2\nAGCGATG\n")
            # seq3 spans two lines of 15 bases each.
            inFileHandler.write(">seq3\nAGCGATGGTGCGTGC\n")
            inFileHandler.write("AGCGATGGTGCGTGC\n")

        dExp = {"seq1": 10, "seq2": 7, "seq3": 30}

        dObs = FastaUtils.getLengthPerHeader(inFile, 0)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(dExp, dObs)
    finally:
        # Remove the fixture even if the call or the assertion fails.
        if os.path.exists(inFile):
            os.remove(inFile)
|
|
1180
|
|
def test_convertFastaHeadersFromChkToChr_grouper(self):
    """Grouper-style headers with chunk coordinates must be rewritten with 'dmel_chr4' coordinates using the map file."""
    inFile = "dummyFastaFile.fa"
    mapFile = "dummyMapFile.map"
    expFile = "expFile.fa"
    obsFile = "obsFile.fa"
    try:
        with open(inFile, "w") as f:
            f.write(">MbQ1Gr1Cl0 chunk6 {Fragment} 95523..96053\n")
            f.write("AGCGTGCA\n")
            f.write(">MbQ77Gr8Cl0 chunk7 {Fragment} 123657..122568,121935..121446\n")
            f.write("AGCATGC\n")
            f.write(">MbS78Gr8Cl0 chunk7 {Fragment} 140078..139519,139470..138985,138651..138183\n")
            f.write("CGTGCG\n")
            f.write(">MbQ79Gr8Cl0 chunk7 {Fragment} 48021..48587,48669..49153,57346..57834\n")
            f.write("AGCGTGC\n")

        with open(mapFile, "w") as f:
            f.write("chunk5\tdmel_chr4\t760001\t960000\n")
            f.write("chunk6\tdmel_chr4\t950001\t1150000\n")
            f.write("chunk7\tdmel_chr4\t1140001\t1281640\n")

        with open(expFile, "w") as f:
            f.write(">MbQ1Gr1Cl0 dmel_chr4 {Fragment} 1045523..1046053\n")
            f.write("AGCGTGCA\n")
            f.write(">MbQ77Gr8Cl0 dmel_chr4 {Fragment} 1263657..1262568,1261935..1261446\n")
            f.write("AGCATGC\n")
            f.write(">MbS78Gr8Cl0 dmel_chr4 {Fragment} 1280078..1279519,1279470..1278985,1278651..1278183\n")
            f.write("CGTGCG\n")
            f.write(">MbQ79Gr8Cl0 dmel_chr4 {Fragment} 1188021..1188587,1188669..1189153,1197346..1197834\n")
            f.write("AGCGTGC\n")

        FastaUtils.convertFastaHeadersFromChkToChr(inFile, mapFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # 'path' rather than 'file', so the builtin is not shadowed.
        for path in (inFile, mapFile, expFile, obsFile):
            if os.path.exists(path):
                os.remove(path)
|
|
1215
|
|
def test_convertFastaHeadersFromChkToChr_blastclust(self):
    """Blastclust-style headers ('..._chunkN (dbseq-nr k) [s,e]') must be rewritten with chromosome names and coordinates."""
    inFile = "dummyFastaFile.fa"
    mapFile = "dummyMapFile.map"
    expFile = "expFile.fa"
    obsFile = "obsFile.fa"
    try:
        with open(inFile, "w") as f:
            f.write(">BlastclustCluster12Mb63_chunk1 (dbseq-nr 0) [1,10]\n")
            f.write("AGCGTGCA\n")
            f.write(">BlastclustCluster12Mb53_chunk2 (dbseq-nr 2) [1,10]\n")
            f.write("AGCATGC\n")
            f.write(">BlastclustCluster12Mb26_chunk2 (dbseq-nr 2) [12,18]\n")
            f.write("CGTGCG\n")
            f.write(">BlastclustCluster12Mb35_chunk3 (dbseq-nr 0) [10,1]\n")
            f.write("AGCGTGC\n")

        with open(mapFile, "w") as f:
            f.write("chunk1\tchromosome1\t1\t20\n")
            f.write("chunk2\tchromosome1\t16\t35\n")
            f.write("chunk3\tchromosome2\t1\t20\n")

        with open(expFile, "w") as f:
            f.write(">BlastclustCluster12Mb63 chromosome1 (dbseq-nr 0) 1..10\n")
            f.write("AGCGTGCA\n")
            f.write(">BlastclustCluster12Mb53 chromosome1 (dbseq-nr 2) 16..25\n")
            f.write("AGCATGC\n")
            f.write(">BlastclustCluster12Mb26 chromosome1 (dbseq-nr 2) 27..33\n")
            f.write("CGTGCG\n")
            f.write(">BlastclustCluster12Mb35 chromosome2 (dbseq-nr 0) 10..1\n")
            f.write("AGCGTGC\n")

        FastaUtils.convertFastaHeadersFromChkToChr(inFile, mapFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # 'path' rather than 'file', so the builtin is not shadowed.
        for path in (inFile, mapFile, expFile, obsFile):
            if os.path.exists(path):
                os.remove(path)
|
|
1250
|
|
def test_convertFastaToLength(self):
    """convertFastaToLength must write one '<first header word>\\t<sequence length>' line per record."""
    inFile = "dummyFastaFile.fa"
    expFile = "expFile.length"
    obsFile = "obsFile.length"
    try:
        with open(inFile, "w") as inFileHandler:
            inFileHandler.write(">ReconCluster12Mb63 chunk1 {Fragment} 1..10\n")
            inFileHandler.write("AGCGTGCA\n")
            inFileHandler.write(">ReconCluster12Mb53 chunk2 {Fragment} 1..10\n")
            inFileHandler.write("AGCATGCAA\n")
            inFileHandler.write(">ReconCluster12Mb26 chunk2 {Fragment} 12..18\n")
            inFileHandler.write("CGTGCGAAAA\n")
            inFileHandler.write(">ReconCluster12Mb35 chunk3 {Fragment} 10..1\n")
            inFileHandler.write("AGCGTG\n")

        with open(expFile, "w") as expFileHandler:
            expFileHandler.write("ReconCluster12Mb63\t8\n")
            expFileHandler.write("ReconCluster12Mb53\t9\n")
            expFileHandler.write("ReconCluster12Mb26\t10\n")
            expFileHandler.write("ReconCluster12Mb35\t6\n")

        FastaUtils.convertFastaToLength(inFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Clean up even on failure: other tests reuse 'dummyFastaFile.fa'.
        for path in (inFile, expFile, obsFile):
            if os.path.exists(path):
                os.remove(path)
|
|
1280
|
|
def test_convertFastaToSeq(self):
    """convertFastaToSeq must write '<name>\\t<sequence>\\t<full header>\\t<length>' for each record."""
    inFile = "dummyFastaFile.fa"
    expFile = "expFile.seq"
    obsFile = "obsFile.seq"
    try:
        with open(inFile, "w") as inFileHandler:
            inFileHandler.write(">ReconCluster12Mb63 chunk1 {Fragment} 1..10\n")
            inFileHandler.write("AGCGTGCA\n")
            inFileHandler.write(">ReconCluster12Mb53 chunk2 {Fragment} 1..10\n")
            inFileHandler.write("AGCATGCAA\n")
            inFileHandler.write(">ReconCluster12Mb26 chunk2 {Fragment} 12..18\n")
            inFileHandler.write("CGTGCGAAAA\n")
            inFileHandler.write(">ReconCluster12Mb35 chunk3 {Fragment} 10..1\n")
            inFileHandler.write("AGCGTG\n")

        with open(expFile, "w") as expFileHandler:
            expFileHandler.write("ReconCluster12Mb63\tAGCGTGCA\tReconCluster12Mb63 chunk1 {Fragment} 1..10\t8\n")
            expFileHandler.write("ReconCluster12Mb53\tAGCATGCAA\tReconCluster12Mb53 chunk2 {Fragment} 1..10\t9\n")
            expFileHandler.write("ReconCluster12Mb26\tCGTGCGAAAA\tReconCluster12Mb26 chunk2 {Fragment} 12..18\t10\n")
            expFileHandler.write("ReconCluster12Mb35\tAGCGTG\tReconCluster12Mb35 chunk3 {Fragment} 10..1\t6\n")

        FastaUtils.convertFastaToSeq(inFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Clean up even on failure: other tests reuse 'dummyFastaFile.fa'.
        for path in (inFile, expFile, obsFile):
            if os.path.exists(path):
                os.remove(path)
|
|
1310
|
|
def test_spliceFromCoords(self):
    """spliceFromCoords must write each chromosome without the intervals listed in the coordinates file."""
    coordFile = "dummyCoordFile"
    genomeFile = "dummyGenomeFile"
    expFile = "dummyExpFile"
    obsFile = "dummyObsFile"
    try:
        # Some intervals are given with start > end (e.g. 15..11).
        with open(coordFile, "w") as coordFileHandler:
            coordFileHandler.write("TE1\tchr1\t2\t5\n")
            coordFileHandler.write("TE2\tchr1\t15\t11\n")
            coordFileHandler.write("TE3\tchr2\t1\t3\n")
            coordFileHandler.write("TE1\tchr2\t8\t10\n")
            coordFileHandler.write("TE4\tchr3\t3\t1\n")
            coordFileHandler.write("TE4\tchr3\t6\t4\n")

        with open(genomeFile, "w") as genomeFileHandler:
            genomeFileHandler.write(">chr1\n")
            genomeFileHandler.write("AGGGGAAAAACCCCCAAAAA\n")
            genomeFileHandler.write(">chr2\n")
            genomeFileHandler.write("GGGAAAAGGG\n")
            genomeFileHandler.write(">chr3\n")
            genomeFileHandler.write("GGGGGGTTTT\n")

        with open(expFile, "w") as expFileHandler:
            expFileHandler.write(">chr1\n")
            expFileHandler.write("AAAAAAAAAAA\n")
            expFileHandler.write(">chr2\n")
            expFileHandler.write("AAAA\n")
            expFileHandler.write(">chr3\n")
            expFileHandler.write("TTTT\n")

        FastaUtils.spliceFromCoords(genomeFile, coordFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # Remove every fixture even if the call or the assertion fails.
        for path in (coordFile, genomeFile, expFile, obsFile):
            if os.path.exists(path):
                os.remove(path)
|
|
1350
|
|
def test_dbShuffle_inputFile(self):
    """dbShuffle on a one-sequence file must produce an output file for which dbSize returns 1."""
    inFile = "dummyInFile.fa"
    obsFile = "dummyObsFile.fa"
    try:
        with open(inFile, "w") as inFileHandler:
            inFileHandler.write(">seq1\n")
            inFileHandler.write("AGCGATCGACAGCGCATCGCGCATCGCATCGCTACGCATAC\n")

        FastaUtils.dbShuffle(inFile, obsFile, 1)

        # assertEqual reports the observed size on failure, unlike assertTrue(x == 1).
        self.assertEqual(1, FastaUtils.dbSize(obsFile))
    finally:
        # Remove both files even if the call or the assertion fails.
        for path in (inFile, obsFile):
            if os.path.exists(path):
                os.remove(path)
|
|
1365
|
|
def test_dbShuffle_inputDir(self):
    """dbShuffle on a directory must create exactly one 'dummyInFile_shuffle.fa' in the output directory."""
    inDir = "dummyInDir"
    obsDir = "dummyObsDir"
    # Start from a fresh input directory in case an earlier run left one behind.
    if os.path.exists(inDir):
        shutil.rmtree(inDir)
    os.mkdir(inDir)
    try:
        inFile = "%s/dummyInFile.fa" % inDir
        with open(inFile, "w") as inFileHandler:
            inFileHandler.write(">seq1\n")
            inFileHandler.write("AGCGATCGACAGCGCATCGCGCATCGCATCGCTACGCATAC\n")

        FastaUtils.dbShuffle(inDir, obsDir, 1)

        obsFile = "dummyInFile_shuffle.fa"
        # assertEqual reports the observed count on failure, unlike assertTrue(x == 1).
        self.assertEqual(1, len(glob.glob("%s/%s" % (obsDir, obsFile))))
    finally:
        # Remove both directories even if the call or the assertion fails.
        for d in (inDir, obsDir):
            if os.path.exists(d):
                shutil.rmtree(d)
|
|
1385
|
|
def test_convertClusterFileToFastaFile(self):
    """Each header must get a '<algo>Cluster<i>Mb<j>_' prefix, i being its line and j its column in the cluster file."""
    inClusterFileName = "in.tab"
    inFastaFileName = "in.fa"
    expFileName = "exp.fa"
    obsFileName = "obs.fa"
    try:
        # One cluster per line, members separated by tabs.
        with open(inClusterFileName, "w") as f:
            f.write("DTX-incomp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-G9-Map3\n")
            f.write("PotentialHostGene-chim_DmelChr4-B-R5-Map5\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
            f.write("RLX-incomp_DmelChr4-B-G220-Map3\n")

        with open(inFastaFileName, "w") as f:
            f.write(">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">RLX-incomp_DmelChr4-B-G220-Map3\n")
            f.write("ATCGCC\n")
            f.write(">PotentialHostGene-chim_DmelChr4-B-R5-Map5\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">DTX-incomp_DmelChr4-B-G9-Map3\n")
            f.write("ATCGCATCGATCGATC\n")

        with open(expFileName, "w") as f:
            f.write(">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\n")
            f.write("ATCGCC\n")
            f.write(">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\n")
            f.write("ATCGCATCGATCGATC\n")

        FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust")

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: the sibling test reuses these file names.
        for path in (inClusterFileName, inFastaFileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
1429
|
|
1430
|
|
def test_convertClusterFileToFastaFile_withoutUnclusterizedSequences(self):
    """A FASTA sequence absent from the cluster file (RLX-incomp...) must still be output, as 'Cluster3Mb1'."""
    inClusterFileName = "in.tab"
    inFastaFileName = "in.fa"
    expFileName = "exp.fa"
    obsFileName = "obs.fa"
    try:
        # Only two clusters are listed; the RLX sequence appears in no cluster line.
        with open(inClusterFileName, "w") as f:
            f.write("DTX-incomp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-G9-Map3\n")
            f.write("PotentialHostGene-chim_DmelChr4-B-R5-Map5\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")

        with open(inFastaFileName, "w") as f:
            f.write(">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">RLX-incomp_DmelChr4-B-G220-Map3\n")
            f.write("ATCGCC\n")
            f.write(">PotentialHostGene-chim_DmelChr4-B-R5-Map5\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">DTX-incomp_DmelChr4-B-G9-Map3\n")
            f.write("ATCGCATCGATCGATC\n")

        with open(expFileName, "w") as f:
            f.write(">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\n")
            f.write("ATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\n")
            f.write("ATCGCC\n")
            f.write(">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n")
            f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n")
            f.write(">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\n")
            f.write("ATCGCATCGATCGATC\n")

        FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust")

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
    finally:
        # Clean up even on failure: the sibling test reuses these file names.
        for path in (inClusterFileName, inFastaFileName, expFileName, obsFileName):
            if os.path.exists(path):
                os.remove(path)
|
|
1473
|
|
def test_convertClusterFileToMapFile(self):
    """Convert a clustered FASTA file to a map file, once per clustering program prefix.

    For each prefix ("Blastclust", then "MCL") a FASTA file is written whose
    headers look like ``<prefix>ClusterXMbY_chunkN (dbseq-nr K) [start,end]``.
    The expected map file has one tab-separated line per header:
    ``<prefix>ClusterXMbY``, ``chunkN``, ``start``, ``end``, in input order.
    Despite the test name, the method under test is
    ``FastaUtils.convertClusteredFastaFileToMapFile``.

    Files are written through ``with`` blocks and removed in a ``finally``
    clause, so neither an I/O error nor a failing assertion leaks handles
    or leaves fixture files behind for the next iteration.
    """
    for clustAlgo in ["Blastclust", "MCL"]:
        inFileName = "dummy%sOut.fa" % clustAlgo
        fileExp = "%sToMapExpected.map" % clustAlgo
        fileObs = "%s.map" % os.path.splitext(inFileName)[0]

        try:
            with open(inFileName, "w") as inF:
                inF.write(">%sCluster1Mb1_chunk1 (dbseq-nr 1) [1,14]\n" % clustAlgo)
                inF.write("gaattgtttactta\n")
                inF.write(">%sCluster3Mb1_chunk5 (dbseq-nr 8) [1000,1014]\n" % clustAlgo)
                inF.write("gaattgtttactta\n")
                inF.write(">%sCluster1Mb2_chunk1 (dbseq-nr 1) [30,44]\n" % clustAlgo)
                inF.write("gaattgtttactta\n")
                inF.write(">%sCluster2Mb1_chunk2 (dbseq-nr 1) [100,114]\n" % clustAlgo)
                # The last sequence line intentionally has no trailing newline.
                inF.write("gaattgtttactta")

            with open(fileExp, "w") as outF:
                outF.write("%sCluster1Mb1\tchunk1\t1\t14\n" % clustAlgo)
                outF.write("%sCluster3Mb1\tchunk5\t1000\t1014\n" % clustAlgo)
                outF.write("%sCluster1Mb2\tchunk1\t30\t44\n" % clustAlgo)
                outF.write("%sCluster2Mb1\tchunk2\t100\t114\n" % clustAlgo)

            FastaUtils.convertClusteredFastaFileToMapFile(inFileName, fileObs)

            self.assertTrue(FileUtils.are2FilesIdentical(fileObs, fileExp))
        finally:
            # The observed file may be missing if the conversion failed.
            for fileName in (inFileName, fileObs, fileExp):
                if os.path.exists(fileName):
                    os.remove(fileName)
|
|
1504
|
|
# Script entry point: when this module is executed directly (rather than
# imported), hand control to the unittest runner.
if __name__ == "__main__":
    unittest.main()