Mercurial > repos > yufei-luo > s_mart
view smart_toolShed/commons/core/seq/test/Test_FastaUtils.py @ 0:e0f8dcca02ed
Uploaded S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.seq.test.Utils_for_T_FastaUtils import Utils_for_T_FastaUtils
from commons.core.utils.FileUtils import FileUtils
import glob
import os
import shutil
import unittest


class Test_FastaUtils( unittest.TestCase ):
    """Tests for the static helpers of FastaUtils.

    Every test writes its fixtures (input, expected output) into the current
    working directory, runs one FastaUtils function, compares the files it
    produced with FileUtils.are2FilesIdentical and finally deletes everything
    it created.  Fixture contents come from Utils_for_T_FastaUtils unless
    they are written inline.
    """

    # ------------------------------------------------------------ dbSize

    def test_dbSize_for_empty_file(self):
        # dbSize is expected to return 0 for the "empty file" fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        obsNb = FastaUtils.dbSize(fileName)
        expNb = 0
        os.remove(fileName)
        self.assertEqual(expNb, obsNb)

    def test_dbSize_one_sequence(self):
        # dbSize is expected to return 1 for the "one sequence" fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)
        obsNb = FastaUtils.dbSize(fileName)
        expNb = 1
        os.remove(fileName)
        self.assertEqual(expNb, obsNb)

    def test_dbSize_four_sequences(self):
        # dbSize is expected to return 4 for the "four sequences" fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        obsNb = FastaUtils.dbSize(fileName)
        expNb = 4
        os.remove(fileName)
        self.assertEqual(expNb, obsNb)

    # ---------------------------------------------------------- dbChunks

    def test_dbChunks(self):
        # With an empty prefix the four output files are named after the
        # input file: <in>_chunks.fa, <in>_chunks.map, <in>_cut and
        # <in>.Nstretch.map.
        inFileName = "dummyBigSeqFastaFile.fa"
        expChunksFileName = 'exp' + inFileName + '_chunks.fa'
        expChunksMapFileName = 'exp' + inFileName + '_chunks.map'
        expCutFileName = 'exp' + inFileName + '_cut'
        expNStretchFileName = 'exp' + inFileName + '.Nstretch.map'
        Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)
        Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_cut(expCutFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_Nstretch(expNStretchFileName)

        FastaUtils.dbChunks(inFileName, '60', '10', '11', '', False, 0)

        obsChunksFileName = inFileName + '_chunks.fa'
        obsChunksMapFileName = inFileName + '_chunks.map'
        obsCutFileName = inFileName + '_cut'
        obsNStretchFileName = inFileName + '.Nstretch.map'

        self.assertTrue(FileUtils.are2FilesIdentical(expChunksFileName, obsChunksFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expChunksMapFileName, obsChunksMapFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expCutFileName, obsCutFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expNStretchFileName, obsNStretchFileName))

        for f in [inFileName,
                  expChunksFileName, expChunksMapFileName, expCutFileName, expNStretchFileName,
                  obsChunksFileName, obsChunksMapFileName, obsCutFileName, obsNStretchFileName]:
            os.remove(f)

    def test_dbChunks_with_clean_and_prefix(self):
        # With prefix 'outFile_chunks' and the clean flag set, only the
        # chunk FASTA and map files are checked (and removed) here.
        inFileName = "dummyBigSeqFastaFile.fa"
        expChunksFileName = 'exp' + inFileName + '_chunks.fa'
        expChunksMapFileName = 'exp' + inFileName + '_chunks.map'
        Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)
        Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)
        Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)

        FastaUtils.dbChunks(inFileName, '60', '10', '11', 'outFile_chunks', True, 0)

        obsChunksFileName = "outFile_chunks.fa"
        obsChunksMapFileName = "outFile_chunks.map"

        self.assertTrue(FileUtils.are2FilesIdentical(expChunksFileName, obsChunksFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expChunksMapFileName, obsChunksMapFileName))

        for f in [inFileName, expChunksFileName, expChunksMapFileName,
                  obsChunksFileName, obsChunksMapFileName]:
            os.remove(f)

    # ------------------------------------------------------- dbCumLength

    def test_dbCumLength_with_empty_file(self):
        # dbCumLength takes an open file handler, not a file name.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFileName)
        expCumulLength = 0
        with open(inFileName, "r") as inFileHandler:
            obsCumulLength = FastaUtils.dbCumLength(inFileHandler)
        os.remove(inFileName)
        self.assertEqual(expCumulLength, obsCumulLength)

    def test_dbCumLength_four_sequences(self):
        # dbCumLength takes an open file handler, not a file name.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFileName)
        expCumulLength = 1168
        with open(inFileName, "r") as inFileHandler:
            obsCumulLength = FastaUtils.dbCumLength(inFileHandler)
        os.remove(inFileName)
        self.assertEqual(expCumulLength, obsCumulLength)

    # --------------------------------------------------------- dbLengths

    def test_dbLengths( self ):
        # Two inline sequences of 7 and 12 bases.
        inFileName = "dummyFastaFile.fa"
        with open(inFileName, "w") as inF:
            inF.write(">seq1\nATGACGT\n")
            inF.write(">seq2\nATGGCGAGACGT\n")
        lExp = [7, 12]
        lObs = FastaUtils.dbLengths(inFileName)
        self.assertEqual(lExp, lObs)
        os.remove(inFileName)

    # --------------------------------------------------------- dbHeaders

    def test_dbHeaders_with_empty_file(self):
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFile)
        lExp = []
        lObs = FastaUtils.dbHeaders(inFile)
        self.assertEqual(lExp, lObs)
        os.remove(inFile)

    def test_dbHeaders_with_one_sequence_without_header(self):
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_sequence_without_header(inFile)
        lExp = []
        lObs = FastaUtils.dbHeaders(inFile)
        self.assertEqual(lExp, lObs)
        os.remove(inFile)

    def test_dbHeaders_four_sequences(self):
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        lExp = ["seq 1", "seq 2", "seq 3", "seq 4"]
        lObs = FastaUtils.dbHeaders(inFile)
        self.assertEqual(lExp, lObs)
        os.remove(inFile)

    # ----------------------------------------------------------- dbSplit

    def test_dbSplit_no_in_file( self ):
        # The input file is deliberately never created: dbSplit must exit.
        inFileName = "dummyFastaFile.fa"
        self.assertRaises(SystemExit, FastaUtils.dbSplit, inFileName, 1, False)

    def test_dbSplit_emptyFile( self ):
        # No batch file may be produced from the "empty file" fixture.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFile)
        FastaUtils.dbSplit(inFile, 10, False, 1)
        self.assertFalse(os.path.exists("batch_1.fa"))
        os.remove(inFile)

    def test_dbSplit_oneSequence_tenSequencesPerBatch( self ):
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(inFile)
        expBatchFile = "dummyExpBatch_1.fa"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(expBatchFile)

        FastaUtils.dbSplit(inFile, 10, False)

        obsBatchFile = "batch_1.fa"
        self.assertTrue(FileUtils.are2FilesIdentical(expBatchFile, obsBatchFile))

        for f in [inFile, expBatchFile, obsBatchFile]:
            os.remove(f)

    def test_dbSplit_fourSequences_threeSequencesPerBatch( self ):
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        expBatch1File = "dummyExpBatch_1.fa"
        expBatch2File = "dummyExpBatch_2.fa"
        Utils_for_T_FastaUtils._createBatch1_three_sequences(expBatch1File)
        Utils_for_T_FastaUtils._createBatch2_one_sequence(expBatch2File)

        FastaUtils.dbSplit(inFile, 3, False)

        obsBatch1File = "batch_1.fa"
        obsBatch2File = "batch_2.fa"
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1File, obsBatch1File))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch2File, obsBatch2File))

        for f in [inFile, expBatch1File, expBatch2File, obsBatch1File, obsBatch2File]:
            os.remove(f)

    def test_dbSplit_fourSequences_twoSequencesPerBatch_inBatchDirectory( self ):
        # Third argument True: the batches are looked up in "batches/".
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        expBatch1File = "dummyExp_batch_1.fa"
        expBatch2File = "dummyExp_batch_2.fa"
        Utils_for_T_FastaUtils._createBatch1_two_sequences(expBatch1File)
        Utils_for_T_FastaUtils._createBatch2_two_sequences(expBatch2File)

        FastaUtils.dbSplit(inFile, 2, True, 1)

        obsBatch1File = "batches/batch_1.fa"
        obsBatch2File = "batches/batch_2.fa"
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1File, obsBatch1File))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch2File, obsBatch2File))

        for f in [inFile, expBatch1File, expBatch2File, obsBatch1File, obsBatch2File]:
            os.remove(f)
        # Do not leave the batch directory behind, as the other
        # "inBatchDirectory" tests already take care of.
        shutil.rmtree("batches")

    def test_dbSplit_tenSequences_oneSequencePerBatch_inBatchDirectory( self ):
        # Batch numbers are zero-padded to two digits: batch_01 ... batch_10.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inFile)

        FastaUtils.dbSplit(inFile, 1, True)

        for nb in range(1, 11):
            s = "%02d" % nb
            expBatchFile = "exp_batch_%s.fa" % ( s )
            Utils_for_T_FastaUtils._createBatch_one_small_sequence(expBatchFile, "seq " + str(nb))
            obsBatchFile = "batches/batch_%s.fa" % ( s )
            self.assertTrue(FileUtils.are2FilesIdentical(expBatchFile, obsBatchFile))
            os.remove(expBatchFile)
            os.remove(obsBatchFile)

        os.remove(inFile)
        # os.rmdir only succeeds on an empty directory, so this also checks
        # that no unexpected batch file was written.
        os.rmdir("batches")

    def test_dbSplit_twoSequences_oneSequencePerBatch_useSeqHeader( self ):
        # Fourth argument True: output files are expected as seq_1.fa, seq_2.fa.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils.createFastaFile_twoSequences(inFile)
        lExpFileNames = ["seq_1.fa", "seq_2.fa"]
        lExpFiles = ["dummyExp_seq_1.fa", "dummyExp_seq_2.fa"]
        Utils_for_T_FastaUtils.createFastaFile_seq_1(lExpFiles[0])
        Utils_for_T_FastaUtils.createFastaFile_seq_2(lExpFiles[1])

        FastaUtils.dbSplit(inFile, 1, False, True)

        lObsFiles = sorted(glob.glob("seq*.fa"))
        for i, expFileName in enumerate(lExpFileNames):
            self.assertEqual(expFileName, lObsFiles[i])
            self.assertTrue(FileUtils.are2FilesIdentical(lExpFiles[i], lObsFiles[i]))

        for f in [inFile] + lExpFiles + lObsFiles:
            os.remove(f)

    def test_dbSplit_twoSequences_otherPrefix( self ):
        # Fifth argument "query": output files are expected as query_<n>.fa.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils.createFastaFile_twoSequences(inFile)
        lExpFileNames = ["query_1.fa", "query_2.fa"]
        lExpFiles = ["dummyExp_seq_1.fa", "dummyExp_seq_2.fa"]
        Utils_for_T_FastaUtils.createFastaFile_seq_1(lExpFiles[0])
        Utils_for_T_FastaUtils.createFastaFile_seq_2(lExpFiles[1])

        FastaUtils.dbSplit(inFile, 1, False, False, "query")

        lObsFiles = sorted(glob.glob("query_*.fa"))
        for i, expFileName in enumerate(lExpFileNames):
            self.assertEqual(expFileName, lObsFiles[i])
            self.assertTrue(FileUtils.are2FilesIdentical(lExpFiles[i], lObsFiles[i]))

        for f in [inFile] + lExpFiles + lObsFiles:
            os.remove(f)

    # ------------------------------------------- splitFastaFileInBatches

    def test_splitFastaFileInBatches(self):
        # Five inline sequences split with a size argument of 60; the
        # expected batches hold seq6 / seq1 / seq5+seq2+seq3, and the
        # expected seq6 is wrapped at 60 characters per line.
        inFileName = "dummyFastaFile.fa"
        with open(inFileName, "w") as f:
            f.write(">seq1\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
            f.write(">seq2\n")
            f.write("ATCGCTAGCTAGCTCG\n")
            f.write(">seq3\n")
            f.write("GTTTGGATCGCT\n")
            f.write(">seq6\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCCTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
            f.write(">seq5\n")
            f.write("TTGGATCGCTCTCTGCTCGGAAATCCCGTC\n")
        expBatch1 = "expBatch_1.fa"
        with open(expBatch1, "w") as f:
            f.write(">seq6\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCCTCT\n")
            f.write("GTTTGGATCGCTCTCTGCTCGGAAATCC\n")
        expBatch2 = "expBatch_2.fa"
        with open(expBatch2, "w") as f:
            f.write(">seq1\n")
            f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
        expBatch3 = "expBatch_3.fa"
        with open(expBatch3, "w") as f:
            f.write(">seq5\n")
            f.write("TTGGATCGCTCTCTGCTCGGAAATCCCGTC\n")
            f.write(">seq2\n")
            f.write("ATCGCTAGCTAGCTCG\n")
            f.write(">seq3\n")
            f.write("GTTTGGATCGCT\n")

        FastaUtils.splitFastaFileInBatches(inFileName, 60)

        obsBatch1 = "batches/batch_1.fa"
        obsBatch2 = "batches/batch_2.fa"
        obsBatch3 = "batches/batch_3.fa"

        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1, obsBatch1))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch2, obsBatch2))
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch3, obsBatch3))

        for f in [inFileName, expBatch1, expBatch2, expBatch3]:
            os.remove(f)
        shutil.rmtree("batches")

    def test_splitFastaFileInBatches_one_seq(self):
        inFileName = "dummyFastaFile.fa"
        with open(inFileName, "w") as f:
            f.write(">seq2\n")
            f.write("ATCGCTAGCTAGCTCG\n")
        expBatch1 = "expBatch_1.fa"
        with open(expBatch1, "w") as f:
            f.write(">seq2\n")
            f.write("ATCGCTAGCTAGCTCG\n")

        FastaUtils.splitFastaFileInBatches(inFileName, 60)

        obsBatch1 = "batches/batch_1.fa"
        self.assertTrue(FileUtils.are2FilesIdentical(expBatch1, obsBatch1))

        os.remove(inFileName)
        os.remove(expBatch1)
        shutil.rmtree("batches")

    # ------------------------------------------------ splitSeqPerCluster

    def test_splitSeqPerCluster_no_in_file(self):
        # The input file is deliberately never created: the call must exit.
        inFileName = "dummyFastaFile.fa"
        self.assertRaises(SystemExit, FastaUtils.splitSeqPerCluster,
                          inFileName, "Piler", False, False, "seqCluster")

    def test_splitSeqPerCluster_in_file_empty(self):
        inFileName = "dummyFastaFile.fa"
        with open(inFileName, 'w'):
            pass
        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")
        self.assertEqual(glob.glob("seqCluster*.fa"), [])
        os.remove(inFileName)

    def test_splitSeqPerCluster_four_sequences_without_dir(self):
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
        expFirstClusterFileName = "exp_seqCluster1.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result(expFirstClusterFileName)
        expSecondClusterFileName = "exp_seqCluster2.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result(expSecondClusterFileName)
        expThirdClusterFileName = "exp_seqCluster3.574.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result(expThirdClusterFileName)

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        obsFirstClusterFileName = "seqCluster1.fa"
        obsSecondClusterFileName = "seqCluster2.fa"
        obsThirdClusterFileName = "seqCluster3.574.fa"
        os.remove(inFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFirstClusterFileName, obsFirstClusterFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expSecondClusterFileName, obsSecondClusterFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expThirdClusterFileName, obsThirdClusterFileName))

        for f in [expFirstClusterFileName, expSecondClusterFileName, expThirdClusterFileName,
                  obsFirstClusterFileName, obsSecondClusterFileName, obsThirdClusterFileName]:
            os.remove(f)

    def test_splitSeqPerCluster_four_sequences_without_dir_no_split(self):
        # Input and expected output are built by the same fixture helper:
        # all four sequences are expected to land in seqCluster1.fa.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_in_same_cluster(inFileName)
        expClusterFileName = "exp_seqCluster.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_in_same_cluster(expClusterFileName)

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        obsClusterFileName = "seqCluster1.fa"
        os.remove(inFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))

        os.remove(expClusterFileName)
        os.remove(obsClusterFileName)

    def test_splitSeqPerCluster_four_sequences_without_dir_shuffle(self):
        # Same expectations as the non-shuffled test, from the "shuffle"
        # input fixture.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_shuffle(inFileName)
        expFirstClusterFileName = "exp_seqCluster1.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result(expFirstClusterFileName)
        expSecondClusterFileName = "exp_seqCluster2.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result(expSecondClusterFileName)
        expThirdClusterFileName = "exp_seqCluster3.574.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result(expThirdClusterFileName)

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

        obsFirstClusterFileName = "seqCluster1.fa"
        obsSecondClusterFileName = "seqCluster2.fa"
        obsThirdClusterFileName = "seqCluster3.574.fa"
        os.remove(inFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFirstClusterFileName, obsFirstClusterFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expSecondClusterFileName, obsSecondClusterFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expThirdClusterFileName, obsThirdClusterFileName))

        for f in [expFirstClusterFileName, expSecondClusterFileName, expThirdClusterFileName,
                  obsFirstClusterFileName, obsSecondClusterFileName, obsThirdClusterFileName]:
            os.remove(f)

    def test_splitSeqPerCluster_four_sequences_simplify_header(self):
        # Third argument True: expectations come from the
        # "*_with_simplify_header" fixture helpers.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
        expFirstClusterFileName = "exp_seqCluster1.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result_with_simplify_header(expFirstClusterFileName)
        expSecondClusterFileName = "exp_seqCluster2.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result_with_simplify_header(expSecondClusterFileName)
        expThirdClusterFileName = "exp_seqCluster3.574.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result_with_simplify_header(expThirdClusterFileName)

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", True, False, "seqCluster")

        obsFirstClusterFileName = "seqCluster1.fa"
        obsSecondClusterFileName = "seqCluster2.fa"
        obsThirdClusterFileName = "seqCluster3.574.fa"
        os.remove(inFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFirstClusterFileName, obsFirstClusterFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expSecondClusterFileName, obsSecondClusterFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expThirdClusterFileName, obsThirdClusterFileName))

        for f in [expFirstClusterFileName, expSecondClusterFileName, expThirdClusterFileName,
                  obsFirstClusterFileName, obsSecondClusterFileName, obsThirdClusterFileName]:
            os.remove(f)

    def test_splitSeqPerCluster_four_sequences_with_dir(self):
        # Fourth argument True: each cluster file is looked up in its own
        # "<input>_cluster_<id>" directory.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)

        FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, True, "seqCluster")
        os.remove(inFileName)

        lClusters = [
            ('1', Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result),
            ('2', Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result),
            ('3.574', Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result),
        ]
        for clusterId, createExpFile in lClusters:
            expClusterFileName = "exp_cluster" + clusterId + ".fa"
            createExpFile(expClusterFileName)
            clusterDir = inFileName + "_cluster_" + clusterId
            obsClusterFileName = clusterDir + "/seqCluster" + clusterId + ".fa"

            self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))

            os.remove(expClusterFileName)
            os.remove(obsClusterFileName)
            os.rmdir(clusterDir)

    # ---------------------------------------------------- dbLengthFilter

    def test_dbLengthFilter_with_one_sequence(self):
        # Threshold 12: the ".Inf12" output is expected to match the empty
        # fixture and ".Sup12" the one-sequence fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)
        expFileNameInf = "exp_dummyFastaFile.fa.Inf12"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(expFileNameInf)
        expFileNameSup = "exp_dummyFastaFile.fa.Sup12"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(expFileNameSup)

        FastaUtils.dbLengthFilter(12, fileName, verbose=0)

        obsFileNameInf = "dummyFastaFile.fa.Inf12"
        obsFileNameSup = "dummyFastaFile.fa.Sup12"

        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameInf, obsFileNameInf))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameSup, obsFileNameSup))

        for f in [fileName, expFileNameInf, expFileNameSup, obsFileNameInf, obsFileNameSup]:
            os.remove(f)

    def test_dbLengthFilter_with_four_sequence(self):
        # Threshold 130 on the four-sequence fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        expFileNameInf = "exp_dummyFastaFile.fa.Inf130"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(expFileNameInf)
        expFileNameSup = "exp_dummyFastaFile.fa.Sup130"
        Utils_for_T_FastaUtils._createResult_of_dbLengthFilter_sup(expFileNameSup)

        FastaUtils.dbLengthFilter(130, fileName, verbose=0)

        obsFileNameInf = "dummyFastaFile.fa.Inf130"
        obsFileNameSup = "dummyFastaFile.fa.Sup130"

        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameInf, obsFileNameInf))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileNameSup, obsFileNameSup))

        for f in [fileName, expFileNameInf, expFileNameSup, obsFileNameInf, obsFileNameSup]:
            os.remove(f)

    # ------------------------------------------------ dbLongestSequences

    def test_dbLongestSequences_with_empty_file(self):
        # For the empty fixture the function's return value (0) is checked;
        # no output file is compared.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        expResult = 0
        obsResult = FastaUtils.dbLongestSequences(1, fileName)
        self.assertEqual(expResult, obsResult)
        os.remove(fileName)

    def test_dbLongestSequences_with_one_longest_sequence(self):
        # With an empty outFileName the result is looked up in
        # "<input>.best1".
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa.best1"
        with open(expFileName, 'w') as f:
            f.write(">seq 3\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCG\n")

        FastaUtils.dbLongestSequences(1, fileName, outFileName="", verbose=0, minThresh=0)

        obsFileName = "dummyFastaFile.fa.best1"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    def test_dbLongestSequences_with_two_longest_sequence(self):
        # Two sequences requested: the result is looked up in
        # "<input>.best2".  The expected-file name keeps its historical
        # ".best1" suffix; it is only a scratch file name.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa.best1"
        with open(expFileName, 'w') as f:
            f.write(">seq 2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCG\n")
            f.write(">seq 4\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCG\n")

        FastaUtils.dbLongestSequences(2, fileName, outFileName="", verbose=0, minThresh=0)

        obsFileName = "dummyFastaFile.fa.best2"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    # ----------------------------------------------- dbExtractSeqHeaders

    def test_dbExtractSeqHeaders(self):
        # Headers are expected one per line, without the leading '>',
        # in "<input>.headers".
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write("seq 1\n")
            f.write("seq 2\n")
            f.write("seq 4\n")

        FastaUtils.dbExtractSeqHeaders(fileName)

        obsFileName = "dummyFastaFile.fa.headers"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    def test_dbExtractSeqHeaders_with_empty_file(self):
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write("")

        FastaUtils.dbExtractSeqHeaders(fileName)

        obsFileName = "dummyFastaFile.fa.headers"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    def test_dbExtractSeqHeaders_without_header(self):
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_sequence_without_header(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write("")

        FastaUtils.dbExtractSeqHeaders(fileName)

        obsFileName = "dummyFastaFile.fa.headers"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    # ------------------------------------------------ dbExtractByPattern

    def test_dbExtractByPattern_without_pattern(self):
        # With an empty pattern only the return value (None) is checked.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        obsResult = FastaUtils.dbExtractByPattern("", fileName)
        expResult = None
        self.assertEqual(expResult, obsResult)
        os.remove(fileName)

    def test_dbExtractByPattern(self):
        # Pattern 'seq': the "<input>.extracted" output is expected to equal
        # the three-sequence input fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(expFileName)

        FastaUtils.dbExtractByPattern('seq', fileName)

        obsFileName = "dummyFastaFile.fa.extracted"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    def test_dbExtractByPattern_with_2_as_pattern(self):
        # Pattern ' 2': only "seq 2" is expected in the output.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write(">seq 2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCG\n")

        FastaUtils.dbExtractByPattern(' 2', fileName)

        obsFileName = "dummyFastaFile.fa.extracted"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    def test_dbExtractByPattern_with_sandie_as_pattern(self):
        # Pattern 'sandie': the output is expected to match the empty fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(expFileName)

        FastaUtils.dbExtractByPattern('sandie', fileName)

        obsFileName = "dummyFastaFile.fa.extracted"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    # -------------------------------------------- dbExtractByFilePattern

    def test_dbExtractByFilePattern_empty_pattern_filename(self):
        # An empty pattern file name must make the call exit.
        patternFileName = ""
        self.assertRaises(SystemExit, FastaUtils.dbExtractByFilePattern,
                          patternFileName, None, "")

    def test_dbExtractByFilePattern(self):
        # Patterns come from the _createPatternFile fixture; sequences
        # 1, 3, 8 and 10 are expected in "<input>.extracted".
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)
        patternFileName = "dummyPatternFile.txt"
        Utils_for_T_FastaUtils._createPatternFile(patternFileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write(">seq 1\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 3\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 8\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 10\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
        obsFileName = "dummyFastaFile.fa.extracted"

        FastaUtils.dbExtractByFilePattern(patternFileName, fileName, "")

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, patternFileName, expFileName, obsFileName]:
            os.remove(f)

    # -------------------------------------------------- dbCleanByPattern

    def test_dbCleanByPattern_without_pattern(self):
        # With an empty pattern only the return value (None) is checked.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        obsResult = FastaUtils.dbCleanByPattern("", fileName)
        expResult = None
        self.assertEqual(expResult, obsResult)
        os.remove(fileName)

    def test_dbCleanByPattern(self):
        # Pattern '2': every sequence except "seq 2" is expected in
        # "<input>.cleaned".
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write(">seq 1\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 3\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 4\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 5\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 6\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 7\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 8\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 9\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 10\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
        obsFileName = "dummyFastaFile.fa.cleaned"

        FastaUtils.dbCleanByPattern('2', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    def test_dbCleanByPattern_with_expectedFile_empty(self):
        # Pattern 'seq': an empty "<input>.cleaned" is expected.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write("")
        obsFileName = "dummyFastaFile.fa.cleaned"

        FastaUtils.dbCleanByPattern('seq', fileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, expFileName, obsFileName]:
            os.remove(f)

    # ---------------------------------------------- dbCleanByFilePattern

    def test_dbCleanByFilePattern_empty_pattern_filename(self):
        # An empty pattern file name must make the call exit.
        patternFileName = ""
        self.assertRaises(SystemExit, FastaUtils.dbCleanByFilePattern,
                          patternFileName, None, "")

    def test_dbCleanByFilePattern(self):
        # Patterns come from the _createPatternFile fixture; sequences
        # 2, 4, 5, 6, 7 and 9 are expected to remain in "<input>.cleaned".
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(fileName)
        patternFileName = "dummyPatternFile.txt"
        Utils_for_T_FastaUtils._createPatternFile(patternFileName)
        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write(">seq 2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 4\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 5\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 6\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 7\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq 9\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
        obsFileName = "dummyFastaFile.fa.cleaned"

        FastaUtils.dbCleanByFilePattern(patternFileName, fileName, "")

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, patternFileName, expFileName, obsFileName]:
            os.remove(f)

    # ------------------------------------------------------------- dbORF

    def test_dbORF_without_ORF(self):
        # An empty "<input>.ORF.map" is expected for this sequence.
        fileName = "dummy.fa"
        with open(fileName, "w") as f:
            f.write(">dummy\n")
            f.write("GGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTT\n")
        expFileName = "exp.ORF.map"
        with open(expFileName, "w") as f:
            f.write("")
        obsFileName = "%s.ORF.map" % fileName

        FastaUtils.dbORF(fileName, 0, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, obsFileName, expFileName]:
            os.remove(f)

    def test_dbORF_with_one_ORF(self):
        # A single tab-separated map line is expected in "<input>.ORF.map".
        fileName = "dummyFastaFile.fa"
        with open(fileName, 'w') as f:
            f.write(">seq1\n")
            f.write("GAAAATATGGGGTAGATAAGGGATCTGGGTTAATTTTTT\n")
        expFileName = "exp_dummyORFFile.ORF.map"
        with open(expFileName, 'w') as f:
            f.write("ORF|1|17\tseq1\t16\t33\n")

        FastaUtils.dbORF(fileName, 0, 0)

        obsFileName = fileName + ".ORF.map"
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        for f in [fileName, obsFileName, expFileName]:
            os.remove(f)

def test_dbORF_with_real_ORF(self): fileName = "dummy.fa" with open(fileName, "w") as f: f.write(">DmelChr4_Blaster_Recon_13_Map_4\n") f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n") f.write("TTGCGGATCATTTTGTTTGAACAACCGACAATGCGACCAATTTCAGCGTAGGTTTTACCT\n") f.write("TCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAATGCTTTCCGCGACCC\n") f.write("ATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAAAACCTTTAATACAAC\n") f.write("TCCTTTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACTCCTATTAATTTTATTCA\n") f.write("GCAAATACGTGTTCAGTGCTATTTTTGTTACCGCCTCATTTCGCGCACTTTTGCAGCAAG\n") f.write("TGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAATTTCTTGCTCAGAGAGCC\n") f.write("AACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAATATAAACATTTAATAATT\n") f.write("TTTTTTAGGAAATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATA\n") f.write(">DmelChr4_Blaster_Piler_3.5_Map_7\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n") f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n") f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n") f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") f.write(">DmelChr4_Blaster_Grouper_10_Map_13\n") f.write("GCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGCACTATGCAGC\n") f.write("CACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTGAGAGCGTAAG\n") f.write("AAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGAGAACGCGTAT\n") 
f.write("AAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCTGATCGAAGAA\n") f.write("ACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAATATGATAAAA\n") f.write("TAAAAAAATTTTTAAAAATTCGCGCCCTGACTATTATAATTTTAAAGCTTTTTAAAATTT\n") f.write("GTTTGTTAAAATCGCCGCTCGAATTAGCTACCGTTTACACATTTATATTTATGTTTAATT\n") f.write("CTAATTTGTCTCTCATCTGACAATTTTTTAAGAAAGCGAAATATTTTTTTTTTGAAACAC\n") f.write("TTTTAATGTTAATGTTACATCATATTAAGTCAAATGATTTAATAAATATACTAAATAATT\n") f.write("AAATATGATAACTGTTTATTGCAAAAGTAATATCAAAGACACTAGAATTATTCTAGTGTC\n") f.write("TTTGCTTTGTTCATATCTTGAGGCACGAAGTGCGGACACAAGCACTCAACAATCATTGCC\n") f.write("TTATTAATTTTTCACACGCCGCAAGATGAATACTCTAATGACAAATATTCTTATATAAAG\n") f.write("TCATTTTTGAAATTTATTTTTGTGATAATATGTACATAGATTTGGCTATTTCTAATCTAT\n") f.write("TTTCAAATAATAATAACGTTAAGGCAATGCAAAACAAGAATTTTTTTAGTCGCATGGTGC\n") f.write("CAATTGATCAAAAATAATATAGATTTAAAGTCTAAGAACTTCTAAGGTGAAGGGCATATT\n") f.write("TTGTCAAATTTACAATGCATGAGCGAGCATACGTGTGCACACATACAGTTGTCTGCTATC\n") f.write("ACTTTGTGCGTTGAAAA\n") expFileName = "exp.ORF.map" with open(expFileName, "w") as f: f.write("ORF|3|263\tDmelChr4_Blaster_Recon_13_Map_4\t189\t452\n") f.write("ORF|2|206\tDmelChr4_Blaster_Recon_13_Map_4\t185\t391\n") f.write("ORF|-3|164\tDmelChr4_Blaster_Recon_13_Map_4\t382\t218\n") f.write("ORF|-1|161\tDmelChr4_Blaster_Recon_13_Map_4\t297\t136\n") f.write("ORF|1|113\tDmelChr4_Blaster_Recon_13_Map_4\t400\t513\n") f.write("ORF|1|113\tDmelChr4_Blaster_Recon_13_Map_4\t112\t225\n") f.write("ORF|3|107\tDmelChr4_Blaster_Recon_13_Map_4\t81\t188\n") f.write("ORF|1|107\tDmelChr4_Blaster_Recon_13_Map_4\t292\t399\n") f.write("ORF|-1|104\tDmelChr4_Blaster_Recon_13_Map_4\t432\t328\n") f.write("ORF|-2|104\tDmelChr4_Blaster_Recon_13_Map_4\t515\t411\n") f.write("ORF|3|116\tDmelChr4_Blaster_Piler_3.5_Map_7\t393\t509\n") f.write("ORF|-3|116\tDmelChr4_Blaster_Piler_3.5_Map_7\t505\t389\n") f.write("ORF|-2|86\tDmelChr4_Blaster_Piler_3.5_Map_7\t518\t432\n") f.write("ORF|1|80\tDmelChr4_Blaster_Piler_3.5_Map_7\t436\t516\n") 
f.write("ORF|-3|170\tDmelChr4_Blaster_Grouper_10_Map_13\t222\t52\n") f.write("ORF|-1|161\tDmelChr4_Blaster_Grouper_10_Map_13\t260\t99\n") f.write("ORF|3|155\tDmelChr4_Blaster_Grouper_10_Map_13\t702\t857\n") f.write("ORF|3|152\tDmelChr4_Blaster_Grouper_10_Map_13\t288\t440\n") f.write("ORF|1|137\tDmelChr4_Blaster_Grouper_10_Map_13\t622\t759\n") f.write("ORF|2|128\tDmelChr4_Blaster_Grouper_10_Map_13\t539\t667\n") f.write("ORF|1|125\tDmelChr4_Blaster_Grouper_10_Map_13\t760\t885\n") f.write("ORF|2|122\tDmelChr4_Blaster_Grouper_10_Map_13\t14\t136\n") f.write("ORF|-2|113\tDmelChr4_Blaster_Grouper_10_Map_13\t847\t734\n") f.write("ORF|1|110\tDmelChr4_Blaster_Grouper_10_Map_13\t154\t264\n") obsFileName = "%s.ORF.map" % fileName FastaUtils.dbORF(fileName, 10, 30) self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName)) os.remove(fileName) os.remove(obsFileName) os.remove(expFileName) def test_sortSequencesByIncreasingLength(self): fileName = "dummyFastaFile.fa" f = open(fileName, 'w') f.write(">seq1_length_60\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq2_length_120\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq3_length_32\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n") f.close() expFileName = "exp_dummyFastaFile.fa" f = open(expFileName, 'w') f.write(">seq3_length_32\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n") f.write(">seq1_length_60\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq2_length_120\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.close() obsFileName = "obs_dummyFastaFile.fa" FastaUtils.sortSequencesByIncreasingLength(fileName, obsFileName, 0) self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName)) 
os.remove(expFileName) os.remove(obsFileName) def test_sortSequencesByIncreasingLength_in_file_do_not_exists(self): fileName = "dummyFile.fa" isSysExitRaised = False try: FastaUtils.sortSequencesByIncreasingLength(fileName, "", 0) except SystemExit: isSysExitRaised = True self.assertTrue(isSysExitRaised) def test_sortSequencesByHeader(self): fileName = "dummyFastaFile.fa" f = open(fileName, "w") f.write(">seq1::test-test\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq3\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n") f.write(">seq2\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.close() expFileName = "expFastaFile.fa" f = open(expFileName, "w") f.write(">seq1::test-test\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq2\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq3\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n") f.close() obsFileName = "obsFastaFile.fa" FastaUtils.sortSequencesByHeader(fileName, obsFileName) self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName)) os.remove(expFileName) os.remove(obsFileName) def test_sortSequencesByHeader_no_outFileName(self): fileName = "dummyFastaFile.fa" f = open(fileName, "w") f.write(">seq12\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq1\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n") f.write(">seq2\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.close() expFileName = "expFastaFile.fa" f = open(expFileName, "w") f.write(">seq1\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n") f.write(">seq12\n") 
f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write(">seq2\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n") f.close() obsFileName = "dummyFastaFile_sortByHeaders.fa" FastaUtils.sortSequencesByHeader(fileName) self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName)) os.remove(expFileName) os.remove(obsFileName) def test_getLengthPerHeader( self ): inFile = "dummyFile.fa" inFileHandler = open( inFile, "w" ) inFileHandler.write(">seq1\nAGCGATGCGT\n") inFileHandler.write(">seq2\nAGCGATG\n") inFileHandler.write(">seq3\nAGCGATGGTGCGTGC\n") inFileHandler.write("AGCGATGGTGCGTGC\n") inFileHandler.close() dExp = { "seq1": 10, "seq2": 7, "seq3": 30 } dObs = FastaUtils.getLengthPerHeader( inFile, 0 ) self.assertEquals( dExp, dObs ) os.remove( inFile ) def test_convertFastaHeadersFromChkToChr_grouper(self): inFile = "dummyFastaFile.fa" with open(inFile, "w") as f: f.write(">MbQ1Gr1Cl0 chunk6 {Fragment} 95523..96053\n") f.write("AGCGTGCA\n") f.write(">MbQ77Gr8Cl0 chunk7 {Fragment} 123657..122568,121935..121446\n") f.write("AGCATGC\n") f.write(">MbS78Gr8Cl0 chunk7 {Fragment} 140078..139519,139470..138985,138651..138183\n") f.write("CGTGCG\n") f.write(">MbQ79Gr8Cl0 chunk7 {Fragment} 48021..48587,48669..49153,57346..57834\n") f.write("AGCGTGC\n") mapFile = "dummyMapFile.map" with open(mapFile, "w") as f: f.write("chunk5\tdmel_chr4\t760001\t960000\n") f.write("chunk6\tdmel_chr4\t950001\t1150000\n") f.write("chunk7\tdmel_chr4\t1140001\t1281640\n") expFile = "expFile.fa" with open(expFile, "w") as f: f.write(">MbQ1Gr1Cl0 dmel_chr4 {Fragment} 1045523..1046053\n") f.write("AGCGTGCA\n") f.write(">MbQ77Gr8Cl0 dmel_chr4 {Fragment} 1263657..1262568,1261935..1261446\n") f.write("AGCATGC\n") f.write(">MbS78Gr8Cl0 dmel_chr4 {Fragment} 1280078..1279519,1279470..1278985,1278651..1278183\n") f.write("CGTGCG\n") f.write(">MbQ79Gr8Cl0 dmel_chr4 
{Fragment} 1188021..1188587,1188669..1189153,1197346..1197834\n") f.write("AGCGTGC\n") obsFile = "obsFile.fa" FastaUtils.convertFastaHeadersFromChkToChr(inFile, mapFile, obsFile) self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile)) for file in [inFile, mapFile, expFile, obsFile]: os.remove(file) def test_convertFastaHeadersFromChkToChr_blastclust(self): inFile = "dummyFastaFile.fa" with open(inFile, "w") as f: f.write(">BlastclustCluster12Mb63_chunk1 (dbseq-nr 0) [1,10]\n") f.write("AGCGTGCA\n") f.write(">BlastclustCluster12Mb53_chunk2 (dbseq-nr 2) [1,10]\n") f.write("AGCATGC\n") f.write(">BlastclustCluster12Mb26_chunk2 (dbseq-nr 2) [12,18]\n") f.write("CGTGCG\n") f.write(">BlastclustCluster12Mb35_chunk3 (dbseq-nr 0) [10,1]\n") f.write("AGCGTGC\n") mapFile = "dummyMapFile.map" with open(mapFile, "w") as f: f.write("chunk1\tchromosome1\t1\t20\n") f.write("chunk2\tchromosome1\t16\t35\n") f.write("chunk3\tchromosome2\t1\t20\n") expFile = "expFile.fa" with open(expFile, "w") as f: f.write(">BlastclustCluster12Mb63 chromosome1 (dbseq-nr 0) 1..10\n") f.write("AGCGTGCA\n") f.write(">BlastclustCluster12Mb53 chromosome1 (dbseq-nr 2) 16..25\n") f.write("AGCATGC\n") f.write(">BlastclustCluster12Mb26 chromosome1 (dbseq-nr 2) 27..33\n") f.write("CGTGCG\n") f.write(">BlastclustCluster12Mb35 chromosome2 (dbseq-nr 0) 10..1\n") f.write("AGCGTGC\n") obsFile = "obsFile.fa" FastaUtils.convertFastaHeadersFromChkToChr(inFile, mapFile, obsFile) self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile)) for file in [inFile, mapFile, expFile, obsFile]: os.remove(file) def test_convertFastaToLength( self ): inFile = "dummyFastaFile.fa" inFileHandler = open(inFile, "w") inFileHandler.write(">ReconCluster12Mb63 chunk1 {Fragment} 1..10\n") inFileHandler.write("AGCGTGCA\n") inFileHandler.write(">ReconCluster12Mb53 chunk2 {Fragment} 1..10\n") inFileHandler.write("AGCATGCAA\n") inFileHandler.write(">ReconCluster12Mb26 chunk2 {Fragment} 12..18\n") 
inFileHandler.write("CGTGCGAAAA\n") inFileHandler.write(">ReconCluster12Mb35 chunk3 {Fragment} 10..1\n") inFileHandler.write("AGCGTG\n") inFileHandler.close() expFile = "expFile.length" expFileHandler = open(expFile, "w") expFileHandler.write("ReconCluster12Mb63\t8\n") expFileHandler.write("ReconCluster12Mb53\t9\n") expFileHandler.write("ReconCluster12Mb26\t10\n") expFileHandler.write("ReconCluster12Mb35\t6\n") expFileHandler.close() obsFile = "obsFile.length" FastaUtils.convertFastaToLength(inFile, obsFile) self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile)) for f in [inFile, expFile, obsFile]: os.remove(f) def test_convertFastaToSeq( self ): inFile = "dummyFastaFile.fa" inFileHandler = open(inFile, "w") inFileHandler.write(">ReconCluster12Mb63 chunk1 {Fragment} 1..10\n") inFileHandler.write("AGCGTGCA\n") inFileHandler.write(">ReconCluster12Mb53 chunk2 {Fragment} 1..10\n") inFileHandler.write("AGCATGCAA\n") inFileHandler.write(">ReconCluster12Mb26 chunk2 {Fragment} 12..18\n") inFileHandler.write("CGTGCGAAAA\n") inFileHandler.write(">ReconCluster12Mb35 chunk3 {Fragment} 10..1\n") inFileHandler.write("AGCGTG\n") inFileHandler.close() expFile = "expFile.seq" expFileHandler = open(expFile, "w") expFileHandler.write("ReconCluster12Mb63\tAGCGTGCA\tReconCluster12Mb63 chunk1 {Fragment} 1..10\t8\n") expFileHandler.write("ReconCluster12Mb53\tAGCATGCAA\tReconCluster12Mb53 chunk2 {Fragment} 1..10\t9\n") expFileHandler.write("ReconCluster12Mb26\tCGTGCGAAAA\tReconCluster12Mb26 chunk2 {Fragment} 12..18\t10\n") expFileHandler.write("ReconCluster12Mb35\tAGCGTG\tReconCluster12Mb35 chunk3 {Fragment} 10..1\t6\n") expFileHandler.close() obsFile = "obsFile.seq" FastaUtils.convertFastaToSeq(inFile, obsFile) self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile)) for f in [inFile, expFile, obsFile]: os.remove(f) def test_spliceFromCoords( self ): coordFile = "dummyCoordFile" coordFileHandler = open( coordFile, "w" ) coordFileHandler.write("TE1\tchr1\t2\t5\n") 
coordFileHandler.write("TE2\tchr1\t15\t11\n") coordFileHandler.write("TE3\tchr2\t1\t3\n") coordFileHandler.write("TE1\tchr2\t8\t10\n") coordFileHandler.write("TE4\tchr3\t3\t1\n") coordFileHandler.write("TE4\tchr3\t6\t4\n") coordFileHandler.close() genomeFile = "dummyGenomeFile" genomeFileHandler = open( genomeFile, "w" ) genomeFileHandler.write(">chr1\n") genomeFileHandler.write("AGGGGAAAAACCCCCAAAAA\n") genomeFileHandler.write(">chr2\n") genomeFileHandler.write("GGGAAAAGGG\n") genomeFileHandler.write(">chr3\n") genomeFileHandler.write("GGGGGGTTTT\n") genomeFileHandler.close() expFile = "dummyExpFile" expFileHandler = open( expFile, "w" ) expFileHandler.write(">chr1\n") expFileHandler.write("AAAAAAAAAAA\n") expFileHandler.write(">chr2\n") expFileHandler.write("AAAA\n") expFileHandler.write(">chr3\n") expFileHandler.write("TTTT\n") expFileHandler.close() obsFile = "dummyObsFile" FastaUtils.spliceFromCoords( genomeFile, coordFile, obsFile ) self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) ) for f in [ coordFile, genomeFile, expFile, obsFile ]: os.remove( f ) def test_dbShuffle_inputFile( self ): inFile = "dummyInFile.fa" inFileHandler = open( inFile, "w" ) inFileHandler.write(">seq1\n") inFileHandler.write("AGCGATCGACAGCGCATCGCGCATCGCATCGCTACGCATAC\n") inFileHandler.close() obsFile = "dummyObsFile.fa" FastaUtils.dbShuffle( inFile, obsFile, 1 ) self.assertTrue( FastaUtils.dbSize( obsFile ) == 1 ) for f in [ inFile, obsFile ]: os.remove( f ) def test_dbShuffle_inputDir( self ): inDir = "dummyInDir" if os.path.exists( inDir ): shutil.rmtree( inDir ) os.mkdir( inDir ) inFile = "%s/dummyInFile.fa" % inDir inFileHandler = open( inFile, "w" ) inFileHandler.write(">seq1\n") inFileHandler.write("AGCGATCGACAGCGCATCGCGCATCGCATCGCTACGCATAC\n") inFileHandler.close() obsDir = "dummyObsDir" FastaUtils.dbShuffle( inDir, obsDir, 1 ) obsFile = "dummyInFile_shuffle.fa" self.assertTrue( len( glob.glob("%s/%s" % (obsDir,obsFile)) ) == 1 ) for d in [ inDir, obsDir ]: 
shutil.rmtree( d ) def test_convertClusterFileToFastaFile(self): inClusterFileName = "in.tab" with open(inClusterFileName, "w") as f: f.write("DTX-incomp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-G9-Map3\n") f.write("PotentialHostGene-chim_DmelChr4-B-R5-Map5\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n") f.write("RLX-incomp_DmelChr4-B-G220-Map3\n") inFastaFileName = "in.fa" with open(inFastaFileName, "w") as f: f.write(">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">RLX-incomp_DmelChr4-B-G220-Map3\n") f.write("ATCGCC\n") f.write(">PotentialHostGene-chim_DmelChr4-B-R5-Map5\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">DTX-incomp_DmelChr4-B-G9-Map3\n") f.write("ATCGCATCGATCGATC\n") expFileName = "exp.fa" with open(expFileName, "w") as f: f.write(">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\n") f.write("ATCGCC\n") f.write(">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\n") f.write("ATCGCATCGATCGATC\n") obsFileName = "obs.fa" FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust") self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName)) os.remove(inClusterFileName) os.remove(inFastaFileName) os.remove(expFileName) 
os.remove(obsFileName) def test_convertClusterFileToFastaFile_withoutUnclusterizedSequences(self): inClusterFileName = "in.tab" with open(inClusterFileName, "w") as f: f.write("DTX-incomp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-G9-Map3\n") f.write("PotentialHostGene-chim_DmelChr4-B-R5-Map5\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n") inFastaFileName = "in.fa" with open(inFastaFileName, "w") as f: f.write(">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">RLX-incomp_DmelChr4-B-G220-Map3\n") f.write("ATCGCC\n") f.write(">PotentialHostGene-chim_DmelChr4-B-R5-Map5\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">DTX-incomp_DmelChr4-B-G9-Map3\n") f.write("ATCGCATCGATCGATC\n") expFileName = "exp.fa" with open(expFileName, "w") as f: f.write(">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\n") f.write("ATCGCATCGATCGATC\n") f.write(">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\n") f.write("ATCGCC\n") f.write(">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n") f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\n") f.write(">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\n") f.write("ATCGCATCGATCGATC\n") obsFileName = "obs.fa" FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust") self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName)) os.remove(inClusterFileName) os.remove(inFastaFileName) os.remove(expFileName) os.remove(obsFileName) def 
test_convertClusterFileToMapFile(self): for clustAlgo in ["Blastclust", "MCL"]: inFileName = "dummy%sOut.fa" % clustAlgo inF = open(inFileName, "w") inF.write(">%sCluster1Mb1_chunk1 (dbseq-nr 1) [1,14]\n" % clustAlgo) inF.write("gaattgtttactta\n") inF.write(">%sCluster3Mb1_chunk5 (dbseq-nr 8) [1000,1014]\n" % clustAlgo) inF.write("gaattgtttactta\n") inF.write(">%sCluster1Mb2_chunk1 (dbseq-nr 1) [30,44]\n" % clustAlgo) inF.write("gaattgtttactta\n") inF.write(">%sCluster2Mb1_chunk2 (dbseq-nr 1) [100,114]\n" % clustAlgo) inF.write("gaattgtttactta") inF.close() fileExp = "%sToMapExpected.map" % clustAlgo outF = open(fileExp, "w") outF.write("%sCluster1Mb1\tchunk1\t1\t14\n" % clustAlgo) outF.write("%sCluster3Mb1\tchunk5\t1000\t1014\n" % clustAlgo) outF.write("%sCluster1Mb2\tchunk1\t30\t44\n" % clustAlgo) outF.write("%sCluster2Mb1\tchunk2\t100\t114\n" % clustAlgo) outF.close() fileObs = "%s.map" % os.path.splitext(inFileName)[0] FastaUtils.convertClusteredFastaFileToMapFile(inFileName, fileObs) self.assertTrue(FileUtils.are2FilesIdentical(fileObs, fileExp)) os.remove(inFileName) os.remove(fileObs) os.remove(fileExp) if __name__ == "__main__": unittest.main()