view commons/core/seq/test/Test_FastaUtils.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children 94ab73e8a190
line wrap: on
line source

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.


from commons.core.seq.FastaUtils import FastaUtils
from commons.core.seq.test.Utils_for_T_FastaUtils import Utils_for_T_FastaUtils
from commons.core.utils.FileUtils import FileUtils
import glob
import os
import shutil
import unittest


class Test_FastaUtils( unittest.TestCase ):
    
        
    def test_dbSize_for_empty_file(self):
        # dbSize() is expected to report 0 sequences for the empty-file fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        try:
            obsNb = FastaUtils.dbSize(fileName)
        finally:
            # Remove the fixture even when dbSize() raises: other tests reuse
            # this file name and must start from a clean directory.
            if os.path.exists(fileName):
                os.remove(fileName)
        self.assertEqual(0, obsNb)
        
        
    def test_dbSize_one_sequence(self):
        # dbSize() is expected to report 1 sequence for the one-sequence fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)
        try:
            obsNb = FastaUtils.dbSize(fileName)
        finally:
            # Remove the fixture even when dbSize() raises: other tests reuse
            # this file name and must start from a clean directory.
            if os.path.exists(fileName):
                os.remove(fileName)
        self.assertEqual(1, obsNb)
        
        
    def test_dbSize_four_sequences(self):
        # dbSize() is expected to report 4 sequences for the four-sequence fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
        try:
            obsNb = FastaUtils.dbSize(fileName)
        finally:
            # Remove the fixture even when dbSize() raises: other tests reuse
            # this file name and must start from a clean directory.
            if os.path.exists(fileName):
                os.remove(fileName)
        self.assertEqual(4, obsNb)
        
        
    def test_dbChunks(self):
        # Run dbChunks() on the big-sequence fixture and compare the four
        # output files checked here (<input>_chunks.fa, <input>_chunks.map,
        # <input>_cut and <input>.Nstretch.map) with reference files.
        inFileName = "dummyBigSeqFastaFile.fa"
        expChunksFileName = 'exp' + inFileName + '_chunks.fa'
        expChunksMapFileName = 'exp' + inFileName + '_chunks.map'
        expCutFileName = 'exp' + inFileName + '_cut'
        expNStretchFileName = 'exp' + inFileName + '.Nstretch.map'
        obsChunksFileName = inFileName + '_chunks.fa'
        obsChunksMapFileName = inFileName + '_chunks.map'
        obsCutFileName = inFileName + '_cut'
        obsNStretchFileName = inFileName + '.Nstretch.map'
        try:
            Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)
            Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_cut(expCutFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_Nstretch(expNStretchFileName)

            FastaUtils.dbChunks(inFileName, '60', '10', '11', '', False, 0)

            self.assertTrue(FileUtils.are2FilesIdentical(expChunksFileName, obsChunksFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expChunksMapFileName, obsChunksMapFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expCutFileName, obsCutFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expNStretchFileName, obsNStretchFileName))
        finally:
            # Always clean up, even after a failed assertion; the existence
            # check covers outputs that were never written.
            for tmpFile in [inFileName,
                            expChunksFileName, expChunksMapFileName,
                            expCutFileName, expNStretchFileName,
                            obsChunksFileName, obsChunksMapFileName,
                            obsCutFileName, obsNStretchFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_dbChunks_with_clean_and_prefix(self):
        # Run dbChunks() with the 'outFile_chunks' prefix and the sixth
        # argument set to True (the "clean" option, going by the test name),
        # then compare the prefixed .fa and .map outputs with reference files.
        inFileName = "dummyBigSeqFastaFile.fa"
        expChunksFileName = 'exp' + inFileName + '_chunks.fa'
        expChunksMapFileName = 'exp' + inFileName + '_chunks.map'
        obsChunksFileName = "outFile_chunks.fa"
        obsChunksMapFileName = "outFile_chunks.map"
        try:
            Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)
            Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)

            FastaUtils.dbChunks(inFileName, '60', '10', '11', 'outFile_chunks', True, 0)

            self.assertTrue(FileUtils.are2FilesIdentical(expChunksFileName, obsChunksFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expChunksMapFileName, obsChunksMapFileName))
        finally:
            # Always clean up, even after a failed assertion; the existence
            # check covers outputs that were never written.
            for tmpFile in [inFileName,
                            expChunksFileName, expChunksMapFileName,
                            obsChunksFileName, obsChunksMapFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_dbCumLength_with_empty_file(self):
        # dbCumLength() takes an open file handle; the expected cumulative
        # length for the empty-file fixture is 0.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFileName)
        try:
            # 'with' closes the handle even if dbCumLength() raises.
            with open(inFileName, "r") as inFileHandler:
                obsCumulLength = FastaUtils.dbCumLength(inFileHandler)
        finally:
            if os.path.exists(inFileName):
                os.remove(inFileName)

        self.assertEqual(0, obsCumulLength)
        
    def test_dbCumLength_four_sequences(self):
        # dbCumLength() takes an open file handle; the expected cumulative
        # length for the four-sequence fixture is 1168.
        inFileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFileName)
        try:
            # 'with' closes the handle even if dbCumLength() raises.
            with open(inFileName, "r") as inFileHandler:
                obsCumulLength = FastaUtils.dbCumLength(inFileHandler)
        finally:
            if os.path.exists(inFileName):
                os.remove(inFileName)

        self.assertEqual(1168, obsCumulLength)
        
        
    def test_dbLengths(self):
        # Two sequences of 7 and 12 bases are written; dbLengths() must return
        # their lengths in file order.
        inFileName = "dummyFastaFile.fa"
        try:
            with open(inFileName, "w") as inF:
                inF.write(">seq1\nATGACGT\n")
                inF.write(">seq2\nATGGCGAGACGT\n")
            lObs = FastaUtils.dbLengths(inFileName)
            self.assertEqual([7, 12], lObs)
        finally:
            # Remove the input even when the assertion fails.
            if os.path.exists(inFileName):
                os.remove(inFileName)
        
        
    def test_dbHeaders_with_empty_file(self):
        # dbHeaders() is expected to return an empty list for the empty-file fixture.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFile)
        try:
            lObs = FastaUtils.dbHeaders(inFile)
            self.assertEqual([], lObs)
        finally:
            # Remove the input even when the assertion fails.
            if os.path.exists(inFile):
                os.remove(inFile)
        
        
    def test_dbHeaders_with_one_sequence_without_header(self):
        # dbHeaders() is expected to return an empty list for the
        # sequence-without-header fixture.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_sequence_without_header(inFile)
        try:
            lObs = FastaUtils.dbHeaders(inFile)
            self.assertEqual([], lObs)
        finally:
            # Remove the input even when the assertion fails.
            if os.path.exists(inFile):
                os.remove(inFile)
        
        
    def test_dbHeaders_four_sequences(self):
        # dbHeaders() is expected to return the four headers of the
        # four-sequence fixture, in order.
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
        try:
            lObs = FastaUtils.dbHeaders(inFile)
            self.assertEqual(["seq 1", "seq 2", "seq 3", "seq 4"], lObs)
        finally:
            # Remove the input even when the assertion fails.
            if os.path.exists(inFile):
                os.remove(inFile)
        
        
    def test_dbSplit_no_in_file(self):
        # The input file is never created here: dbSplit() must terminate
        # with SystemExit.
        missingFile = "dummyFastaFile.fa"
        self.assertRaises(SystemExit, FastaUtils.dbSplit, missingFile, 1, False)
        
        
    def test_dbSplit_emptyFile(self):
        # Splitting the empty-file fixture must not create "batch_1.fa".
        inFile = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(inFile)
        try:
            FastaUtils.dbSplit(inFile, 10, False, 1)
            self.assertFalse(os.path.exists("batch_1.fa"))
        finally:
            # Remove the input, and the unexpected batch file if the check
            # failed, so nothing leaks into other tests.
            for tmpFile in [inFile, "batch_1.fa"]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_dbSplit_oneSequence_tenSequencesPerBatch(self):
        # One input sequence with a batch size of 10: "batch_1.fa" must be
        # identical to the one-sequence fixture.
        inFile = "dummyFastaFile.fa"
        expBatchFile = "dummyExpBatch_1.fa"
        obsBatchFile = "batch_1.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_one_sequence(inFile)
            Utils_for_T_FastaUtils._createFastaFile_one_sequence(expBatchFile)

            FastaUtils.dbSplit(inFile, 10, False)

            self.assertTrue(FileUtils.are2FilesIdentical(expBatchFile, obsBatchFile))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFile, expBatchFile, obsBatchFile]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_dbSplit_fourSequences_threeSequencesPerBatch(self):
        # Four input sequences with a batch size of 3: "batch_1.fa" and
        # "batch_2.fa" are compared with the three-sequence and one-sequence
        # reference batches.
        inFile = "dummyFastaFile.fa"
        expBatch1File = "dummyExpBatch_1.fa"
        expBatch2File = "dummyExpBatch_2.fa"
        obsBatch1File = "batch_1.fa"
        obsBatch2File = "batch_2.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
            Utils_for_T_FastaUtils._createBatch1_three_sequences(expBatch1File)
            Utils_for_T_FastaUtils._createBatch2_one_sequence(expBatch2File)

            FastaUtils.dbSplit(inFile, 3, False)

            self.assertTrue(FileUtils.are2FilesIdentical(expBatch1File, obsBatch1File))
            self.assertTrue(FileUtils.are2FilesIdentical(expBatch2File, obsBatch2File))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFile, expBatch1File, expBatch2File,
                            obsBatch1File, obsBatch2File]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            
            
    def test_dbSplit_fourSequences_twoSequencesPerBatch_inBatchDirectory(self):
        # Four input sequences with a batch size of 2 and the third argument
        # set to True: the batch files are looked up under "batches/".
        inFile = "dummyFastaFile.fa"
        expBatch1File = "dummyExp_batch_1.fa"
        expBatch2File = "dummyExp_batch_2.fa"
        obsBatch1File = "batches/batch_1.fa"
        obsBatch2File = "batches/batch_2.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_four_sequences(inFile)
            Utils_for_T_FastaUtils._createBatch1_two_sequences(expBatch1File)
            Utils_for_T_FastaUtils._createBatch2_two_sequences(expBatch2File)

            FastaUtils.dbSplit(inFile, 2, True, 1)

            self.assertTrue(FileUtils.are2FilesIdentical(expBatch1File, obsBatch1File))
            self.assertTrue(FileUtils.are2FilesIdentical(expBatch2File, obsBatch2File))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFile, expBatch1File, expBatch2File,
                            obsBatch1File, obsBatch2File]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            # Also drop the directory that held the observed batches so it
            # does not leak into other tests.
            if os.path.isdir("batches"):
                shutil.rmtree("batches")
            
            
    def test_dbSplit_tenSequences_oneSequencePerBatch_inBatchDirectory(self):
        # Ten input sequences with a batch size of 1: each
        # "batches/batch_NN.fa" (zero-padded 01..10) is compared with a
        # reference file holding the sequence headed "seq <n>".
        inFile = "dummyFastaFile.fa"
        lSuffixes = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10']
        lExpFiles = ["exp_batch_%s.fa" % (s) for s in lSuffixes]
        try:
            Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inFile)

            FastaUtils.dbSplit(inFile, 1, True)

            for idx, s in enumerate(lSuffixes):
                expBatchFile = lExpFiles[idx]
                Utils_for_T_FastaUtils._createBatch_one_small_sequence(expBatchFile, "seq " + str(idx + 1))
                obsBatchFile = "batches/batch_%s.fa" % (s)
                self.assertTrue(FileUtils.are2FilesIdentical(expBatchFile, obsBatchFile))
                os.remove(obsBatchFile)

            # rmdir (not rmtree) on purpose: it raises if any unexpected file
            # is still present in the directory.
            os.rmdir("batches")
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFile] + lExpFiles:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            if os.path.isdir("batches"):
                shutil.rmtree("batches")
        
        
    def test_dbSplit_twoSequences_oneSequencePerBatch_useSeqHeader(self):
        # Fourth argument True (the "use sequence header" option, going by the
        # test name): the output files are expected to be "seq_1.fa" and
        # "seq_2.fa".
        inFile = "dummyFastaFile.fa"
        lExpFileNames = ["seq_1.fa", "seq_2.fa"]
        lExpFiles = ["dummyExp_seq_1.fa", "dummyExp_seq_2.fa"]
        try:
            Utils_for_T_FastaUtils.createFastaFile_twoSequences(inFile)
            Utils_for_T_FastaUtils.createFastaFile_seq_1(lExpFiles[0])
            Utils_for_T_FastaUtils.createFastaFile_seq_2(lExpFiles[1])

            FastaUtils.dbSplit(inFile, 1, False, True)

            lObsFiles = sorted(glob.glob("seq*.fa"))
            # Compare names on the leading slice so that too few output files
            # give an assertion failure rather than an IndexError.
            self.assertEqual(lExpFileNames, lObsFiles[:len(lExpFileNames)])
            for expFile, obsFile in zip(lExpFiles, lObsFiles):
                self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFile] + lExpFiles + glob.glob("seq*.fa"):
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            
            
    def test_dbSplit_twoSequences_otherPrefix(self):
        # With "query" passed as fifth argument, the output files are expected
        # to be "query_1.fa" and "query_2.fa".
        inFile = "dummyFastaFile.fa"
        lExpFileNames = ["query_1.fa", "query_2.fa"]
        lExpFiles = ["dummyExp_seq_1.fa", "dummyExp_seq_2.fa"]
        try:
            Utils_for_T_FastaUtils.createFastaFile_twoSequences(inFile)
            Utils_for_T_FastaUtils.createFastaFile_seq_1(lExpFiles[0])
            Utils_for_T_FastaUtils.createFastaFile_seq_2(lExpFiles[1])

            FastaUtils.dbSplit(inFile, 1, False, False, "query")

            lObsFiles = sorted(glob.glob("query_*.fa"))
            # Compare names on the leading slice so that too few output files
            # give an assertion failure rather than an IndexError.
            self.assertEqual(lExpFileNames, lObsFiles[:len(lExpFileNames)])
            for expFile, obsFile in zip(lExpFiles, lObsFiles):
                self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFile] + lExpFiles + glob.glob("query_*.fa"):
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            
            
    def test_splitFastaFileInBatches(self):
        # Five sequences are split with a second argument of 60; the three
        # files expected under "batches/" hold seq6 alone, seq1 alone, and
        # seq5 + seq2 + seq3. seq6 is written on a single line in the input
        # but wrapped over two lines in the expected output.
        inFileName = "dummyFastaFile.fa"
        expBatch1 = "expBatch_1.fa"
        expBatch2 = "expBatch_2.fa"
        expBatch3 = "expBatch_3.fa"
        obsBatch1 = "batches/batch_1.fa"
        obsBatch2 = "batches/batch_2.fa"
        obsBatch3 = "batches/batch_3.fa"
        try:
            with open(inFileName, "w") as f:
                f.write(">seq1\n")
                f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
                f.write(">seq2\n")
                f.write("ATCGCTAGCTAGCTCG\n")
                f.write(">seq3\n")
                f.write("GTTTGGATCGCT\n")
                f.write(">seq6\n")
                f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCCTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
                f.write(">seq5\n")
                f.write("TTGGATCGCTCTCTGCTCGGAAATCCCGTC\n")
            with open(expBatch1, "w") as f:
                f.write(">seq6\n")
                f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCCTCT\n")
                f.write("GTTTGGATCGCTCTCTGCTCGGAAATCC\n")
            with open(expBatch2, "w") as f:
                f.write(">seq1\n")
                f.write("ATCGCTAGCTAGCTCGATCTAGTCAGTCTGTTTGGATCGCTCTCTGCTCGGAAATCC\n")
            with open(expBatch3, "w") as f:
                f.write(">seq5\n")
                f.write("TTGGATCGCTCTCTGCTCGGAAATCCCGTC\n")
                f.write(">seq2\n")
                f.write("ATCGCTAGCTAGCTCG\n")
                f.write(">seq3\n")
                f.write("GTTTGGATCGCT\n")

            FastaUtils.splitFastaFileInBatches(inFileName, 60)

            self.assertTrue(FileUtils.are2FilesIdentical(expBatch1, obsBatch1))
            self.assertTrue(FileUtils.are2FilesIdentical(expBatch2, obsBatch2))
            self.assertTrue(FileUtils.are2FilesIdentical(expBatch3, obsBatch3))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName, expBatch1, expBatch2, expBatch3]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            if os.path.isdir("batches"):
                shutil.rmtree("batches")
            
            
    def test_splitFastaFileInBatches_one_seq(self):
        # A single 16-base sequence split with a second argument of 60:
        # "batches/batch_1.fa" must be identical to the input content.
        inFileName = "dummyFastaFile.fa"
        expBatch1 = "expBatch_1.fa"
        obsBatch1 = "batches/batch_1.fa"
        try:
            with open(inFileName, "w") as f:
                f.write(">seq2\n")
                f.write("ATCGCTAGCTAGCTCG\n")
            with open(expBatch1, "w") as f:
                f.write(">seq2\n")
                f.write("ATCGCTAGCTAGCTCG\n")

            FastaUtils.splitFastaFileInBatches(inFileName, 60)

            self.assertTrue(FileUtils.are2FilesIdentical(expBatch1, obsBatch1))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName, expBatch1]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            if os.path.isdir("batches"):
                shutil.rmtree("batches")
        
            
    def test_splitSeqPerCluster_no_in_file(self):
        # The input file is never created here: splitSeqPerCluster() must
        # terminate with SystemExit.
        missingFile = "dummyFastaFile.fa"
        self.assertRaises(SystemExit, FastaUtils.splitSeqPerCluster,
                          missingFile, "Piler", False, False, "seqCluster")
        
        
    def test_splitSeqPerCluster_in_file_empty(self):
        # An empty input must not produce any "seqCluster*.fa" file.
        inFileName = "dummyFastaFile.fa"
        with open(inFileName, 'w'):
            pass
        try:
            FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

            self.assertEqual(glob.glob("seqCluster*.fa"), [])
        finally:
            # Remove the input, and any unexpected cluster file if the check
            # failed, so nothing leaks into other tests.
            for tmpFile in [inFileName] + glob.glob("seqCluster*.fa"):
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_splitSeqPerCluster_four_sequences_without_dir(self):
        # Four sequences with cluster-specific headers: the three files
        # "seqCluster1.fa", "seqCluster2.fa" and "seqCluster3.574.fa" are
        # compared with the reference cluster files.
        inFileName = "dummyFastaFile.fa"
        expFirstClusterFileName = "exp_seqCluster1.fa"
        expSecondClusterFileName = "exp_seqCluster2.fa"
        expThirdClusterFileName = "exp_seqCluster3.574.fa"
        obsFirstClusterFileName = "seqCluster1.fa"
        obsSecondClusterFileName = "seqCluster2.fa"
        obsThirdClusterFileName = "seqCluster3.574.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result(expFirstClusterFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result(expSecondClusterFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result(expThirdClusterFileName)

            FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

            self.assertTrue(FileUtils.are2FilesIdentical(expFirstClusterFileName, obsFirstClusterFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expSecondClusterFileName, obsSecondClusterFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expThirdClusterFileName, obsThirdClusterFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName,
                            expFirstClusterFileName, expSecondClusterFileName, expThirdClusterFileName,
                            obsFirstClusterFileName, obsSecondClusterFileName, obsThirdClusterFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_splitSeqPerCluster_four_sequences_without_dir_no_split(self):
        # All four sequences belong to the same cluster: the single output
        # "seqCluster1.fa" must be identical to the input fixture.
        inFileName = "dummyFastaFile.fa"
        expClusterFileName = "exp_seqCluster.fa"
        obsClusterFileName = "seqCluster1.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_in_same_cluster(inFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_in_same_cluster(expClusterFileName)

            FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

            self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName, expClusterFileName, obsClusterFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_splitSeqPerCluster_four_sequences_without_dir_shuffle(self):
        # Input built by the "shuffle" fixture helper: the three cluster
        # files are compared with the same references as the non-shuffled case.
        inFileName = "dummyFastaFile.fa"
        expFirstClusterFileName = "exp_seqCluster1.fa"
        expSecondClusterFileName = "exp_seqCluster2.fa"
        expThirdClusterFileName = "exp_seqCluster3.574.fa"
        obsFirstClusterFileName = "seqCluster1.fa"
        obsSecondClusterFileName = "seqCluster2.fa"
        obsThirdClusterFileName = "seqCluster3.574.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header_shuffle(inFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result(expFirstClusterFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result(expSecondClusterFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result(expThirdClusterFileName)

            FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, False, "seqCluster")

            self.assertTrue(FileUtils.are2FilesIdentical(expFirstClusterFileName, obsFirstClusterFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expSecondClusterFileName, obsSecondClusterFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expThirdClusterFileName, obsThirdClusterFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName,
                            expFirstClusterFileName, expSecondClusterFileName, expThirdClusterFileName,
                            obsFirstClusterFileName, obsSecondClusterFileName, obsThirdClusterFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_splitSeqPerCluster_four_sequences_simplify_header(self):
        # Third argument True (the "simplify header" option, going by the test
        # name): outputs are compared with the "with_simplify_header"
        # reference files.
        inFileName = "dummyFastaFile.fa"
        expFirstClusterFileName = "exp_seqCluster1.fa"
        expSecondClusterFileName = "exp_seqCluster2.fa"
        expThirdClusterFileName = "exp_seqCluster3.574.fa"
        obsFirstClusterFileName = "seqCluster1.fa"
        obsSecondClusterFileName = "seqCluster2.fa"
        obsThirdClusterFileName = "seqCluster3.574.fa"
        try:
            Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result_with_simplify_header(expFirstClusterFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result_with_simplify_header(expSecondClusterFileName)
            Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result_with_simplify_header(expThirdClusterFileName)

            FastaUtils.splitSeqPerCluster(inFileName, "Piler", True, False, "seqCluster")

            self.assertTrue(FileUtils.are2FilesIdentical(expFirstClusterFileName, obsFirstClusterFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expSecondClusterFileName, obsSecondClusterFileName))
            self.assertTrue(FileUtils.are2FilesIdentical(expThirdClusterFileName, obsThirdClusterFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName,
                            expFirstClusterFileName, expSecondClusterFileName, expThirdClusterFileName,
                            obsFirstClusterFileName, obsSecondClusterFileName, obsThirdClusterFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
        
    def test_splitSeqPerCluster_four_sequences_with_dir(self):
        # Fourth argument True: each cluster file is looked up in its own
        # "<input>_cluster_<id>" directory and compared with a reference file.
        inFileName = "dummyFastaFile.fa"
        # Cluster id -> helper that writes the matching reference file.
        lClusters = [
            ('1', Utils_for_T_FastaUtils._createFastaFile_of_first_cluster_result),
            ('2', Utils_for_T_FastaUtils._createFastaFile_of_second_cluster_result),
            ('3.574', Utils_for_T_FastaUtils._createFastaFile_of_third_cluster_result),
        ]
        lExpFiles = ["exp_cluster" + clusterId + ".fa" for clusterId, _ in lClusters]
        lClusterDirs = [inFileName + "_cluster_" + clusterId for clusterId, _ in lClusters]
        try:
            Utils_for_T_FastaUtils._createFastaFile_of_four_sequences_with_specific_header(inFileName)
            FastaUtils.splitSeqPerCluster(inFileName, "Piler", False, True, "seqCluster")

            for clusterId, createExpFile in lClusters:
                expClusterFileName = "exp_cluster" + clusterId + ".fa"
                createExpFile(expClusterFileName)
                clusterDir = inFileName + "_cluster_" + clusterId
                obsClusterFileName = clusterDir + "/seqCluster" + clusterId + ".fa"
                self.assertTrue(FileUtils.are2FilesIdentical(expClusterFileName, obsClusterFileName))
                os.remove(obsClusterFileName)
                # rmdir (not rmtree) on purpose: it raises if any unexpected
                # file is still present in the cluster directory.
                os.rmdir(clusterDir)
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [inFileName] + lExpFiles:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
            for clusterDir in lClusterDirs:
                if os.path.isdir(clusterDir):
                    shutil.rmtree(clusterDir)
            
            
    def test_dbLengthFilter_with_one_sequence(self):
        # Threshold 12 on the one-sequence fixture: the ".Inf12" output is
        # expected to be empty and the ".Sup12" output to hold the sequence.
        fileName = "dummyFastaFile.fa"
        expFileNameInf = "exp_dummyFastaFile.fa.Inf12"
        expFileNameSup = "exp_dummyFastaFile.fa.Sup12"
        obsFileNameInf = "dummyFastaFile.fa.Inf12"
        obsFileNameSup = "dummyFastaFile.fa.Sup12"
        try:
            Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)
            Utils_for_T_FastaUtils._createFastaFile_for_empty_file(expFileNameInf)
            Utils_for_T_FastaUtils._createFastaFile_one_sequence(expFileNameSup)

            FastaUtils.dbLengthFilter(12, fileName, verbose=0)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileNameInf, obsFileNameInf))
            self.assertTrue(FileUtils.are2FilesIdentical(expFileNameSup, obsFileNameSup))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileNameInf, expFileNameSup,
                            obsFileNameInf, obsFileNameSup]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbLengthFilter_with_four_sequence(self):
        # Threshold 130 on the four-sequence fixture: the ".Inf130" output is
        # compared with the one-sequence fixture and the ".Sup130" output with
        # the dedicated "sup" reference file.
        fileName = "dummyFastaFile.fa"
        expFileNameInf = "exp_dummyFastaFile.fa.Inf130"
        expFileNameSup = "exp_dummyFastaFile.fa.Sup130"
        obsFileNameInf = "dummyFastaFile.fa.Inf130"
        obsFileNameSup = "dummyFastaFile.fa.Sup130"
        try:
            Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
            Utils_for_T_FastaUtils._createFastaFile_one_sequence(expFileNameInf)
            Utils_for_T_FastaUtils._createResult_of_dbLengthFilter_sup(expFileNameSup)

            FastaUtils.dbLengthFilter(130, fileName, verbose=0)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileNameInf, obsFileNameInf))
            self.assertTrue(FileUtils.are2FilesIdentical(expFileNameSup, obsFileNameSup))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileNameInf, expFileNameSup,
                            obsFileNameInf, obsFileNameSup]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbLongestSequences_with_empty_file(self):
        # dbLongestSequences() is expected to return 0 for the empty-file fixture.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
        try:
            obsResult = FastaUtils.dbLongestSequences(1, fileName)

            self.assertEqual(0, obsResult)
        finally:
            # Remove the input even when the assertion fails.
            if os.path.exists(fileName):
                os.remove(fileName)
        
    def test_dbLongestSequences_with_one_longest_sequence(self):
        # Asking for the single longest sequence of the four-sequence fixture:
        # the ".best1" output must hold "seq 3" (seven full lines plus a
        # 7-base remainder).
        fileName = "dummyFastaFile.fa"
        expFileName = "exp_dummyFastaFile.fa.best1"
        obsFileName = "dummyFastaFile.fa.best1"
        fullLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
        try:
            Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)
            with open(expFileName, 'w') as f:
                f.write(">seq 3\n")
                f.write(fullLine * 7)
                f.write("ATATTCG\n")

            FastaUtils.dbLongestSequences(1, fileName, outFileName="", verbose=0, minThresh=0)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileName, obsFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbLongestSequences_with_two_longest_sequence(self):
        # Asking for the two longest sequences of the three-sequence fixture:
        # the ".best2" output must hold "seq 2" then "seq 4", each made of
        # five full lines plus a 7-base remainder.
        fileName = "dummyFastaFile.fa"
        expFileName = "exp_dummyFastaFile.fa.best1"
        obsFileName = "dummyFastaFile.fa.best2"
        fullLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
        try:
            Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
            with open(expFileName, 'w') as f:
                for header in [">seq 2\n", ">seq 4\n"]:
                    f.write(header)
                    f.write(fullLine * 5)
                    f.write("ATATTCG\n")

            FastaUtils.dbLongestSequences(2, fileName, outFileName="", verbose=0, minThresh=0)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileName, obsFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbExtractSeqHeaders(self):
        # The ".headers" output for the three-sequence fixture must list
        # "seq 1", "seq 2" and "seq 4", one per line and without the leading ">".
        fileName = "dummyFastaFile.fa"
        expFileName = "exp_dummyFastaFile.fa"
        obsFileName = "dummyFastaFile.fa.headers"
        try:
            Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
            with open(expFileName, 'w') as f:
                f.write("seq 1\n")
                f.write("seq 2\n")
                f.write("seq 4\n")

            FastaUtils.dbExtractSeqHeaders(fileName)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileName, obsFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbExtractSeqHeaders_with_empty_file(self):
        # The ".headers" output for the empty-file fixture must be empty.
        fileName = "dummyFastaFile.fa"
        expFileName = "exp_dummyFastaFile.fa"
        obsFileName = "dummyFastaFile.fa.headers"
        try:
            Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)
            # Expected result: an empty file.
            with open(expFileName, 'w') as f:
                f.write("")

            FastaUtils.dbExtractSeqHeaders(fileName)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileName, obsFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbExtractSeqHeaders_without_header(self):
        # The ".headers" output for the sequence-without-header fixture must
        # be empty.
        fileName = "dummyFastaFile.fa"
        expFileName = "exp_dummyFastaFile.fa"
        obsFileName = "dummyFastaFile.fa.headers"
        try:
            Utils_for_T_FastaUtils._createFastaFile_sequence_without_header(fileName)
            # Expected result: an empty file.
            with open(expFileName, 'w') as f:
                f.write("")

            FastaUtils.dbExtractSeqHeaders(fileName)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileName, obsFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbExtractByPattern_without_pattern(self):
        # With an empty pattern, dbExtractByPattern() is expected to return None.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
        try:
            obsResult = FastaUtils.dbExtractByPattern("", fileName)

            self.assertEqual(None, obsResult)
        finally:
            # Remove the input even when the assertion fails.
            if os.path.exists(fileName):
                os.remove(fileName)

    def test_dbExtractByPattern(self):
        # Pattern 'seq': the ".extracted" output must be identical to the
        # three-sequence input fixture.
        fileName = "dummyFastaFile.fa"
        expFileName = "exp_dummyFastaFile.fa"
        obsFileName = "dummyFastaFile.fa.extracted"
        try:
            Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)
            Utils_for_T_FastaUtils._createFastaFile_three_sequences(expFileName)

            FastaUtils.dbExtractByPattern('seq', fileName)

            self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
        finally:
            # Always clean up, even after a failed assertion.
            for tmpFile in [fileName, expFileName, obsFileName]:
                if os.path.exists(tmpFile):
                    os.remove(tmpFile)
        
    def test_dbExtractByPattern_with_2_as_pattern(self):
        # Extracting with the pattern ' 2' is expected to keep only the
        # ">seq 2" record of the three-sequence input file.
        inputFasta = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(inputFasta)
        expectedFasta = "exp_dummyFastaFile.fa"
        fullLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
        with open(expectedFasta, 'w') as handle:
            handle.write(">seq 2\n")
            # Five full-width sequence lines followed by a short last line.
            for _ in range(5):
                handle.write(fullLine)
            handle.write("ATATTCG\n")

        FastaUtils.dbExtractByPattern(' 2', inputFasta)

        observedFasta = "dummyFastaFile.fa.extracted"

        filesMatch = FileUtils.are2FilesIdentical(expectedFasta, observedFasta)
        self.assertTrue(filesMatch)

        for path in (inputFasta, expectedFasta, observedFasta):
            os.remove(path)
        
    def test_dbExtractByPattern_with_sandie_as_pattern(self):
        # Extracting with the pattern 'sandie' is compared against the file
        # produced by _createFastaFile_for_empty_file.
        inputFasta = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(inputFasta)
        expectedFasta = "exp_dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(expectedFasta)

        FastaUtils.dbExtractByPattern('sandie', inputFasta)

        observedFasta = "dummyFastaFile.fa.extracted"

        filesMatch = FileUtils.are2FilesIdentical(expectedFasta, observedFasta)
        self.assertTrue(filesMatch)

        for path in (inputFasta, expectedFasta, observedFasta):
            os.remove(path)
    
    def test_dbExtractByFilePattern_empty_pattern_filename(self):
        # An empty pattern file name must make dbExtractByFilePattern raise
        # SystemExit.
        emptyPatternFile = ""
        self.assertRaises(SystemExit,
                          FastaUtils.dbExtractByFilePattern,
                          emptyPatternFile, None, "")
        
    def test_dbExtractByFilePattern(self):
        # Extract from the ten-sequence fasta file using the pattern file
        # built by _createPatternFile; the expectation keeps the records
        # "seq 1", "seq 3", "seq 8" and "seq 10".
        inputFasta = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inputFasta)
        patternFile = "dummyPatternFile.txt"
        Utils_for_T_FastaUtils._createPatternFile(patternFile)

        expectedFasta = "exp_dummyFastaFile.fa"
        sequenceLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
        with open(expectedFasta, 'w') as handle:
            for headerLine in (">seq 1\n", ">seq 3\n", ">seq 8\n", ">seq 10\n"):
                handle.write(headerLine)
                handle.write(sequenceLine)

        observedFasta = "dummyFastaFile.fa.extracted"

        FastaUtils.dbExtractByFilePattern(patternFile, inputFasta, "")

        filesMatch = FileUtils.are2FilesIdentical(expectedFasta, observedFasta)
        self.assertTrue(filesMatch)

        for path in (inputFasta, patternFile, expectedFasta, observedFasta):
            os.remove(path)
        
    def test_dbCleanByPattern_without_pattern(self):
        # dbCleanByPattern called with an empty pattern is expected to
        # return None.
        fileName = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_three_sequences(fileName)

        obsResult = FastaUtils.dbCleanByPattern("", fileName)

        expResult = None

        # assertEqual is the supported spelling; assertEquals is a
        # deprecated alias.
        self.assertEqual(expResult, obsResult)

        os.remove(fileName)
        
    def test_dbCleanByPattern(self):
        # Cleaning the ten-sequence fasta file with the pattern '2' is
        # expected to drop only the "seq 2" record.
        inputFasta = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inputFasta)

        expectedFasta = "exp_dummyFastaFile.fa"
        sequenceLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
        keptHeaders = (
            ">seq 1\n",
            ">seq 3\n",
            ">seq 4\n",
            ">seq 5\n",
            ">seq 6\n",
            ">seq 7\n",
            ">seq 8\n",
            ">seq 9\n",
            ">seq 10\n",
        )
        with open(expectedFasta, 'w') as handle:
            for headerLine in keptHeaders:
                handle.write(headerLine)
                handle.write(sequenceLine)

        observedFasta = "dummyFastaFile.fa.cleaned"
        FastaUtils.dbCleanByPattern('2', inputFasta)

        filesMatch = FileUtils.are2FilesIdentical(expectedFasta, observedFasta)
        self.assertTrue(filesMatch)

        for path in (inputFasta, expectedFasta, observedFasta):
            os.remove(path)
        
    def test_dbCleanByPattern_with_expectedFile_empty(self):
        # Cleaning the ten-sequence fasta file with the pattern 'seq' is
        # expected to leave an empty ".cleaned" file.
        inputFasta = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inputFasta)

        expectedFasta = "exp_dummyFastaFile.fa"
        with open(expectedFasta, 'w') as handle:
            handle.write("")

        observedFasta = "dummyFastaFile.fa.cleaned"
        FastaUtils.dbCleanByPattern('seq', inputFasta)

        filesMatch = FileUtils.are2FilesIdentical(expectedFasta, observedFasta)
        self.assertTrue(filesMatch)

        for path in (inputFasta, expectedFasta, observedFasta):
            os.remove(path)
    
    def test_dbCleanByFilePattern_empty_pattern_filename(self):
        # An empty pattern file name must make dbCleanByFilePattern raise
        # SystemExit.
        emptyPatternFile = ""
        self.assertRaises(SystemExit,
                          FastaUtils.dbCleanByFilePattern,
                          emptyPatternFile, None, "")
        
    def test_dbCleanByFilePattern(self):
        # Clean the ten-sequence fasta file using the pattern file built by
        # _createPatternFile; the expectation keeps the records "seq 2",
        # "seq 4", "seq 5", "seq 6", "seq 7" and "seq 9".
        inputFasta = "dummyFastaFile.fa"
        Utils_for_T_FastaUtils._createFastaFile_ten_sequences(inputFasta)
        patternFile = "dummyPatternFile.txt"
        Utils_for_T_FastaUtils._createPatternFile(patternFile)

        expectedFasta = "exp_dummyFastaFile.fa"
        sequenceLine = "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n"
        keptHeaders = (
            ">seq 2\n",
            ">seq 4\n",
            ">seq 5\n",
            ">seq 6\n",
            ">seq 7\n",
            ">seq 9\n",
        )
        with open(expectedFasta, 'w') as handle:
            for headerLine in keptHeaders:
                handle.write(headerLine)
                handle.write(sequenceLine)

        observedFasta = "dummyFastaFile.fa.cleaned"

        FastaUtils.dbCleanByFilePattern(patternFile, inputFasta, "")

        filesMatch = FileUtils.are2FilesIdentical(expectedFasta, observedFasta)
        self.assertTrue(filesMatch)

        for path in (inputFasta, patternFile, expectedFasta, observedFasta):
            os.remove(path)
        
    def test_dbORF_without_ORF(self):
        # dbORF on this single-sequence input is expected to write an empty
        # ".ORF.map" file.
        inputFasta = "dummy.fa"
        with open(inputFasta, "w") as handle:
            for line in (">dummy\n",
                         "GGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTTGGGTT\n"):
                handle.write(line)

        expectedMap = "exp.ORF.map"
        with open(expectedMap, "w") as handle:
            handle.write("")
        observedMap = "%s.ORF.map" % inputFasta

        FastaUtils.dbORF(inputFasta, 0, 0)

        filesMatch = FileUtils.are2FilesIdentical(expectedMap, observedMap)
        self.assertTrue(filesMatch)

        for path in (inputFasta, observedMap, expectedMap):
            os.remove(path)
        
    def test_dbORF_with_one_ORF(self):
        # dbORF on this single short sequence is expected to report exactly
        # one ORF line in the ".ORF.map" output.
        inputFasta = "dummyFastaFile.fa"
        with open(inputFasta, 'w') as handle:
            handle.write(">seq1\n")
            handle.write("GAAAATATGGGGTAGATAAGGGATCTGGGTTAATTTTTT\n")

        expectedMap = "exp_dummyORFFile.ORF.map"
        with open(expectedMap, 'w') as handle:
            handle.write("ORF|1|17\tseq1\t16\t33\n")

        FastaUtils.dbORF(inputFasta, 0, 0)
        observedMap = inputFasta + ".ORF.map"

        filesMatch = FileUtils.are2FilesIdentical(expectedMap, observedMap)
        self.assertTrue(filesMatch)

        for path in (inputFasta, observedMap, expectedMap):
            os.remove(path)
        
    def test_dbORF_with_real_ORF(self):
        # Run dbORF(fileName, 10, 30) on three sequences and compare the
        # produced ".ORF.map" file with the expected ORF list below.
        inputFasta = "dummy.fa"
        fastaLines = [
            ">DmelChr4_Blaster_Recon_13_Map_4\n",
            "AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n",
            "TTGCGGATCATTTTGTTTGAACAACCGACAATGCGACCAATTTCAGCGTAGGTTTTACCT\n",
            "TCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAATGCTTTCCGCGACCC\n",
            "ATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAAAACCTTTAATACAAC\n",
            "TCCTTTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACTCCTATTAATTTTATTCA\n",
            "GCAAATACGTGTTCAGTGCTATTTTTGTTACCGCCTCATTTCGCGCACTTTTGCAGCAAG\n",
            "TGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAATTTCTTGCTCAGAGAGCC\n",
            "AACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAATATAAACATTTAATAATT\n",
            "TTTTTTAGGAAATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATA\n",
            ">DmelChr4_Blaster_Piler_3.5_Map_7\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
            "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n",
            "TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n",
            "TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
            "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
            "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n",
            "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
            ">DmelChr4_Blaster_Grouper_10_Map_13\n",
            "GCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGCACTATGCAGC\n",
            "CACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTGAGAGCGTAAG\n",
            "AAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGAGAACGCGTAT\n",
            "AAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCTGATCGAAGAA\n",
            "ACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAATATGATAAAA\n",
            "TAAAAAAATTTTTAAAAATTCGCGCCCTGACTATTATAATTTTAAAGCTTTTTAAAATTT\n",
            "GTTTGTTAAAATCGCCGCTCGAATTAGCTACCGTTTACACATTTATATTTATGTTTAATT\n",
            "CTAATTTGTCTCTCATCTGACAATTTTTTAAGAAAGCGAAATATTTTTTTTTTGAAACAC\n",
            "TTTTAATGTTAATGTTACATCATATTAAGTCAAATGATTTAATAAATATACTAAATAATT\n",
            "AAATATGATAACTGTTTATTGCAAAAGTAATATCAAAGACACTAGAATTATTCTAGTGTC\n",
            "TTTGCTTTGTTCATATCTTGAGGCACGAAGTGCGGACACAAGCACTCAACAATCATTGCC\n",
            "TTATTAATTTTTCACACGCCGCAAGATGAATACTCTAATGACAAATATTCTTATATAAAG\n",
            "TCATTTTTGAAATTTATTTTTGTGATAATATGTACATAGATTTGGCTATTTCTAATCTAT\n",
            "TTTCAAATAATAATAACGTTAAGGCAATGCAAAACAAGAATTTTTTTAGTCGCATGGTGC\n",
            "CAATTGATCAAAAATAATATAGATTTAAAGTCTAAGAACTTCTAAGGTGAAGGGCATATT\n",
            "TTGTCAAATTTACAATGCATGAGCGAGCATACGTGTGCACACATACAGTTGTCTGCTATC\n",
            "ACTTTGTGCGTTGAAAA\n",
        ]
        with open(inputFasta, "w") as handle:
            for line in fastaLines:
                handle.write(line)

        expectedMap = "exp.ORF.map"
        expectedLines = [
            "ORF|3|263\tDmelChr4_Blaster_Recon_13_Map_4\t189\t452\n",
            "ORF|2|206\tDmelChr4_Blaster_Recon_13_Map_4\t185\t391\n",
            "ORF|-3|164\tDmelChr4_Blaster_Recon_13_Map_4\t382\t218\n",
            "ORF|-1|161\tDmelChr4_Blaster_Recon_13_Map_4\t297\t136\n",
            "ORF|1|113\tDmelChr4_Blaster_Recon_13_Map_4\t400\t513\n",
            "ORF|1|113\tDmelChr4_Blaster_Recon_13_Map_4\t112\t225\n",
            "ORF|3|107\tDmelChr4_Blaster_Recon_13_Map_4\t81\t188\n",
            "ORF|1|107\tDmelChr4_Blaster_Recon_13_Map_4\t292\t399\n",
            "ORF|-1|104\tDmelChr4_Blaster_Recon_13_Map_4\t432\t328\n",
            "ORF|-2|104\tDmelChr4_Blaster_Recon_13_Map_4\t515\t411\n",
            "ORF|3|116\tDmelChr4_Blaster_Piler_3.5_Map_7\t393\t509\n",
            "ORF|-3|116\tDmelChr4_Blaster_Piler_3.5_Map_7\t505\t389\n",
            "ORF|-2|86\tDmelChr4_Blaster_Piler_3.5_Map_7\t518\t432\n",
            "ORF|1|80\tDmelChr4_Blaster_Piler_3.5_Map_7\t436\t516\n",
            "ORF|-3|170\tDmelChr4_Blaster_Grouper_10_Map_13\t222\t52\n",
            "ORF|-1|161\tDmelChr4_Blaster_Grouper_10_Map_13\t260\t99\n",
            "ORF|3|155\tDmelChr4_Blaster_Grouper_10_Map_13\t702\t857\n",
            "ORF|3|152\tDmelChr4_Blaster_Grouper_10_Map_13\t288\t440\n",
            "ORF|1|137\tDmelChr4_Blaster_Grouper_10_Map_13\t622\t759\n",
            "ORF|2|128\tDmelChr4_Blaster_Grouper_10_Map_13\t539\t667\n",
            "ORF|1|125\tDmelChr4_Blaster_Grouper_10_Map_13\t760\t885\n",
            "ORF|2|122\tDmelChr4_Blaster_Grouper_10_Map_13\t14\t136\n",
            "ORF|-2|113\tDmelChr4_Blaster_Grouper_10_Map_13\t847\t734\n",
            "ORF|1|110\tDmelChr4_Blaster_Grouper_10_Map_13\t154\t264\n",
        ]
        with open(expectedMap, "w") as handle:
            for line in expectedLines:
                handle.write(line)
        observedMap = "%s.ORF.map" % inputFasta

        FastaUtils.dbORF(inputFasta, 10, 30)

        filesMatch = FileUtils.are2FilesIdentical(expectedMap, observedMap)
        self.assertTrue(filesMatch)

        for path in (inputFasta, observedMap, expectedMap):
            os.remove(path)
        
    def test_sortSequencesByIncreasingLength(self):
        # Three sequences of 60, 120 and 32 bases are expected to come out
        # ordered 32, 60, 120 in the output file.
        fileName = "dummyFastaFile.fa"
        with open(fileName, 'w') as f:
            f.write(">seq1_length_60\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq2_length_120\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq3_length_32\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n")

        expFileName = "exp_dummyFastaFile.fa"
        with open(expFileName, 'w') as f:
            f.write(">seq3_length_32\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n")
            f.write(">seq1_length_60\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq2_length_120\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")

        obsFileName = "obs_dummyFastaFile.fa"

        FastaUtils.sortSequencesByIncreasingLength(fileName, obsFileName, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        # The input fixture used to be left behind in the working directory.
        # The existence check keeps cleanup safe in case the function under
        # test ever removes its own input.
        if os.path.exists(fileName):
            os.remove(fileName)
        os.remove(expFileName)
        os.remove(obsFileName)
        
    def test_sortSequencesByIncreasingLength_in_file_do_not_exists(self):
        # The test never creates "dummyFile.fa"; the call is expected to
        # raise SystemExit.
        missingFasta = "dummyFile.fa"
        self.assertRaises(SystemExit,
                          FastaUtils.sortSequencesByIncreasingLength,
                          missingFasta, "", 0)

    def test_sortSequencesByHeader(self):
        # Records written in the order seq1::test-test, seq3, seq2 are
        # expected to come out as seq1::test-test, seq2, seq3.
        fileName = "dummyFastaFile.fa"
        with open(fileName, "w") as f:
            f.write(">seq1::test-test\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq3\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n")
            f.write(">seq2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
        expFileName = "expFastaFile.fa"
        with open(expFileName, "w") as f:
            f.write(">seq1::test-test\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq3\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n")

        obsFileName = "obsFastaFile.fa"
        FastaUtils.sortSequencesByHeader(fileName, obsFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        # The input fixture used to be left behind in the working directory.
        # The existence check keeps cleanup safe in case the function under
        # test ever removes its own input.
        if os.path.exists(fileName):
            os.remove(fileName)
        os.remove(expFileName)
        os.remove(obsFileName)

    def test_sortSequencesByHeader_no_outFileName(self):
        # Without an explicit output name the result is read from
        # "dummyFastaFile_sortByHeaders.fa"; records seq12, seq1, seq2 are
        # expected in the order seq1, seq12, seq2.
        fileName = "dummyFastaFile.fa"
        with open(fileName, "w") as f:
            f.write(">seq12\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq1\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n")
            f.write(">seq2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
        expFileName = "expFastaFile.fa"
        with open(expFileName, "w") as f:
            f.write(">seq1\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATAT\n")
            f.write(">seq12\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write(">seq2\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")
            f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\n")

        obsFileName = "dummyFastaFile_sortByHeaders.fa"
        FastaUtils.sortSequencesByHeader(fileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

        # The input fixture used to be left behind in the working directory.
        # The existence check keeps cleanup safe in case the function under
        # test ever removes its own input.
        if os.path.exists(fileName):
            os.remove(fileName)
        os.remove(expFileName)
        os.remove(obsFileName)
        
    def test_getLengthPerHeader( self ):
        # getLengthPerHeader is expected to map each header to the total
        # length of its sequence; seq3 spans two lines of 15 bases each.
        inFile = "dummyFile.fa"
        # The with-block closes the handle even if a write fails.
        with open( inFile, "w" ) as inFileHandler:
            inFileHandler.write(">seq1\nAGCGATGCGT\n")
            inFileHandler.write(">seq2\nAGCGATG\n")
            inFileHandler.write(">seq3\nAGCGATGGTGCGTGC\n")
            inFileHandler.write("AGCGATGGTGCGTGC\n")

        dExp = { "seq1": 10, "seq2": 7, "seq3": 30 }

        dObs = FastaUtils.getLengthPerHeader( inFile, 0 )

        # assertEqual is the supported spelling; assertEquals is a
        # deprecated alias.
        self.assertEqual( dExp, dObs )

        os.remove( inFile )

    def test_convertFastaHeadersFromChkToChr_grouper(self):
        # Headers of the form "<name> chunkN {Fragment} a..b[,c..d...]" are
        # expected to be rewritten with the chromosome name from the map
        # file and coordinates shifted by the chunk start (e.g. 95523 on
        # chunk6, which starts at 950001, becomes 1045523).
        inFile = "dummyFastaFile.fa"
        with open(inFile, "w") as f:
            f.write(">MbQ1Gr1Cl0 chunk6 {Fragment} 95523..96053\n")
            f.write("AGCGTGCA\n")
            f.write(">MbQ77Gr8Cl0 chunk7 {Fragment} 123657..122568,121935..121446\n")
            f.write("AGCATGC\n")
            f.write(">MbS78Gr8Cl0 chunk7 {Fragment} 140078..139519,139470..138985,138651..138183\n")
            f.write("CGTGCG\n")
            f.write(">MbQ79Gr8Cl0 chunk7 {Fragment} 48021..48587,48669..49153,57346..57834\n")
            f.write("AGCGTGC\n")
        mapFile = "dummyMapFile.map"
        with open(mapFile, "w") as f:
            f.write("chunk5\tdmel_chr4\t760001\t960000\n")
            f.write("chunk6\tdmel_chr4\t950001\t1150000\n")
            f.write("chunk7\tdmel_chr4\t1140001\t1281640\n")
        expFile = "expFile.fa"
        with open(expFile, "w") as f:
            f.write(">MbQ1Gr1Cl0 dmel_chr4 {Fragment} 1045523..1046053\n")
            f.write("AGCGTGCA\n")
            f.write(">MbQ77Gr8Cl0 dmel_chr4 {Fragment} 1263657..1262568,1261935..1261446\n")
            f.write("AGCATGC\n")
            f.write(">MbS78Gr8Cl0 dmel_chr4 {Fragment} 1280078..1279519,1279470..1278985,1278651..1278183\n")
            f.write("CGTGCG\n")
            f.write(">MbQ79Gr8Cl0 dmel_chr4 {Fragment} 1188021..1188587,1188669..1189153,1197346..1197834\n")
            f.write("AGCGTGC\n")
        obsFile = "obsFile.fa"

        FastaUtils.convertFastaHeadersFromChkToChr(inFile, mapFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

        # "path" instead of "file", which shadowed the builtin of that name.
        for path in [inFile, mapFile, expFile, obsFile]:
            os.remove(path)
        
    def test_convertFastaHeadersFromChkToChr_blastclust(self):
        # Headers of the form "<name>_chunkN (dbseq-nr x) [a,b]" are expected
        # to be rewritten as "<name> <chromosome> (dbseq-nr x) a'..b'", with
        # coordinates shifted by the chunk start from the map file (e.g.
        # [12,18] on chunk2, which starts at 16, becomes 27..33).
        inFile = "dummyFastaFile.fa"
        with open(inFile, "w") as f:
            f.write(">BlastclustCluster12Mb63_chunk1 (dbseq-nr 0) [1,10]\n")
            f.write("AGCGTGCA\n")
            f.write(">BlastclustCluster12Mb53_chunk2 (dbseq-nr 2) [1,10]\n")
            f.write("AGCATGC\n")
            f.write(">BlastclustCluster12Mb26_chunk2 (dbseq-nr 2) [12,18]\n")
            f.write("CGTGCG\n")
            f.write(">BlastclustCluster12Mb35_chunk3 (dbseq-nr 0) [10,1]\n")
            f.write("AGCGTGC\n")
        mapFile = "dummyMapFile.map"
        with open(mapFile, "w") as f:
            f.write("chunk1\tchromosome1\t1\t20\n")
            f.write("chunk2\tchromosome1\t16\t35\n")
            f.write("chunk3\tchromosome2\t1\t20\n")
        expFile = "expFile.fa"
        with open(expFile, "w") as f:
            f.write(">BlastclustCluster12Mb63 chromosome1 (dbseq-nr 0) 1..10\n")
            f.write("AGCGTGCA\n")
            f.write(">BlastclustCluster12Mb53 chromosome1 (dbseq-nr 2) 16..25\n")
            f.write("AGCATGC\n")
            f.write(">BlastclustCluster12Mb26 chromosome1 (dbseq-nr 2) 27..33\n")
            f.write("CGTGCG\n")
            f.write(">BlastclustCluster12Mb35 chromosome2 (dbseq-nr 0) 10..1\n")
            f.write("AGCGTGC\n")
        obsFile = "obsFile.fa"

        FastaUtils.convertFastaHeadersFromChkToChr(inFile, mapFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

        # "path" instead of "file", which shadowed the builtin of that name.
        for path in [inFile, mapFile, expFile, obsFile]:
            os.remove(path)
            
    def test_convertFastaToLength( self ):
        # convertFastaToLength is expected to write one line per record,
        # "<first word of header>\t<sequence length>".
        inFile = "dummyFastaFile.fa"
        with open(inFile, "w") as fastaHandle:
            fastaHandle.write(">ReconCluster12Mb63 chunk1 {Fragment} 1..10\n")
            fastaHandle.write("AGCGTGCA\n")
            fastaHandle.write(">ReconCluster12Mb53 chunk2 {Fragment} 1..10\n")
            fastaHandle.write("AGCATGCAA\n")
            fastaHandle.write(">ReconCluster12Mb26 chunk2 {Fragment} 12..18\n")
            fastaHandle.write("CGTGCGAAAA\n")
            fastaHandle.write(">ReconCluster12Mb35 chunk3 {Fragment} 10..1\n")
            fastaHandle.write("AGCGTG\n")

        expFile = "expFile.length"
        with open(expFile, "w") as lengthHandle:
            lengthHandle.write("ReconCluster12Mb63\t8\n")
            lengthHandle.write("ReconCluster12Mb53\t9\n")
            lengthHandle.write("ReconCluster12Mb26\t10\n")
            lengthHandle.write("ReconCluster12Mb35\t6\n")

        obsFile = "obsFile.length"

        FastaUtils.convertFastaToLength(inFile, obsFile)

        filesMatch = FileUtils.are2FilesIdentical(expFile, obsFile)
        self.assertTrue(filesMatch)

        for path in (inFile, expFile, obsFile):
            os.remove(path)
            
    def test_convertFastaToSeq( self ):
        # convertFastaToSeq is expected to write one tab-separated line per
        # record: first word of the header, sequence, full header, length.
        inFile = "dummyFastaFile.fa"
        with open(inFile, "w") as fastaHandle:
            fastaHandle.write(">ReconCluster12Mb63 chunk1 {Fragment} 1..10\n")
            fastaHandle.write("AGCGTGCA\n")
            fastaHandle.write(">ReconCluster12Mb53 chunk2 {Fragment} 1..10\n")
            fastaHandle.write("AGCATGCAA\n")
            fastaHandle.write(">ReconCluster12Mb26 chunk2 {Fragment} 12..18\n")
            fastaHandle.write("CGTGCGAAAA\n")
            fastaHandle.write(">ReconCluster12Mb35 chunk3 {Fragment} 10..1\n")
            fastaHandle.write("AGCGTG\n")

        expFile = "expFile.seq"
        with open(expFile, "w") as seqHandle:
            seqHandle.write("ReconCluster12Mb63\tAGCGTGCA\tReconCluster12Mb63 chunk1 {Fragment} 1..10\t8\n")
            seqHandle.write("ReconCluster12Mb53\tAGCATGCAA\tReconCluster12Mb53 chunk2 {Fragment} 1..10\t9\n")
            seqHandle.write("ReconCluster12Mb26\tCGTGCGAAAA\tReconCluster12Mb26 chunk2 {Fragment} 12..18\t10\n")
            seqHandle.write("ReconCluster12Mb35\tAGCGTG\tReconCluster12Mb35 chunk3 {Fragment} 10..1\t6\n")

        obsFile = "obsFile.seq"

        FastaUtils.convertFastaToSeq(inFile, obsFile)

        filesMatch = FileUtils.are2FilesIdentical(expFile, obsFile)
        self.assertTrue(filesMatch)

        for path in (inFile, expFile, obsFile):
            os.remove(path)
            
    def test_spliceFromCoords( self ):
        # The coordinate file lists ranges on chr1, chr2 and chr3 (some given
        # in reverse order); the expected output holds each chromosome with
        # those ranges removed.
        coordFile = "dummyCoordFile"
        with open( coordFile, "w" ) as coordHandle:
            coordHandle.write("TE1\tchr1\t2\t5\n")
            coordHandle.write("TE2\tchr1\t15\t11\n")
            coordHandle.write("TE3\tchr2\t1\t3\n")
            coordHandle.write("TE1\tchr2\t8\t10\n")
            coordHandle.write("TE4\tchr3\t3\t1\n")
            coordHandle.write("TE4\tchr3\t6\t4\n")

        genomeFile = "dummyGenomeFile"
        with open( genomeFile, "w" ) as genomeHandle:
            genomeHandle.write(">chr1\n")
            genomeHandle.write("AGGGGAAAAACCCCCAAAAA\n")
            genomeHandle.write(">chr2\n")
            genomeHandle.write("GGGAAAAGGG\n")
            genomeHandle.write(">chr3\n")
            genomeHandle.write("GGGGGGTTTT\n")

        expFile = "dummyExpFile"
        with open( expFile, "w" ) as expHandle:
            expHandle.write(">chr1\n")
            expHandle.write("AAAAAAAAAAA\n")
            expHandle.write(">chr2\n")
            expHandle.write("AAAA\n")
            expHandle.write(">chr3\n")
            expHandle.write("TTTT\n")

        obsFile = "dummyObsFile"

        FastaUtils.spliceFromCoords( genomeFile, coordFile, obsFile )
        filesMatch = FileUtils.are2FilesIdentical( expFile, obsFile )
        self.assertTrue( filesMatch )
        for path in ( coordFile, genomeFile, expFile, obsFile ):
            os.remove( path )
            
    def test_dbShuffle_inputFile( self ):
        # Shuffling a one-sequence fasta file must give an output for which
        # dbSize reports 1.
        inFile = "dummyInFile.fa"
        with open( inFile, "w" ) as fastaHandle:
            fastaHandle.write(">seq1\n")
            fastaHandle.write("AGCGATCGACAGCGCATCGCGCATCGCATCGCTACGCATAC\n")

        obsFile = "dummyObsFile.fa"
        FastaUtils.dbShuffle( inFile, obsFile, 1 )

        hasOneSequence = FastaUtils.dbSize( obsFile ) == 1
        self.assertTrue( hasOneSequence )

        for path in ( inFile, obsFile ):
            os.remove( path )
            
    def test_dbShuffle_inputDir( self ):
        # Shuffling a directory that holds one fasta file must create exactly
        # one "dummyInFile_shuffle.fa" in the output directory.
        inDir = "dummyInDir"
        # Start from a fresh input directory.
        if os.path.exists( inDir ):
            shutil.rmtree( inDir )
        os.mkdir( inDir )
        inFile = "%s/dummyInFile.fa" % inDir
        with open( inFile, "w" ) as fastaHandle:
            fastaHandle.write(">seq1\n")
            fastaHandle.write("AGCGATCGACAGCGCATCGCGCATCGCATCGCTACGCATAC\n")

        obsDir = "dummyObsDir"
        FastaUtils.dbShuffle( inDir, obsDir, 1 )

        obsFile = "dummyInFile_shuffle.fa"
        matchingPaths = glob.glob("%s/%s" % (obsDir,obsFile))
        self.assertTrue( len( matchingPaths ) == 1 )

        for directory in ( inDir, obsDir ):
            shutil.rmtree( directory )
            
    def test_convertClusterFileToFastaFile(self):
        # Each line of the cluster file is one cluster of tab-separated
        # sequence names; the expected fasta keeps the input record order
        # and prefixes every header with "BlastclustCluster<i>Mb<j>_".
        inClusterFileName = "in.tab"
        clusterLines = (
            "DTX-incomp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-G9-Map3\n",
            "PotentialHostGene-chim_DmelChr4-B-R5-Map5\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n",
            "RLX-incomp_DmelChr4-B-G220-Map3\n",
        )
        with open(inClusterFileName, "w") as handle:
            for line in clusterLines:
                handle.write(line)

        inFastaFileName = "in.fa"
        inputRecords = (
            (">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">RLX-incomp_DmelChr4-B-G220-Map3\n", "ATCGCC\n"),
            (">PotentialHostGene-chim_DmelChr4-B-R5-Map5\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">DTX-incomp_DmelChr4-B-G9-Map3\n", "ATCGCATCGATCGATC\n"),
        )
        with open(inFastaFileName, "w") as handle:
            for headerLine, sequenceLine in inputRecords:
                handle.write(headerLine)
                handle.write(sequenceLine)

        expFileName = "exp.fa"
        expectedRecords = (
            (">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\n", "ATCGCC\n"),
            (">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\n", "ATCGCATCGATCGATC\n"),
        )
        with open(expFileName, "w") as handle:
            for headerLine, sequenceLine in expectedRecords:
                handle.write(headerLine)
                handle.write(sequenceLine)

        obsFileName = "obs.fa"

        FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust")

        filesMatch = FileUtils.are2FilesIdentical(expFileName, obsFileName)
        self.assertTrue(filesMatch)
        for path in (inClusterFileName, inFastaFileName, expFileName, obsFileName):
            os.remove(path)

    
    def test_convertClusterFileToFastaFile_withoutUnclusterizedSequences(self):
        # The cluster file lists only two clusters, while the fasta file also
        # holds RLX-incomp_DmelChr4-B-G220-Map3; the expected output still
        # contains that sequence, labelled "BlastclustCluster3Mb1_".
        inClusterFileName = "in.tab"
        clusterLines = (
            "DTX-incomp_DmelChr4-B-R10-Map3_reversed\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\tDTX-incomp_DmelChr4-B-G9-Map3\n",
            "PotentialHostGene-chim_DmelChr4-B-R5-Map5\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n",
        )
        with open(inClusterFileName, "w") as handle:
            for line in clusterLines:
                handle.write(line)

        inFastaFileName = "in.fa"
        inputRecords = (
            (">DTX-incomp_DmelChr4-B-R10-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">DTX-incomp_DmelChr4-B-R9-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">RLX-incomp_DmelChr4-B-G220-Map3\n", "ATCGCC\n"),
            (">PotentialHostGene-chim_DmelChr4-B-R5-Map5\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">DTX-incomp_DmelChr4-B-G9-Map3\n", "ATCGCATCGATCGATC\n"),
        )
        with open(inFastaFileName, "w") as handle:
            for headerLine, sequenceLine in inputRecords:
                handle.write(headerLine)
                handle.write(sequenceLine)

        expFileName = "exp.fa"
        expectedRecords = (
            (">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\n", "ATCGCATCGATCGATC\n"),
            (">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\n", "ATCGCC\n"),
            (">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\n", "ATCGCATCGATCGATCATCGCATCGATCGATC\n"),
            (">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\n", "ATCGCATCGATCGATC\n"),
        )
        with open(expFileName, "w") as handle:
            for headerLine, sequenceLine in expectedRecords:
                handle.write(headerLine)
                handle.write(sequenceLine)

        obsFileName = "obs.fa"

        FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust")

        filesMatch = FileUtils.are2FilesIdentical(expFileName, obsFileName)
        self.assertTrue(filesMatch)
        for path in (inClusterFileName, inFastaFileName, expFileName, obsFileName):
            os.remove(path)
        
    def test_convertClusterFileToMapFile(self):
        """
        convertClusteredFastaFileToMapFile() on clustered FASTA headers.

        The scenario is run once per header prefix ('Blastclust' and 'MCL').
        Each input header has the form
        '<prefix>Cluster<i>Mb<j>_<name> (dbseq-nr <n>) [<start>,<end>]' and the
        expected map file holds one tab-separated line per sequence:
        '<prefix>Cluster<i>Mb<j>', '<name>', '<start>', '<end>'.
        """
        for clustAlgo in ["Blastclust", "MCL"]:
            inFileName = "dummy%sOut.fa"  % clustAlgo
            fileExp = "%sToMapExpected.map" % clustAlgo
            fileObs = "%s.map" % os.path.splitext(inFileName)[0]

            try:
                with open(inFileName, "w") as inF:
                    inF.write(">%sCluster1Mb1_chunk1 (dbseq-nr 1) [1,14]\n" % clustAlgo)
                    inF.write("gaattgtttactta\n")
                    inF.write(">%sCluster3Mb1_chunk5 (dbseq-nr 8) [1000,1014]\n" % clustAlgo)
                    inF.write("gaattgtttactta\n")
                    inF.write(">%sCluster1Mb2_chunk1 (dbseq-nr 1) [30,44]\n" % clustAlgo)
                    inF.write("gaattgtttactta\n")
                    inF.write(">%sCluster2Mb1_chunk2 (dbseq-nr 1) [100,114]\n" % clustAlgo)
                    # The last sequence line is written without a trailing newline.
                    inF.write("gaattgtttactta")

                with open(fileExp, "w") as outF:
                    outF.write("%sCluster1Mb1\tchunk1\t1\t14\n" % clustAlgo)
                    outF.write("%sCluster3Mb1\tchunk5\t1000\t1014\n" % clustAlgo)
                    outF.write("%sCluster1Mb2\tchunk1\t30\t44\n" % clustAlgo)
                    outF.write("%sCluster2Mb1\tchunk2\t100\t114\n" % clustAlgo)

                FastaUtils.convertClusteredFastaFileToMapFile(inFileName, fileObs)

                self.assertTrue(FileUtils.are2FilesIdentical(fileObs, fileExp))
            finally:
                # Clean up even when the conversion raises or the assertion
                # fails, so that no fixture is left in the working directory.
                # The existence check keeps a missing observed file from hiding
                # the original failure behind an OSError.
                for fileName in (inFileName, fileObs, fileExp):
                    if os.path.exists(fileName):
                        os.remove(fileName)
            
# Run the test cases of this module when the file is executed as a script
# (nothing happens here when the module is merely imported).
if __name__ == "__main__":
    unittest.main()