view commons/core/seq/test/Test_BioseqDB.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 769e306b7933
children
line wrap: on
line source

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.


import unittest
import os
import time
from commons.core.seq.BioseqDB import BioseqDB
from commons.core.seq.Bioseq import Bioseq
from commons.core.utils.FileUtils import FileUtils
from commons.core.coord.Map import Map


class Test_BioseqDB( unittest.TestCase ):
    
    def setUp( self ):
        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
        
        
    def tearDown( self ):
        """Remove the FASTA fixture 'dummyBioseqDB.fa' if a test left it behind."""
        # os._exists() is a private helper of the os module that looks a name up
        # in that module's namespace; it never inspects the filesystem, so the
        # fixture was never removed. os.path.exists() is the on-disk check.
        if os.path.exists("dummyBioseqDB.fa"):
            os.remove("dummyBioseqDB.fa")
            
            
    def test__eq__(self):
        """Two BioseqDB filled with the same headers and sequences must compare equal."""
        lData = [ ( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                  ( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ]

        # separate Bioseq instances on each side, so equality is by content
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test__eq__instances_with_different_header(self):
        """BioseqDB with identical sequences but different headers must not compare equal."""
        lSequences = [ "AGCGGACGATGCAGCATGCGAATGACGAT", "GCGATGCGATCGATGCGATAGCA" ]

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence )
                               for header, sequence in zip( [ "seq1", "seq2" ], lSequences ) ] )

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( header, sequence )
                               for header, sequence in zip( [ "seq3", "seq4" ], lSequences ) ] )

        # assertNotEqual replaces the deprecated assertNotEquals alias
        self.assertNotEqual( expBioseqDB, obsBioseqDB )
        
        
    def test__eq__instances_with_different_sequences(self):
        """BioseqDB with identical headers but one different sequence must not compare equal."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                               Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ] )

        # only the second sequence differs from the expected bank
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                               Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" ) ] )

        # assertNotEqual replaces the deprecated assertNotEquals alias
        self.assertNotEqual( expBioseqDB, obsBioseqDB )
        
        
    def test__eq__instances_with_different_sequences_and_headers(self):
        """BioseqDB differing in both headers and one sequence must not compare equal."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                               Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ] )

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                               Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" ) ] )

        # assertNotEqual replaces the deprecated assertNotEquals alias
        self.assertNotEqual( expBioseqDB, obsBioseqDB )
        
        
    def test__eq__instances_with_different_sizeOfBioseq(self):
        """BioseqDB holding a different number of Bioseq must not compare equal."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ),
                               Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" ) ] )

        # the observed bank holds a single entry
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" ) ] )

        # assertNotEqual replaces the deprecated assertNotEquals alias
        self.assertNotEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_setName (self):
        """The 'name' attribute is '' on a fresh BioseqDB and holds the value given to setName()."""
        iBioseqDB = BioseqDB()
        self.assertEqual( iBioseqDB.name, "" )

        expName = "myDataBank"
        iBioseqDB.setName( expName )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expName, iBioseqDB.name )
        
        
    def test_read(self):
        """read() on an open FASTA file handle must load both records of the fixture."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCT"),
                               Bioseq("consensus2","GAGATGGCTCATGGAGTACCGC") ] )

        faFN = "dummyFaFile.fa"
        obsBioseqDB = BioseqDB()
        try:
            with open( faFN, "w" ) as faF:
                faF.write(">consensus1\n")
                faF.write("GAGATGGCTCATGGAGTACCTGCCT\n")
                faF.write(">consensus2\n")
                faF.write("GAGATGGCTCATGGAGTACCGC\n")

            with open( faFN, "r" ) as faF:
                obsBioseqDB.read( faF )
        finally:
            # drop the fixture even when writing or reading raised
            if os.path.exists( faFN ):
                os.remove( faFN )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_write(self):
        """write() on an open file handle must produce the expected FASTA file (60 characters per sequence line)."""
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        ] )

        expFaFileName = "dummyFaFile.fa"
        obsFaFileName = "obsDummyFastaFile.fa"
        try:
            with open( expFaFileName, "w" ) as expFaFile:
                expFaFile.write(">consensus1\n")
                expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
                expFaFile.write("ATGGAGTACCTGCCT\n")
                expFaFile.write(">consensus2\n")
                expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
                expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

            with open( obsFaFileName, "w" ) as obsFaFile:
                iBioseqDB.write( obsFaFile )

            self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )
        finally:
            # remove both files even when the comparison fails or a call raises
            for fileName in ( expFaFileName, obsFaFileName ):
                if os.path.exists( fileName ):
                    os.remove( fileName )
        
        
    def test_save(self):
        """save() given a file name must produce the expected FASTA file (60 characters per sequence line)."""
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        ] )

        expFaFileName = "dummyFaFile.fa"
        obsFaFileName = "obsDummyFastaFile.fa"
        try:
            with open( expFaFileName, "w" ) as expFaFile:
                expFaFile.write(">consensus1\n")
                expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
                expFaFile.write("ATGGAGTACCTGCCT\n")
                expFaFile.write(">consensus2\n")
                expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
                expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

            iBioseqDB.save( obsFaFileName )

            self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )
        finally:
            # remove both files even when the comparison fails or a call raises
            for fileName in ( expFaFileName, obsFaFileName ):
                if os.path.exists( fileName ):
                    os.remove( fileName )
        
        
    def test_load(self):
        """load() given a file name must rebuild the sequences from a FASTA file whose sequences span several lines."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        ] )

        FaFileName = "dummyFaFile.fa"
        obsBioseqDB = BioseqDB()
        try:
            with open( FaFileName, "w" ) as FaFile:
                FaFile.write(">consensus1\n")
                FaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
                FaFile.write("ATGGAGTACCTGCCT\n")
                FaFile.write(">consensus2\n")
                FaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
                FaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")

            obsBioseqDB.load( FaFileName )
        finally:
            # the fixture used to be removed only after a passing assertion
            if os.path.exists( FaFileName ):
                os.remove( FaFileName )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_reverse( self ):
        """reverse() must turn every sequence of the bank into its reversed string."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

        # input sequences are the expected ones written backwards
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "GTTA" ), Bioseq( "seq2", "TAAGC" ) ] )
        obsBioseqDB.reverse()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_complement( self ):
        """complement() must replace every sequence of the bank by its complement."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

        # input sequences are the base-by-base complements of the expected ones
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "TAAC" ), Bioseq( "seq2", "GCTTA" ) ] )
        obsBioseqDB.complement()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_reverseComplement( self ):
        """reverseComplement() must replace every sequence of the bank by its reverse complement."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

        # input sequences are the reverse complements of the expected ones
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "CAAT" ), Bioseq( "seq2", "ATTCG" ) ] )
        obsBioseqDB.reverseComplement()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_setData(self):
        """setData() must leave the bank equal to one whose 'db' attribute was assigned the same Bioseq list."""
        lData = [ ( "seq1", "ATTG" ),
                  ( "seq2", "CGAAT" ),
                  ( "seq3", "CAAT" ),
                  ( "seq4", "ATTCG" ) ]

        # expected bank: the list is assigned directly, bypassing setData()
        expBioseqDB = BioseqDB()
        expBioseqDB.db = [ Bioseq( header, sequence ) for header, sequence in lData ]

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_reset( self ):
        """reset() on a filled bank must leave it equal to a freshly created BioseqDB."""
        lBioseq = [ Bioseq( "seq1", "ATTG" ),
                    Bioseq( "seq2", "CGAAT" ),
                    Bioseq( "seq3", "CAAT" ),
                    Bioseq( "seq4", "ATTCG" ) ]

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( lBioseq )
        obsBioseqDB.reset()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( BioseqDB(), obsBioseqDB )
        
        
    def testCleanGap(self):
        """cleanGap() must strip the '-' characters from every sequence of the bank."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "AT-----TG" ), Bioseq( "seq2", "CGAA----T" ) ] )
        obsBioseqDB.cleanGap()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def testCleanGap_on_empty_db(self):
        """cleanGap() on an empty bank must leave it equal to a freshly created BioseqDB."""
        obsBioseqDB = BioseqDB()
        obsBioseqDB.cleanGap()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( BioseqDB(), obsBioseqDB )
        
        
    def testCleanGap_on_size_one_db(self):
        """cleanGap() must strip the '-' characters when the bank holds a single sequence."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ) ] )

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "AT-----TG" ) ] )
        obsBioseqDB.cleanGap()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_add_to_a_empty_bioseqDB_instance (self):
        """add() on an empty bank must index the new Bioseq at position 0 in both 'idx' and 'idx_renamed'."""
        sHeader = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                    "(At4g29080) mRNA, complete cds." )
        # form of the header this test expects as key of 'idx_renamed'
        sHeaderRenamed = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                           "(At4g29080)_mRNA-_complete_cds." )

        expDictIdx = { sHeader : 0 }
        expDictIdxRenamed = { sHeaderRenamed : 0 }

        obsBioseqDB = BioseqDB()
        obsBioseqDB.add( Bioseq( sHeader, "ATTG" ) )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expDictIdx, obsBioseqDB.idx )
        self.assertEqual( expDictIdxRenamed, obsBioseqDB.idx_renamed )
        
        
    def test_add_to_a_size_one_bioseqDB_instance (self):
        """add() on a bank of one Bioseq must index the new Bioseq at position 1 in both 'idx' and 'idx_renamed'."""
        sHeader1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                     "(At4g29080) mRNA, complete cds." )
        sHeader2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
                     "(At4g29080) mRNA, complete cds." )

        # forms of the headers this test expects as keys of 'idx_renamed'
        sHeaderRenamed1 = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                            "(At4g29080)_mRNA-_complete_cds." )
        sHeaderRenamed2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                            "(At4g29080)_mRNA-_complete_cds." )

        expDictIdx = { sHeader1 : 0, sHeader2 : 1 }
        expDictIdxRenamed = { sHeaderRenamed1 : 0, sHeaderRenamed2 : 1 }

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( sHeader1, "ATTG" ) ] )
        obsBioseqDB.add( Bioseq( sHeader2, "ATTG" ) )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expDictIdx, obsBioseqDB.idx )
        self.assertEqual( expDictIdxRenamed, obsBioseqDB.idx_renamed )
        
        
    def test_add_to_a_size_two_bioseqDB_instance (self):
        """add() on a bank of two Bioseq must index the new Bioseq at position 2 in both 'idx' and 'idx_renamed'."""
        sHeader1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                     "(At4g29080) mRNA, complete cds." )
        sHeader2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
                     "(At4g29080) mRNA, complete cds." )
        sHeader3 = ( "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
                     "(At4g29080) mRNA, complete cds." )

        # forms of the headers this test expects as keys of 'idx_renamed'
        sHeaderRenamed1 = ( "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                            "(At4g29080)_mRNA-_complete_cds." )
        sHeaderRenamed2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                            "(At4g29080)_mRNA-_complete_cds." )
        sHeaderRenamed3 = ( "embl-AF332604-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                            "(At4g29080)_mRNA-_complete_cds." )

        expDictIdx = { sHeader1 : 0, sHeader2 : 1, sHeader3 : 2 }
        expDictIdxRenamed = { sHeaderRenamed1 : 0, sHeaderRenamed2 : 1, sHeaderRenamed3 : 2 }

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( sHeader1, "ATTG" ), Bioseq( sHeader2, "ATTG" ) ] )
        obsBioseqDB.add( Bioseq( sHeader3, "ATTG" ) )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expDictIdx, obsBioseqDB.idx )
        self.assertEqual( expDictIdxRenamed, obsBioseqDB.idx_renamed )
        
        
    def test__getitem__(self):
        """Indexing the bank with 1 must return a Bioseq equal to the second one given to setData()."""
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq("seq1","ATTG"), Bioseq("seq2","CGAAT") ] )

        expBioseq = Bioseq("seq2","CGAAT")

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseq, iBioseqDB[1] )
        
        
    def test_getSize(self):
        """getSize() must return the number of Bioseq stored in the bank (4 here)."""
        lBioseq = [ Bioseq( "seq1", "ATTG" ),
                    Bioseq( "seq2", "CGAAT" ),
                    Bioseq( "seq3", "AT-----TG" ),
                    Bioseq( "seq4", "CGAA----T" ) ]

        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( lBioseq )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( 4, obsBioseqDB.getSize() )
        
        
    def test_getSize_emptyDB(self):
        """getSize() must return 0 for a freshly created BioseqDB."""
        obsBioseqDB = BioseqDB()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( 0, obsBioseqDB.getSize() )
        
        
    def test_getLength(self):
        """getLength() must return the cumulated length of all sequences (75 + 88 = 163 here)."""
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        ] )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( 163, iBioseqDB.getLength() )

    def test_getListOfSequencesLength(self):
        """getListOfSequencesLength() must return each sequence length, in the order given to setData()."""
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
        ] )

        lExpLength = [75, 88]
        lObsLength = iBioseqDB.getListOfSequencesLength()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( lExpLength, lObsLength )
        
        
    def test_getHeaderList( self ):
        """getHeaderList() must return the headers in the order given to setData()."""
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq( "seq1", "ATTG" ), Bioseq( "seq2", "CGAAT" ) ] )

        lExpHeader = ["seq1", "seq2"]
        lObsHeader = obsBioseqDB.getHeaderList()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( lExpHeader, lObsHeader )
        
        
    def test_getSequencesList( self ):
        """getSequencesList() must return the sequences in the order given to setData()."""
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [ Bioseq("seq1", "ATGC"), Bioseq("seq2", "AATTCCGG") ] )

        lExpSeqs = ["ATGC", "AATTCCGG"]
        lObsSeqs = obsBioseqDB.getSequencesList()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( lExpSeqs, lObsSeqs )
        
        
    def test_fetch( self ):
        """fetch() given a header must return the Bioseq stored under that header."""
        expBioseq = Bioseq( "seq1", "ATTG" )
        iBioseqDB = BioseqDB()
        # the expected object itself is stored in the bank, next to another entry
        iBioseqDB.setData( [ expBioseq, Bioseq( "seq2", "CGAAT" ) ] )

        obsBioseq = iBioseqDB.fetch( "seq1" )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseq, obsBioseq )
        
        
    def test_getBioseqByRenamedHeader( self ):
        """getBioseqByRenamedHeader() must return the Bioseq whose renamed header is given."""
        Header1 = ( "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
                    "(At4g29080) mRNA, complete cds." )
        Header2 = ( "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
                    "(At4g29080) mRNA, complete cds." )
        Header3 = ( "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
                    "(At4g29080) mRNA, complete cds." )

        # renamed form of Header2, used as the lookup key
        HeaderRenamed2 = ( "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
                           "(At4g29080)_mRNA-_complete_cds." )

        expBioseq = Bioseq( Header2, "CGAAT" )
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq( Header1, "ATTG" ), expBioseq, Bioseq( Header3, "TGCGAAT" ) ] )

        obsBioseq = iBioseqDB.getBioseqByRenamedHeader( HeaderRenamed2 )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseq, obsBioseq )
        
        
    def test_init_with_the_parm_name( self ):
        """Constructing a BioseqDB with a FASTA file name must load the records of that file."""
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq("seq1","ATTG"), Bioseq("seq2","CGAAT") ] )

        fastaFilename = "dummyBioseqDB.fa"
        try:
            with open( fastaFilename, "w" ) as f:
                f.write(">seq1\n")
                f.write("ATTG\n")
                f.write(">seq2\n")
                f.write("CGAAT\n")

            obsBioseqDB = BioseqDB(fastaFilename)
        finally:
            # drop the fixture even when the constructor raised
            if os.path.exists( fastaFilename ):
                os.remove( fastaFilename )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_countNt(self):
        """countNt('N') must count the 'N' of every sequence of the bank (3 per sequence, 6 in total)."""
        # both entries carry the same sequence
        sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

        lBioseq = []
        for header in ( "seq1 description1", "seq2 description2" ):
            iBioseq = Bioseq()
            iBioseq.header = header
            iBioseq.sequence = sequence
            lBioseq.append( iBioseq )

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( lBioseq )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( 6, iBioseqDB.countNt('N') )
        
    def test_countNt_lowercase(self):
        """countNt('N') must return 0 when the sequences only hold lowercase 'n'."""
        # both entries carry the same lowercase sequence
        sequence = "gcgncgctgctttattaagcgctagcatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcgatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcg"

        lBioseq = []
        for header in ( "seq1 description1", "seq2 description2" ):
            iBioseq = Bioseq()
            iBioseq.header = header
            iBioseq.sequence = sequence
            lBioseq.append( iBioseq )

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( lBioseq )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( 0, iBioseqDB.countNt('N') )
        
        
    def test_countNt_withCharacterNotExisting(self):
        """countNt() must return 0 for a character ('W') absent from every sequence."""
        # both entries carry the same sequence
        sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

        lBioseq = []
        for header in ( "seq1 description1", "seq2 description2" ):
            iBioseq = Bioseq()
            iBioseq.header = header
            iBioseq.sequence = sequence
            lBioseq.append( iBioseq )

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( lBioseq )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( 0, iBioseqDB.countNt('W') )
        
        
    def test_countAllNt(self):
        """countAllNt() must return a dictionary giving, per character, its count over the whole bank."""
        # both entries carry the same sequence
        sequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"

        lBioseq = []
        for header in ( "seq1 description1", "seq2 description2" ):
            iBioseq = Bioseq()
            iBioseq.header = header
            iBioseq.sequence = sequence
            lBioseq.append( iBioseq )

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( lBioseq )

        dExpCount = {'A': 68, 'C': 62, 'T': 86, 'G': 70, 'N': 6}
        dObsCount = iBioseqDB.countAllNt()

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( dExpCount, dObsCount )
        
        
    def test_extractPart(self):
        """extractPart(1, 2) on a bank of four Bioseq is expected to return a bank holding the second and third ones."""
        lData = [
            ( "consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT" ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT" ),
            ( "consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
        ]

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        # expected sub-bank: fresh copies of consensus2 and consensus3
        expSubBioseqDB = BioseqDB()
        expSubBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData[1:3] ] )

        obsSubBioseqDB = iBioseqDB.extractPart (1, 2)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expSubBioseqDB, obsSubBioseqDB )
        
        
    def test_bestLength(self):
        """bestLength(4) on seven Bioseq is expected to keep the four longest ones, in their original order."""
        lData = [
            ( "consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC" ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACC" ),
            ( "consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus5", "TGCCTGAGATGGCTCATGGAGTACCTGCCT" ),
            ( "consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG" ),
            ( "consensus7", "TGCCTGATGGCTCATGGAGTACCTGCCT" ),
        ]

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        # consensus1, consensus2, consensus4 and consensus6 are the four longest entries
        lExpHeaders = [ "consensus1", "consensus2", "consensus4", "consensus6" ]
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence )
                               for header, sequence in lData if header in lExpHeaders ] )

        obsBioseqDB = iBioseqDB.bestLength (4)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_bestLength_with_a_none_sequence_include(self):
        """bestLength(3) on three Bioseq, one with a None sequence, is expected to keep all three."""
        lData = [
            ( "consensus1", None ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACC" ),
        ]

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        obsBioseqDB = iBioseqDB.bestLength (3)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_bestLength_with_a_none_sequence_not_include(self):
        """bestLength(2) on three Bioseq, one with a None sequence, is expected to drop the None one."""
        lData = [
            ( "consensus1", None ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACC" ),
        ]

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        # expected bank: fresh copies of the two entries that do have a sequence
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData[1:] ] )

        obsBioseqDB = iBioseqDB.bestLength (2)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_bestLength_number_of_bioseq_requiered_gt_BioseqDB_size(self):
        """bestLength(15) on a bank of only three Bioseq is expected to return all three."""
        lData = [
            ( "consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC" ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACC" ),
        ]

        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence ) for header, sequence in lData ] )

        obsBioseqDB = iBioseqDB.bestLength (15)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_extractPatternOfFile(self):
        """extractPatternOfFile() with pattern 'consensus1+' is expected to load only consensus1 and consensus11 from the file."""
        lData = [
            ( "consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC" ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACC" ),
            ( "consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus11", "TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT" ),
        ]

        # first and last records of the fixture are the expected matches
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [ Bioseq( header, sequence )
                               for header, sequence in ( lData[0], lData[-1] ) ] )

        fastaFilename = "dummyBioseqDB.fa"
        obsBioseqDB = BioseqDB()
        try:
            with open( fastaFilename, "w" ) as f:
                for header, sequence in lData:
                    f.write( ">%s\n%s\n" % ( header, sequence ) )

            obsBioseqDB.extractPatternOfFile("consensus1+" , fastaFilename)
        finally:
            # drop the fixture even when the extraction raised
            if os.path.exists( fastaFilename ):
                os.remove( fastaFilename )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( expBioseqDB, obsBioseqDB )
        
        
    def test_extractPatternOfFile_WithNoExistingPattern(self):
        """extractPatternOfFile() with a pattern matching no header must leave the bank equal to an empty BioseqDB."""
        lData = [
            ( "consensus1", "GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC" ),
            ( "consensus2", "GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus3", "GAGATGGCTCATGGAGTACC" ),
            ( "consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC" ),
            ( "consensus11", "TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT" ),
        ]

        fastaFilename = "dummyBioseqDB.fa"
        obsBioseqDB = BioseqDB()
        try:
            with open( fastaFilename, "w" ) as f:
                for header, sequence in lData:
                    f.write( ">%s\n%s\n" % ( header, sequence ) )

            obsBioseqDB.extractPatternOfFile("NoExistingPattern" , fastaFilename)
        finally:
            # drop the fixture even when the extraction raised
            if os.path.exists( fastaFilename ):
                os.remove( fastaFilename )

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual( BioseqDB(), obsBioseqDB )
        
        
    def test_getByPattern (self):
        # Scenario: a database holds consensus4, consensus1, consensus6 and
        # consensus11. getByPattern("consensus1+") is expected to return a
        # new BioseqDB with only consensus1 and consensus11.
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        # The expected records are built as separate Bioseq objects so the
        # comparison cannot pass merely because both sides share instances.
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        obsBioseqDB = iBioseqDB.getByPattern("consensus1+")

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_getByPattern_with_no_existing_pattern (self):
        # Scenario: none of the four headers matches "noExistingPattern+", so
        # getByPattern() is expected to return a BioseqDB equal to a freshly
        # constructed (empty) one.
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        expBioseqDB = BioseqDB()

        obsBioseqDB = iBioseqDB.getByPattern("noExistingPattern+")

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_getDiffFromPattern (self):
        # Scenario: a database holds consensus4, consensus1, consensus6 and
        # consensus11. getDiffFromPattern("consensus[4|6]") is expected to
        # return the records that do NOT match, i.e. consensus1 and
        # consensus11.
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        # NOTE(review): if the pattern is interpreted as a regular expression,
        # the class "[4|6]" also accepts a literal "|"; "[46]" would be the
        # stricter spelling. Left unchanged to keep the tested input intact.
        obsBioseqDB = iBioseqDB.getDiffFromPattern("consensus[4|6]")

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_getDiffFromPattern_with_no_existing_pattern (self):
        # Scenario: no header matches "noExistingPattern+", so
        # getDiffFromPattern() is expected to return all four records, in the
        # order they were given.
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        # Expected content mirrors the input, built from separate objects.
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        obsBioseqDB = iBioseqDB.getDiffFromPattern("noExistingPattern+")

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_rmByPattern (self):
        # Scenario: rmByPattern("consensus1+") is called on a database
        # holding consensus4, consensus1, consensus6 and consensus11. The
        # database is modified in place and is expected to keep only
        # consensus4 and consensus6.
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        ] )

        obsBioseqDB.rmByPattern("consensus1+")

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_rmByPattern_with_no_existing_pattern (self):
        # Scenario: no header matches "noExistingPattern+", so rmByPattern()
        # is expected to leave the four-record database unchanged.
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        # Expected content mirrors the input, built from separate objects.
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        obsBioseqDB.rmByPattern("noExistingPattern+")

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_addBioseqFromABioseqDBIfHeaderContainPattern (self):
        # Scenario: the target database holds consensus4, consensus1,
        # consensus7 and consensus11; the source database holds "Sequence4"
        # and "consensus6". With the pattern "consensus.*" only "consensus6"
        # is expected to be appended after the four existing records.
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        # Source database: one header that matches and one that does not.
        inBioseqDB = BioseqDB()
        inBioseqDB.setData( [
            Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        ] )

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        ] )

        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):
        # Scenario: neither source header ("Sequence4", "consensus6") matches
        # "noExistingPattern", so the target database is expected to keep
        # exactly its four original records.
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        inBioseqDB = BioseqDB()
        inBioseqDB.setData( [
            Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
        ] )

        # Expected content mirrors the target, built from separate objects.
        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
            Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
            Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
            Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT"),
        ] )

        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_upCase (self):
        # Scenario: upCase() is called on a database whose sequences mix
        # lower- and upper-case letters. The database is modified in place
        # and every sequence is expected to be fully upper-case, with the
        # headers untouched.
        obsBioseqDB = BioseqDB()
        obsBioseqDB.setData( [
            Bioseq("consensus4","atgacGatgca"),
            Bioseq("consensus1","atgcgaT"),
        ] )

        expBioseqDB = BioseqDB()
        expBioseqDB.setData( [
            Bioseq("consensus4","ATGACGATGCA"),
            Bioseq("consensus1","ATGCGAT"),
        ] )

        obsBioseqDB.upCase()

        self.assertEqual(expBioseqDB, obsBioseqDB)
        
        
    def test_getMap(self):
        # Scenario: three aligned sequences contain "-" gap characters.
        # getDictOfLMapsWithoutGaps() is expected to return a dict keyed by
        # header whose value lists one Map per gap-free stretch, named
        # "<header>_subSeq<k>". The expected coordinates below are 1-based
        # and inclusive (e.g. "ATGC" at the start of header1 is 1..4).
        iAlignedBioseqDB = BioseqDB()
        iAlignedBioseqDB.setData( [
            Bioseq("header1","ATGC-RA-GCT"),
            Bioseq("header2","-TGC-RA-GCT"),
            Bioseq("header3","ATGC-RA-GC-"),
        ] )

        expDict = {
            "header1": [
                Map( "header1_subSeq1", "header1", 1, 4 ),
                Map( "header1_subSeq2", "header1", 6, 7 ),
                Map( "header1_subSeq3", "header1", 9, 11 ),
            ],
            # Leading gap: the first stretch starts at position 2.
            "header2": [
                Map( "header2_subSeq1", "header2", 2, 4 ),
                Map( "header2_subSeq2", "header2", 6, 7 ),
                Map( "header2_subSeq3", "header2", 9, 11 ),
            ],
            # Trailing gap: the last stretch ends at position 10.
            "header3": [
                Map( "header3_subSeq1", "header3", 1, 4 ),
                Map( "header3_subSeq2", "header3", 6, 7 ),
                Map( "header3_subSeq3", "header3", 9, 10 ),
            ],
        }

        obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()

        self.assertEqual(expDict, obsDict)

    def test_getSeqLengthByListOfName(self):
        # Scenario: lengths are requested for "header1" and "header3" only.
        # The expected list follows the order of the requested names, and
        # the value for header1 (11) counts its "-" characters too.
        iBioseqDB = BioseqDB()
        iBioseqDB.setData( [
            Bioseq("header1","ATGC-RA-GCT"),
            Bioseq("header2","-TGC-RAR"),
            Bioseq("header3","ATGC"),
        ] )

        expList = [11, 4]

        obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])

        self.assertEqual( expList, obsList )
        
# Module-level suite collecting every test method of Test_BioseqDB. The name
# ``test_suite`` is kept as-is in case other modules import it (not visible
# from this file). TestLoader.loadTestsFromTestCase replaces the deprecated
# unittest.makeSuite helper and is available on both Python 2 and 3.
test_suite = unittest.TestSuite()
test_suite.addTest( unittest.TestLoader().loadTestsFromTestCase( Test_BioseqDB ) )

# Run the suite verbosely when this file is executed as a script.
if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run( test_suite )