diff smart_toolShed/commons/core/seq/test/Test_BioseqDB.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/commons/core/seq/test/Test_BioseqDB.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,974 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import os
+import time
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.core.seq.Bioseq import Bioseq
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.coord.Map import Map
+
+
+class Test_BioseqDB( unittest.TestCase ):
+    
+    def setUp( self ):
+        """Build a per-run identifier from the current timestamp and the process id."""
+        # NOTE(review): no test visible in this part of the file reads _uniqId.
+        timestamp = time.strftime("%Y%m%d%H%M%S")
+        pid = os.getpid()
+        self._uniqId = "%s_%s" % ( timestamp, pid )
+        
+        
+    def tearDown( self ):
+        """Remove the scratch FASTA file "dummyBioseqDB.fa" if a test left it behind."""
+        # os.path.exists() checks the file system; the previously used os._exists()
+        # is a private helper of the os module that looks up a *name* inside that
+        # module, so the file was never detected and never removed.
+        if os.path.exists("dummyBioseqDB.fa"):
+            os.remove("dummyBioseqDB.fa")
+            
+            
+    def test__eq__(self):
+        """Two databases holding the same headers and sequences compare equal."""
+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test__eq__instances_with_different_header(self):
+        """Databases with identical sequences but different headers are not equal."""
+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq4 = Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCA" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+
+        # assertNotEqual replaces the deprecated alias assertNotEquals.
+        self.assertNotEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test__eq__instances_with_different_sequences(self):
+        """Databases with identical headers but one differing sequence are not equal."""
+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+
+        # assertNotEqual replaces the deprecated alias assertNotEquals.
+        self.assertNotEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test__eq__instances_with_different_sequences_and_headers(self):
+        """Databases differing in both headers and a sequence are not equal."""
+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq4 = Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+
+        # assertNotEqual replaces the deprecated alias assertNotEquals.
+        self.assertNotEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test__eq__instances_with_different_sizeOfBioseq(self):
+        """Databases holding a different number of sequences are not equal."""
+        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3 ] )
+
+        # assertNotEqual replaces the deprecated alias assertNotEquals.
+        self.assertNotEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test_setName (self):
+        """A fresh database has an empty name; setName() stores the given one."""
+        expName = "myDataBank"
+        iBioseqDB = BioseqDB()
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( iBioseqDB.name, "" )
+
+        iBioseqDB.setName( expName )
+        obsName = iBioseqDB.name
+        self.assertEqual( expName, obsName )
+        
+        
+    def test_read(self):
+        """read() fills the database from an open FASTA file handle."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGC")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        faFN = "dummyFaFile.fa"
+        try:
+            with open( faFN, "w" ) as faF:
+                faF.write(">consensus1\n")
+                faF.write("GAGATGGCTCATGGAGTACCTGCCT\n")
+                faF.write(">consensus2\n")
+                faF.write("GAGATGGCTCATGGAGTACCGC\n")
+
+            obsBioseqDB = BioseqDB()
+            with open( faFN, "r" ) as faF:
+                obsBioseqDB.read( faF )
+        finally:
+            # Remove the fixture even when writing or reading raises.
+            if os.path.exists( faFN ):
+                os.remove( faFN )
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test_write(self):
+        """write() emits FASTA to an open handle; compared with a hand-written file."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        expFaFileName = "dummyFaFile.fa"
+        obsFaFileName = "obsDummyFastaFile.fa"
+        try:
+            # The expected file wraps each sequence at 60 characters per line.
+            with open( expFaFileName, "w" ) as expFaFile:
+                expFaFile.write(">consensus1\n")
+                expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
+                expFaFile.write("ATGGAGTACCTGCCT\n")
+                expFaFile.write(">consensus2\n")
+                expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
+                expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")
+
+            with open( obsFaFileName, "w" ) as obsFaFile:
+                iBioseqDB.write( obsFaFile )
+
+            self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )
+        finally:
+            # Clean up both files even when the comparison fails or write() raises.
+            for fileName in ( expFaFileName, obsFaFileName ):
+                if os.path.exists( fileName ):
+                    os.remove( fileName )
+        
+        
+    def test_save(self):
+        """save() writes FASTA to the named file; compared with a hand-written file."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        expFaFileName = "dummyFaFile.fa"
+        obsFaFileName = "obsDummyFastaFile.fa"
+        try:
+            # The expected file wraps each sequence at 60 characters per line.
+            with open( expFaFileName, "w" ) as expFaFile:
+                expFaFile.write(">consensus1\n")
+                expFaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
+                expFaFile.write("ATGGAGTACCTGCCT\n")
+                expFaFile.write(">consensus2\n")
+                expFaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
+                expFaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")
+
+            iBioseqDB.save( obsFaFileName )
+
+            self.assertTrue( FileUtils.are2FilesIdentical(expFaFileName, obsFaFileName) )
+        finally:
+            # Clean up both files even when the comparison fails or save() raises.
+            for fileName in ( expFaFileName, obsFaFileName ):
+                if os.path.exists( fileName ):
+                    os.remove( fileName )
+        
+        
+    def test_load(self):
+        """load() fills the database from a FASTA file whose sequences span several lines."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        FaFileName = "dummyFaFile.fa"
+        try:
+            with open( FaFileName, "w" ) as FaFile:
+                FaFile.write(">consensus1\n")
+                FaFile.write("GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTC\n")
+                FaFile.write("ATGGAGTACCTGCCT\n")
+                FaFile.write(">consensus2\n")
+                FaFile.write("GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAG\n")
+                FaFile.write("TACCGCGAGATGGCTCATGGAGTACCGC\n")
+
+            obsBioseqDB = BioseqDB()
+            obsBioseqDB.load( FaFileName )
+
+            # assertEqual replaces the deprecated alias assertEquals.
+            self.assertEqual( expBioseqDB, obsBioseqDB )
+        finally:
+            # Remove the fixture even when the assertion fails or load() raises.
+            if os.path.exists( FaFileName ):
+                os.remove( FaFileName )
+        
+        
+    def test_reverse( self ):
+        """reverse() reverses every sequence of the database in place."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq1", "GTTA" )
+        iBioseq4 = Bioseq( "seq2", "TAAGC" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+        obsBioseqDB.reverse()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test_complement( self ):
+        """complement() complements every sequence of the database in place."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq1", "TAAC" )
+        iBioseq4 = Bioseq( "seq2", "GCTTA" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+        obsBioseqDB.complement()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test_reverseComplement( self ):
+        """reverseComplement() reverse-complements every sequence in place."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq( "seq1", "CAAT" )
+        iBioseq4 = Bioseq( "seq2", "ATTCG" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+        obsBioseqDB.reverseComplement()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test_setData(self):
+        """setData() yields a database equal to one whose db list was assigned directly."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        iBioseq3 = Bioseq( "seq3", "CAAT" )
+        iBioseq4 = Bioseq( "seq4", "ATTCG" )
+
+        lBioseq = [iBioseq1, iBioseq2, iBioseq3, iBioseq4]
+        expBioseqDB = BioseqDB()
+        # The expected instance bypasses setData() on purpose.
+        expBioseqDB.db = lBioseq
+
+        iBioseq5 = Bioseq( "seq1", "ATTG" )
+        iBioseq6 = Bioseq( "seq2", "CGAAT" )
+        iBioseq7 = Bioseq( "seq3", "CAAT" )
+        iBioseq8 = Bioseq( "seq4", "ATTCG" )
+
+        lBioseq2 = [iBioseq5, iBioseq6, iBioseq7, iBioseq8]
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData(lBioseq2)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_reset( self ):
+        """reset() brings a populated database back to equality with a fresh one."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        iBioseq3 = Bioseq( "seq3", "CAAT" )
+        iBioseq4 = Bioseq( "seq4", "ATTCG" )
+
+        lBioseq = [iBioseq1, iBioseq2, iBioseq3, iBioseq4]
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData(lBioseq)
+        obsBioseqDB.reset()
+
+        expBioseqDB = BioseqDB()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def testCleanGap(self):
+        """cleanGap() strips the '-' characters from every sequence."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData([iBioseq1, iBioseq2])
+
+        iBioseq3 = Bioseq( "seq1", "AT-----TG" )
+        iBioseq4 = Bioseq( "seq2", "CGAA----T" )
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+        obsBioseqDB.cleanGap()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def testCleanGap_on_empty_db(self):
+        """cleanGap() on an empty database leaves it equal to a fresh one."""
+        expBioseqDB = BioseqDB()
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.cleanGap()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def testCleanGap_on_size_one_db(self):
+        """cleanGap() strips the '-' characters when the database holds one sequence."""
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData([iBioseq1])
+
+        iBioseq2 = Bioseq( "seq1", "AT-----TG" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData([iBioseq2])
+
+        obsBioseqDB.cleanGap()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_add_to_a_empty_bioseqDB_instance (self):
+        """add() on an empty database registers index 0 in idx and idx_renamed."""
+        sHeader = "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        sHeader += "(At4g29080) mRNA, complete cds."
+
+        expDictIdx = { sHeader : 0}
+
+        # Expected key of idx_renamed for the header above.
+        sHeaderRenamed = "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        sHeaderRenamed += "(At4g29080)_mRNA-_complete_cds."
+        expDictIdxRenamed = {sHeaderRenamed : 0}
+
+        iBioseq1 = Bioseq( sHeader, "ATTG" )
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.add(iBioseq1)
+
+        obsDictIdx = obsBioseqDB.idx
+        obsDictIdxRenamed = obsBioseqDB.idx_renamed
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expDictIdx,obsDictIdx)
+        self.assertEqual(expDictIdxRenamed,obsDictIdxRenamed)
+        
+        
+    def test_add_to_a_size_one_bioseqDB_instance (self):
+        """add() on a one-sequence database registers the new sequence at index 1."""
+        sHeader1 = "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        sHeader1 += "(At4g29080) mRNA, complete cds."
+
+        sHeader2 = "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        sHeader2 += "(At4g29080) mRNA, complete cds."
+
+        expDictIdx = { sHeader1 : 0, sHeader2 : 1}
+
+        # Expected keys of idx_renamed for the two headers above.
+        sHeaderRenamed1 = "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        sHeaderRenamed1 += "(At4g29080)_mRNA-_complete_cds."
+
+        sHeaderRenamed2 = "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        sHeaderRenamed2 += "(At4g29080)_mRNA-_complete_cds."
+
+        expDictIdxRenamed = {sHeaderRenamed1 : 0, sHeaderRenamed2 : 1}
+
+        iBioseq1 = Bioseq( sHeader1, "ATTG" )
+        iBioseq2 = Bioseq( sHeader2, "ATTG" )
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData([ iBioseq1])
+        obsBioseqDB.add(iBioseq2)
+
+        obsDictIdx = obsBioseqDB.idx
+        obsDictIdxRenamed = obsBioseqDB.idx_renamed
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expDictIdx,obsDictIdx)
+        self.assertEqual(expDictIdxRenamed,obsDictIdxRenamed)
+        
+        
+    def test_add_to_a_size_two_bioseqDB_instance (self):
+        """add() on a two-sequence database registers the new sequence at index 2."""
+        sHeader1 = "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        sHeader1 += "(At4g29080) mRNA, complete cds."
+
+        sHeader2 = "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        sHeader2 += "(At4g29080) mRNA, complete cds."
+
+        sHeader3 = "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        sHeader3 += "(At4g29080) mRNA, complete cds."
+        expDictIdx = { sHeader1 : 0, sHeader2 : 1, sHeader3 : 2}
+
+        # Expected keys of idx_renamed for the three headers above.
+        sHeaderRenamed1 = "embl-AF332402-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        sHeaderRenamed1 += "(At4g29080)_mRNA-_complete_cds."
+
+        sHeaderRenamed2 = "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        sHeaderRenamed2 += "(At4g29080)_mRNA-_complete_cds."
+
+        sHeaderRenamed3 = "embl-AF332604-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        sHeaderRenamed3 += "(At4g29080)_mRNA-_complete_cds."
+        expDictIdxRenamed = {sHeaderRenamed1 : 0, sHeaderRenamed2 : 1, sHeaderRenamed3 :2}
+
+        iBioseq1 = Bioseq( sHeader1, "ATTG" )
+        iBioseq2 = Bioseq( sHeader2, "ATTG" )
+        iBioseq3 = Bioseq( sHeader3, "ATTG" )
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData([ iBioseq1, iBioseq2 ])
+        obsBioseqDB.add(iBioseq3)
+
+        obsDictIdx = obsBioseqDB.idx
+        obsDictIdxRenamed = obsBioseqDB.idx_renamed
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expDictIdx,obsDictIdx)
+        self.assertEqual(expDictIdxRenamed,obsDictIdxRenamed)
+        
+        
+    def test__getitem__(self):
+        """Indexing the database with 1 returns the second stored sequence."""
+        iBioseq1 = Bioseq("seq1","ATTG")
+        iBioseq2 = Bioseq("seq2","CGAAT")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+        expBioseq = Bioseq("seq2","CGAAT")
+        obsBioseq = iBioseqDB[1]
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseq, obsBioseq)
+        
+        
+    def test_getSize(self):
+        """getSize() returns the number of sequences stored (4 here)."""
+        expSize = 4
+
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+        iBioseq3 = Bioseq( "seq3", "AT-----TG" )
+        iBioseq4 = Bioseq( "seq4", "CGAA----T" )
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [iBioseq1, iBioseq2 , iBioseq3, iBioseq4 ] )
+        obsSize = obsBioseqDB.getSize()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expSize,obsSize)
+        
+        
+    def test_getSize_emptyDB(self):
+        """getSize() returns 0 for a freshly created database."""
+        expSize = 0
+
+        obsBioseqDB = BioseqDB()
+        obsSize = obsBioseqDB.getSize()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expSize,obsSize)
+        
+        
+    def test_getLength(self):
+        """getLength() returns the cumulated length of all sequences (75 + 88 = 163)."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        expLength = 163
+        obsLength = iBioseqDB.getLength()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expLength, obsLength)
+
+    def test_getListOfSequencesLength(self):
+        """getListOfSequencesLength() returns one length per sequence, in storage order."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+        lLength = iBioseqDB.getListOfSequencesLength()
+
+        expLLengh = [75, 88]
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expLLengh, lLength )
+        
+        
+    def test_getHeaderList( self ):
+        """getHeaderList() returns the headers in storage order."""
+        lExpHeader = ["seq1", "seq2"]
+
+        iBioseq1 = Bioseq( "seq1", "ATTG" )
+        iBioseq2 = Bioseq( "seq2", "CGAAT" )
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        lObsHeader = obsBioseqDB.getHeaderList()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( lExpHeader, lObsHeader )
+        
+        
+    def test_getSequencesList( self ):
+        """getSequencesList() returns the raw sequences in storage order."""
+        lExpSeqs = ["ATGC", "AATTCCGG"]
+
+        iBioseq1 = Bioseq("seq1", "ATGC")
+        iBioseq2 = Bioseq("seq2", "AATTCCGG")
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData([iBioseq1, iBioseq2])
+
+        lObsSeqs = obsBioseqDB.getSequencesList()
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(lExpSeqs, lObsSeqs)
+        
+        
+    def test_fetch( self ):
+        """fetch() returns the sequence stored under the given header."""
+        ibioseq1 = Bioseq( "seq1", "ATTG" )
+        ibioseq2 = Bioseq( "seq2", "CGAAT" )
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ ibioseq1, ibioseq2 ] )
+        expBioseq = ibioseq1
+        obsBioseq = iBioseqDB.fetch( "seq1" )
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseq, obsBioseq )
+        
+        
+    def test_getBioseqByRenamedHeader( self ):
+        """getBioseqByRenamedHeader() looks a sequence up by its renamed header."""
+        Header1 = "embl::AF332402:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        Header1 += "(At4g29080) mRNA, complete cds."
+
+        Header2 = "embl::AF332503:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        Header2 += "(At4g29080) mRNA, complete cds."
+
+        Header3 = "embl::AF332604:AF332402 Arabidopsis thaliana clone C00024 (f)"
+        Header3 += "(At4g29080) mRNA, complete cds."
+
+        # Renamed form of Header2, used as the lookup key.
+        HeaderRenamed2 = "embl-AF332503-AF332402_Arabidopsis_thaliana_clone_C00024_(f)"
+        HeaderRenamed2 += "(At4g29080)_mRNA-_complete_cds."
+
+        ibioseq1 = Bioseq( Header1, "ATTG" )
+        ibioseq2 = Bioseq( Header2, "CGAAT" )
+        ibioseq3 = Bioseq( Header3, "TGCGAAT" )
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ ibioseq1, ibioseq2, ibioseq3 ] )
+        expBioseq = ibioseq2
+
+        obsBioseq = iBioseqDB.getBioseqByRenamedHeader( HeaderRenamed2  )
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseq, obsBioseq )
+        
+        
+    def test_init_with_the_parm_name( self ):
+        """Passing a FASTA file name to the constructor loads that file."""
+        iBioseq1 = Bioseq("seq1","ATTG")
+        iBioseq2 = Bioseq("seq2","CGAAT")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        fastaFilename = "dummyBioseqDB.fa"
+        try:
+            with open(fastaFilename, "w") as f:
+                f.write(">seq1\n")
+                f.write("ATTG\n")
+                f.write(">seq2\n")
+                f.write("CGAAT\n")
+
+            obsBioseqDB = BioseqDB(fastaFilename)
+        finally:
+            # Remove the fixture even when the constructor raises.
+            if os.path.exists(fastaFilename):
+                os.remove(fastaFilename)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual( expBioseqDB, obsBioseqDB )
+        
+        
+    def test_countNt(self):
+        """countNt('N') counts the 'N' characters over all sequences (3 per sequence here)."""
+        # Both entries carry the same sequence, so the literal is shared.
+        sSequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
+        iBioseq1 = Bioseq()
+        iBioseq1.header = "seq1 description1"
+        iBioseq1.sequence = sSequence
+        iBioseq2 = Bioseq()
+        iBioseq2.header = "seq2 description2"
+        iBioseq2.sequence = sSequence
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+        expCount = 6
+        obsCount = iBioseqDB.countNt('N')
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expCount, obsCount)
+        
+    def test_countNt_lowercase(self):
+        """countNt('N') is expected to return 0 when the sequences are lowercase."""
+        # Both entries carry the same sequence, so the literal is shared.
+        sSequence = "gcgncgctgctttattaagcgctagcatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcgatgcgncgctgctttattaagcgctagcgattatatagcagacgcatattatattgcgcg"
+        iBioseq1 = Bioseq()
+        iBioseq1.header = "seq1 description1"
+        iBioseq1.sequence = sSequence
+        iBioseq2 = Bioseq()
+        iBioseq2.header = "seq2 description2"
+        iBioseq2.sequence = sSequence
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+        expCount = 0
+        obsCount = iBioseqDB.countNt('N')
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expCount, obsCount)
+        
+        
+    def test_countNt_withCharacterNotExisting(self):
+        """countNt() returns 0 for a character ('W') absent from every sequence."""
+        # Both entries carry the same sequence, so the literal is shared.
+        sSequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
+        iBioseq1 = Bioseq()
+        iBioseq1.header = "seq1 description1"
+        iBioseq1.sequence = sSequence
+        iBioseq2 = Bioseq()
+        iBioseq2.header = "seq2 description2"
+        iBioseq2.sequence = sSequence
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+        expCount = 0
+        obsCount = iBioseqDB.countNt('W')
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expCount, obsCount)
+        
+        
+    def test_countAllNt(self):
+        """countAllNt() returns a dict of per-character counts over all sequences."""
+        # Both entries carry the same sequence, so the literal is shared.
+        sSequence = "GCGNCGCTGCTTTATTAAGCGCTAGCATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCGATGCGNCGCTGCTTTATTAAGCGCTAGCGATTATATAGCAGACGCATATTATATTGCGCG"
+        iBioseq1 = Bioseq()
+        iBioseq1.header = "seq1 description1"
+        iBioseq1.sequence = sSequence
+        iBioseq2 = Bioseq()
+        iBioseq2.header = "seq2 description2"
+        iBioseq2.sequence = sSequence
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        dExpCount = {'A': 68, 'C': 62, 'T': 86, 'G': 70, 'N': 6}
+
+        dObsCount = iBioseqDB.countAllNt()
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(dExpCount, dObsCount)
+        
+        
+    def test_extractPart(self):
+        """extractPart(1, 2) is expected to return consensus2 and consensus3."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq3 = Bioseq("consensus3","GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq4 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4 ] )
+
+        iBioseq5 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus3","GAGATGGCTCATGGAGTACCTGCCTTGCATGACTGCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACCTGCCT")
+
+        expSubBioseqDB = BioseqDB()
+        expSubBioseqDB.setData( [ iBioseq5, iBioseq6 ] )
+
+        obsSubBioseqDB = iBioseqDB.extractPart (1, 2)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expSubBioseqDB, obsSubBioseqDB)
+        
+        
+    def test_bestLength(self):
+        """bestLength(4) on seven sequences is expected to keep consensus1, 2, 4 and 6."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq3 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+        iBioseq4 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq5 = Bioseq("consensus5","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq7 = Bioseq("consensus7","TGCCTGATGGCTCATGGAGTACCTGCCT")
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4, iBioseq5, iBioseq6 , iBioseq7] )
+
+        iBioseq8 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
+        iBioseq9 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq10 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq11 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq8, iBioseq9, iBioseq10, iBioseq11] )
+
+        obsBioseqDB = iBioseqDB.bestLength (4)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_bestLength_with_a_none_sequence_include(self):
+        """bestLength(3) on three entries keeps all of them, including the None sequence."""
+        iBioseq1 = Bioseq("consensus1", None)
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq3 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3] )
+
+        iBioseq4 = Bioseq("consensus1", None)
+        iBioseq5 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq4, iBioseq5, iBioseq6] )
+
+        obsBioseqDB = iBioseqDB.bestLength (3)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_bestLength_with_a_none_sequence_not_include(self):
+        """bestLength(2) on three entries is expected to drop the one whose sequence is None."""
+        iBioseq1 = Bioseq("consensus1", None)
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq3 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3] )
+
+        iBioseq5 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq5, iBioseq6] )
+
+        obsBioseqDB = iBioseqDB.bestLength (2)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_bestLength_number_of_bioseq_requiered_gt_BioseqDB_size(self):
+        """bestLength(15) on a three-sequence database returns all three sequences."""
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
+        iBioseq2 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq3 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3] )
+
+        iBioseq4 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
+        iBioseq5 = Bioseq("consensus2","GAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus3","GAGATGGCTCATGGAGTACC")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq4, iBioseq5, iBioseq6] )
+
+        obsBioseqDB = iBioseqDB.bestLength (15)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_extractPatternOfFile(self):
+        """extractPatternOfFile("consensus1+", file) is expected to load consensus1 and consensus11."""
+        fastaFilename = "dummyBioseqDB.fa"
+
+        iBioseq1 = Bioseq("consensus1","GAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC")
+        iBioseq2 = Bioseq("consensus11","TGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq1, iBioseq2] )
+
+        obsBioseqDB = BioseqDB()
+        try:
+            with open(fastaFilename, "w") as f:
+                f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
+                f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
+                f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
+                f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
+                f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")
+
+            obsBioseqDB.extractPatternOfFile("consensus1+" , fastaFilename)
+        finally:
+            # Remove the fixture even when extraction raises.
+            if os.path.exists(fastaFilename):
+                os.remove(fastaFilename)
+
+        # assertEqual replaces the deprecated alias assertEquals.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_extractPatternOfFile_WithNoExistingPattern(self):
+        # No header of the fasta file matches "NoExistingPattern": the DB is
+        # expected to compare equal to a freshly created, empty BioseqDB.
+        expBioseqDB = BioseqDB()
+
+        fastaFilename = "dummyBioseqDB.fa"
+        # The context manager closes the handle even if a write fails.
+        with open(fastaFilename, "w") as f:
+            f.write(">consensus1\nGAGATGGCTCATGGAGTACCTGCCTGAGATGGCTCATGGAGTACC\n")
+            f.write(">consensus2\nGAGATGGCTCATGGAGTACCGCGAGACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
+            f.write(">consensus3\nGAGATGGCTCATGGAGTACC\n")
+            f.write(">consensus4\nGAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC\n")
+            f.write(">consensus11\nTGCCTGAGATGGCTCATGGAGTACCTGCCTTGCCTTGCATGACTGCATGGAGTACCTGCCTGTGCCTGATGGCTCATGGAGTACCTGCCT\n")
+
+        obsBioseqDB = BioseqDB()
+        try:
+            obsBioseqDB.extractPatternOfFile("NoExistingPattern" , fastaFilename)
+        finally:
+            # Remove the temporary file even when the extraction raises, so
+            # a failure here cannot leave "dummyBioseqDB.fa" behind.
+            os.remove(fastaFilename)
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_getByPattern (self):
+        # getByPattern("consensus1+") is expected to return a new DB holding
+        # only "consensus1" and "consensus11", out of the four input headers.
+        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq3 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )
+
+        # Expected content: the two matching sequences, as fresh objects.
+        iBioseq5 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq6 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq5, iBioseq6] )
+
+        obsBioseqDB = iBioseqDB.getByPattern("consensus1+")
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_getByPattern_with_no_existing_pattern (self):
+        # None of the four headers matches "noExistingPattern+": the result
+        # is expected to compare equal to a freshly created, empty BioseqDB.
+        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq3 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )
+
+        expBioseqDB = BioseqDB()
+
+        obsBioseqDB = iBioseqDB.getByPattern("noExistingPattern+")
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_getDiffFromPattern (self):
+        # getDiffFromPattern() is expected to return a new DB holding the
+        # sequences whose header does NOT match the pattern: "consensus4" and
+        # "consensus6" are dropped, "consensus1" and "consensus11" are kept.
+        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq3 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )
+
+        iBioseq5 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq6 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq5, iBioseq6] )
+
+        # NOTE(review): if the pattern is handled as a regular expression
+        # (TODO confirm), "|" inside "[...]" is a literal character, so
+        # "[4|6]" also matches "consensus|"; "[46]" is presumably what was
+        # meant. Harmless with these headers, so the pattern is left as is.
+        obsBioseqDB = iBioseqDB.getDiffFromPattern("consensus[4|6]")
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_getDiffFromPattern_with_no_existing_pattern (self):
+        # No header matches "noExistingPattern+", so nothing is filtered out:
+        # the result is expected to compare equal to a DB holding the same
+        # four sequences as the input.
+        lHeadersAndSeqs = [
+            ("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
+            ("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
+            ("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
+            ("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
+        ]
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData( [ Bioseq(header, seq) for header, seq in lHeadersAndSeqs ] )
+
+        # Built from fresh Bioseq objects so that the expected and the input
+        # DB never share instances.
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ Bioseq(header, seq) for header, seq in lHeadersAndSeqs ] )
+
+        obsBioseqDB = iBioseqDB.getDiffFromPattern("noExistingPattern+")
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_rmByPattern (self):
+        # rmByPattern("consensus1+") works in place on obsBioseqDB:
+        # "consensus1" and "consensus11" are expected to be removed, leaving
+        # "consensus4" and "consensus6".
+        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq3 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )
+
+        iBioseq5 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq5, iBioseq6 ] )
+
+        obsBioseqDB.rmByPattern("consensus1+")
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_rmByPattern_with_no_existing_pattern (self):
+        # No header matches "noExistingPattern+": after the in-place call,
+        # obsBioseqDB is expected to still hold its four sequences.
+        lHeadersAndSeqs = [
+            ("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
+            ("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
+            ("consensus6", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
+            ("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
+        ]
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ Bioseq(header, seq) for header, seq in lHeadersAndSeqs ] )
+
+        # Built from fresh Bioseq objects so that the expected DB cannot be
+        # affected by the in-place call on obsBioseqDB.
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ Bioseq(header, seq) for header, seq in lHeadersAndSeqs ] )
+
+        obsBioseqDB.rmByPattern("noExistingPattern+")
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_addBioseqFromABioseqDBIfHeaderContainPattern (self):
+        # Sequences of inBioseqDB whose header matches "consensus.*" are
+        # expected to be added, in place, after the sequences obsBioseqDB
+        # already holds: "consensus6" is added, "Sequence4" is not.
+        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq3 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )
+
+        # Source DB: one header that does not match and one that does.
+        iBioseq5 = Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        inBioseqDB = BioseqDB()
+        inBioseqDB.setData( [ iBioseq5, iBioseq6 ])
+
+        iBioseq7 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq8 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")
+        iBioseq9 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        iBioseq10 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")
+        iBioseq11 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq7, iBioseq8, iBioseq9, iBioseq10, iBioseq11] )
+
+        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):
+        # No header of inBioseqDB matches "noExistingPattern": after the
+        # in-place call, obsBioseqDB is expected to still hold only its four
+        # initial sequences.
+        lHeadersAndSeqs = [
+            ("consensus4", "GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC"),
+            ("consensus1", "TGCCTGAGATGGCTCATGGAGTACCTGCCT"),
+            ("consensus7", "TGCCTTGCATGACTGCATGGAGTACCTGCCTG"),
+            ("consensus11", "TGCCTGATGGCTCATGGAGTACCTGCCT"),
+        ]
+
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ Bioseq(header, seq) for header, seq in lHeadersAndSeqs ] )
+
+        iBioseq5 = Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")
+        iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")
+        inBioseqDB = BioseqDB()
+        inBioseqDB.setData( [ iBioseq5, iBioseq6 ])
+
+        # Built from fresh Bioseq objects so that the expected DB cannot be
+        # affected by the in-place call on obsBioseqDB.
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ Bioseq(header, seq) for header, seq in lHeadersAndSeqs ] )
+
+        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_upCase (self):
+        # upCase() works in place: every sequence of the DB is expected to
+        # be upper-cased, the headers being left untouched.
+        iBioseq1 = Bioseq("consensus4","atgacGatgca")
+        iBioseq2 = Bioseq("consensus1","atgcgaT")
+        obsBioseqDB = BioseqDB()
+        obsBioseqDB.setData( [ iBioseq1, iBioseq2 ] )
+
+        iBioseq3 = Bioseq("consensus4","ATGACGATGCA")
+        iBioseq4 = Bioseq("consensus1","ATGCGAT")
+        expBioseqDB = BioseqDB()
+        expBioseqDB.setData( [ iBioseq3, iBioseq4 ] )
+
+        obsBioseqDB.upCase()
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBioseqDB, obsBioseqDB)
+        
+        
+    def test_getMap(self):
+        # getDictOfLMapsWithoutGaps() is expected to return, for each header,
+        # the list of Map objects describing its gap-free stretches. The
+        # expected coordinates below are 1-based, inclusive positions in the
+        # aligned (gapped) sequence, and each Map is named
+        # "<header>_subSeq<i>".
+        iBioseq1 = Bioseq("header1","ATGC-RA-GCT")
+        iBioseq2 = Bioseq("header2","-TGC-RA-GCT")
+        iBioseq3 = Bioseq("header3","ATGC-RA-GC-")
+
+        iAlignedBioseqDB = BioseqDB()
+        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+
+        obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()
+
+        expLMap1 = [Map( "header1_subSeq1", "header1", 1, 4 ), Map( "header1_subSeq2", "header1", 6, 7 ), Map( "header1_subSeq3", "header1", 9, 11 )]
+        # Leading gap: the first stretch starts at position 2.
+        expLMap2 = [Map( "header2_subSeq1", "header2", 2, 4 ), Map( "header2_subSeq2", "header2", 6, 7 ), Map( "header2_subSeq3", "header2", 9, 11 )]
+        # Trailing gap: the last stretch ends at position 10.
+        expLMap3 = [Map( "header3_subSeq1", "header3", 1, 4 ), Map( "header3_subSeq2", "header3", 6, 7 ), Map( "header3_subSeq3", "header3", 9, 10 )]
+
+        expDict = {
+                   "header1": expLMap1,
+                   "header2": expLMap2,
+                   "header3": expLMap3
+                   }
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expDict, obsDict)
+
+    def test_getSeqLengthByListOfName(self):
+        # getSeqLengthByListOfName() is expected to return the sequence
+        # lengths in the order of the requested headers. The 11 expected for
+        # "header1" counts its two "-" characters.
+        iBioseq1 = Bioseq("header1","ATGC-RA-GCT")
+        iBioseq2 = Bioseq("header2","-TGC-RAR")
+        iBioseq3 = Bioseq("header3","ATGC")
+
+        iBioseqDB = BioseqDB()
+        iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])
+
+        expList =  [11, 4]
+        obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])
+
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual( expList, obsList )
+        
+# Module-level suite gathering every test method of Test_BioseqDB.
+# unittest.makeSuite() is deprecated (removed in Python 3.13); the default
+# loader collects the same "test*" methods and also exists in Python 2.
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.defaultTestLoader.loadTestsFromTestCase( Test_BioseqDB ) )
+if __name__ == "__main__":
+    # Run the suite with one line of output per test.
+    unittest.TextTestRunner(verbosity=2).run( test_suite )