Mercurial > repos > yufei-luo > s_mart
diff smart_toolShed/commons/core/seq/test/Test_AlignedBioseqDB.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/commons/core/seq/test/Test_AlignedBioseqDB.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,773 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. + + +import unittest +import sys +import os +import time +from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB +from commons.core.seq.Bioseq import Bioseq +from commons.core.utils.FileUtils import FileUtils +from commons.core.coord.Align import Align +from commons.core.coord.Range import Range +from commons.core.stat.Stat import Stat + + +class Test_AlignedBioseqDB( unittest.TestCase ): + + def setUp( self ): + self._i = AlignedBioseqDB() + self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() ) + + + def tearDown( self ): + self._i = None + self._uniqId = "" + + + def test_getLength(self): + iAlignedBioseqDB = AlignedBioseqDB() + + iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" ) + iAlignedBioseqDB.setData([iBioseq1]) + + expLenght = 29 + obsLength = iAlignedBioseqDB.getLength() + + self.assertEquals(expLenght, obsLength) + + + def test_getSeqLengthWithoutGaps( self ): + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.add( Bioseq( "seq3", + "AGCG-GACGATGCAGCAT--GCGAATGA--CGAT" ) ) + expLenght = 29 + obsLength = iAlignedBioseqDB.getSeqLengthWithoutGaps( "seq3" ) + + self.assertEquals(expLenght, obsLength) + + + def test_getListOccPerSite(self): + iBioseq1 = Bioseq( "seq1", "AGAAA") + iBioseq2 = Bioseq( "seq2", "TCAAG") + iBioseq3 = Bioseq( "seq3", "GGTAC") + iBioseq4 = Bioseq( "seq4", "CCTTA") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expList = [ + + {"A":1, "T":1, "G":1, "C":1}, + + {"G":2, "C":2}, + + {"A":2, "T":2 }, + + {"A":3, "T":1 }, + + {"A":2, "G":1, "C":1} + ] + + obsList = iAlignedBioseqDB.getListOccPerSite() + + self.assertEquals(expList, obsList) + + + def test_getListOccPerSite_with_none_sequence(self): + iBioseq1 = Bioseq( "seq1", "AGAAA") + iBioseq2 = Bioseq( "seq2", "TCAAG") + iBioseq3 = Bioseq( "seq3", "GGTAC") + iBioseq4 = Bioseq( "seq4", None) + iBioseq5 = Bioseq( "seq5", "CCTTA") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4, iBioseq5]) + + expList = [ + {"A":1, "T":1, "G":1}, + {"G":2, "C":1}, + {"A":2, "T":1 }, + {"A":3}, + {"A":1, "G":1, "C":1}, + ] + + obsList = iAlignedBioseqDB.getListOccPerSite() + + self.assertEquals(expList, obsList) + + + def test_getListOccPerSite_on_three_sequence(self): + iBioseq1 = Bioseq( "seq1", "AGAAA") + iBioseq2 = Bioseq( "seq2", "TCAAG") + iBioseq3 = Bioseq( "seq3", "GGTAC") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expList = [ + {"A":1, "T":1, "G":1}, + {"G":2, "C":1}, + {"A":2, "T":1 }, + {"A":3}, + {"A":1, "G":1, "C":1}, + ] + + obsList = iAlignedBioseqDB.getListOccPerSite() + + self.assertEquals(expList, obsList) + + + def test_getConsensus_with_minNbNt_greater_than_nbInSeq(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "TGCA") + iBioseq3 = Bioseq("seq3", "TACT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expConsensus = Bioseq("consensus= length=4 nbAlign=3","TGCT") + obsConsensus = iAlignedBioseqDB.getConsensus(5) + + self.assertEquals(expConsensus, obsConsensus) + + + def test_getConsensus_with_minPropNt_greater_than_1(self): + isSysExitRaised = False + + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "TGCA") + iBioseq3 = Bioseq("seq3", "TACT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expFileName = "expFileName" + expFileHandler = open(expFileName,"w") + expMessage = "ERROR: minPropNt=2.00 should be a proportion (below 1.0)\n" + expFileHandler.write(expMessage) + expFileHandler.close() + + obsFileName = "obsFileName" + obsFileHandler = open(obsFileName,"w") + + stdoutRef = sys.stdout + sys.stdout = obsFileHandler + + try: + iAlignedBioseqDB.getConsensus(2,2) + except SystemExit: + isSysExitRaised = True + + obsFileHandler.close() + expFileHandler.close() + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + self.assertTrue( isSysExitRaised ) + + sys.stdout = stdoutRef + os.remove ( obsFileName ) + os.remove ( expFileName ) + stdoutRef = sys.stdout + + + def test_getConsensus_with_AlignBioseqInstance_size_1(self): + isSysExitRaised = False + + iBioseq1 = Bioseq("seq1", "AGAT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1]) + + expFileName = "expFileName" + expFileHandler = open(expFileName,"w") + expMessage = "ERROR: can't make a consensus with less than 2 sequences\n" + expFileHandler.write(expMessage) + expFileHandler.close() + + obsFileName = "obsFileName" + obsFileHandler = open(obsFileName,"w") + + stdoutRef = sys.stdout + sys.stdout = obsFileHandler + + try: + iAlignedBioseqDB.getConsensus(4) + except SystemExit: + isSysExitRaised = True + + obsFileHandler.close() + expFileHandler.close() + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + self.assertTrue( isSysExitRaised ) + + sys.stdout = stdoutRef + os.remove ( obsFileName ) + os.remove ( expFileName ) + stdoutRef = sys.stdout + + + def test_getConsensus_with_gap_assertion_on_warning_msg(self): + iBioseq1 = Bioseq("seq1", "A-GA-T") + iBioseq2 = Bioseq("seq2", "T-GC-A") + iBioseq3 = Bioseq("seq3", "T-AC-T") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expFileName = "expFileName" + expFileHandler = open(expFileName,"w") + expMessage = "WARNING: 2 sites were removed (33.33%)\n" + expFileHandler.write(expMessage) + expFileHandler.close() + + obsFileName = "obsFileName" + obsFileHandler = open(obsFileName,"w") + + stdoutRef = sys.stdout + sys.stdout = obsFileHandler + + iAlignedBioseqDB.getConsensus(2) + + obsFileHandler.close() + expFileHandler.close() + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + + sys.stdout = stdoutRef + os.remove ( obsFileName ) + os.remove ( expFileName ) + stdoutRef = sys.stdout + + + def test_getConsensus_with_gap_assertion_on_result(self): + iBioseq1 = Bioseq("seq1", "A-GA-T") + iBioseq2 = Bioseq("seq2", "T-GC-A") + iBioseq3 = Bioseq("seq3", "T-AC-T") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expConsensus = Bioseq("consensus= length=4 nbAlign=3","TGCT") + obsConsensus = iAlignedBioseqDB.getConsensus(2) + + self.assertEquals(expConsensus, obsConsensus) + + + def test_getConsensus_with_gaps_and_no_consensus_built_assertion_on_result(self): + iBioseq1 = Bioseq("seq1", "A--A-T") + iBioseq2 = Bioseq("seq2", "----A-") + iBioseq3 = Bioseq("seq3", "--A---") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expConsensus = None + obsConsensus = iAlignedBioseqDB.getConsensus(2) + self.assertEquals(expConsensus, obsConsensus) + + + def test_getConsensus_with_gaps_and_no_consensus_built_assertion_on_warning_messages(self): + iBioseq1 = Bioseq("seq1", "A--A-T") + iBioseq2 = Bioseq("seq2", "----A-") + iBioseq3 = Bioseq("seq3", "--A---") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3]) + + expFileName = "expFileName" + expFileHandler = open(expFileName,"w") + expMessage1 = "WARNING: 1 site was removed (16.67%)\n" + expMessage2 = "WARNING: no consensus can be built (no sequence left)\n" + expFileHandler.write(expMessage1) + expFileHandler.write(expMessage2) + expFileHandler.close() + + obsFileName = "obsFileName" + obsFileHandler = open(obsFileName,"w") + + stdoutRef = sys.stdout + sys.stdout = obsFileHandler + + iAlignedBioseqDB.getConsensus(2) + + obsFileHandler.close() + expFileHandler.close() + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + + sys.stdout = stdoutRef + os.remove ( obsFileName ) + os.remove ( expFileName ) + stdoutRef = sys.stdout + + + def test_getConsensus_with_unacceptable_N_proportion_assertion_on_result(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "CCCA") + iBioseq3 = Bioseq("seq3", "TTCT") + iBioseq4 = Bioseq("seq4", "TTAT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expConsensus = None + obsConsensus = iAlignedBioseqDB.getConsensus(2,0.75) + + self.assertEquals(expConsensus, obsConsensus) + + + def test_getConsensus_with_unacceptable_N_proportion_assertion_on_warning_msg(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "CCCA") + iBioseq3 = Bioseq("seq3", "TTCT") + iBioseq4 = Bioseq("seq4", "TTAT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expFileName = "expFileName" + expFileHandler = open(expFileName,"w") + expMessage = "WARNING: no consensus can be built (75% of N's >= 40%)\n" + expFileHandler.write(expMessage) + expFileHandler.close() + + obsFileName = "obsFileName" + obsFileHandler = open(obsFileName,"w") + + stdoutRef = sys.stdout + sys.stdout = obsFileHandler + + iAlignedBioseqDB.getConsensus(2,0.75) + + obsFileHandler.close() + expFileHandler.close() + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + + sys.stdout = stdoutRef + os.remove ( obsFileName ) + os.remove ( expFileName ) + stdoutRef = sys.stdout + + + def test_getConsensus_with_acceptable_N_proportion_assertion_on_warning_msg(self): + iBioseq1 = Bioseq("seq1", "AGATAA") + iBioseq2 = Bioseq("seq2", "CTCAAA") + iBioseq3 = Bioseq("seq3", "TTCTAA") + iBioseq4 = Bioseq("seq4", "GTATCC") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expFileName = "expFileName" + expFileHandler = open(expFileName,"w") + expMessage = "WARNING: 33% of N's\n" + expFileHandler.write(expMessage) + expFileHandler.close() + + obsFileName = "obsFileName" + obsFileHandler = open(obsFileName,"w") + + stdoutRef = sys.stdout + sys.stdout = obsFileHandler + + iAlignedBioseqDB.getConsensus(2,0.6) + + obsFileHandler.close() + expFileHandler.close() + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + + sys.stdout = stdoutRef + os.remove ( obsFileName ) + os.remove ( expFileName ) + stdoutRef = sys.stdout + + + def test_getConsensus_with_acceptable_N_proportion_assertion_on_result(self): + iBioseq1 = Bioseq("seq1", "AGATAA") + iBioseq2 = Bioseq("seq2", "CTCAAA") + iBioseq3 = Bioseq("seq3", "TTCTAA") + iBioseq4 = Bioseq("seq4", "GTATCC") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expConsensus = Bioseq("consensus= length=6 nbAlign=4","NTNTAA") + obsConsensus = iAlignedBioseqDB.getConsensus(2,0.6) + + self.assertEquals(expConsensus, obsConsensus) + + + def test_getConsensus_with_chimeric_seq_in_alignment(self): + iBioseq1 = Bioseq("seq1", "AGAGTTGTAA") + iBioseq2 = Bioseq("seq2", "ATC----AAA") + iBioseq3 = Bioseq("seq3", "ATC----TAA") + iBioseq4 = Bioseq("seq4", "GTC----TAA") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expConsensus = Bioseq("consensus= length=6 nbAlign=4","ATCTAA") + obsConsensus = iAlignedBioseqDB.getConsensus(2) + + self.assertEquals(expConsensus, obsConsensus) + + + def test_getConsensus_with_SATannot_data(self): + iBioseq1 = Bioseq("MbS69Gr8Cl511 chunk21 {Fragment} 165740..165916", "--TTTAGCCGAAGTCCATATGAGTCTTTGTGTTTGTATCTTCTAACAAGGAAACACTACTTAGGCTTTTAGGATAAGATTGCGGTTTAAGTTCTTATACTCAATCATACACATGACATCAAGTCATATTCGAATCCAAAACTCTAAGCAAGCTTCTTCTTGCTTCTCAAA-GCTTTGATG") + iBioseq2 = Bioseq("MbS68Gr8Cl511 chunk21 {Fragment} 165916..166092", "GGTCTAGCCGAAGTCCATATGAGTCTTTGTCTTTGTATCTTCTAACAAGAAAACACTACTTAGGCTTTTAGGATAAGGTTGCAGTTTAAGTTTTTATACTAAATCATACACATCACATCAAGTCATATTCGACTCCCAAACACTAACCAAGCTTCTT--TGCTTCTCAAC-GCTTTGATG") + iBioseq3 = Bioseq("MbS67Gr8Cl511 chunk21 {Fragment} 166093..166269", "-GTTTAGCCGAAGTCCATATGAGTCGTTGTGTTTGTATCTTCTAACAAGGAAACACTACTTACGCTTTTAGGATAAGATTGTTGTTTAAGTTCTTATACTTAATCATACACATGACATAAAGTCATATTCGACTCCAAAACACTAATCAAGCTTCTTCTTGCTTCTCAAA-GCTTTGTT-") + iBioseq4 = Bioseq("MbS66Gr8Cl511 chunk21 {Fragment} 173353..173529", "--TTTAGCAAAATTCTATATGAGTCTTTATCTTTGTATCTTCTAACAAGGAAACACTACTTAGGCTTTTAGGATAAGGTTGCGGGTTAAGTTCTTATACTCAATCATACACATGATATCAAGTCATATTCGACTCCAAAACACTAACCAAGCTTCTTCTTGCTTCTTAAAAGCTTTGAA-") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expConsensus = Bioseq("consensus= length=178 nbAlign=4 pile=511 pyramid=8", "GTTTAGCCGAAGTCCATATGAGTCTTTGTNTTTGTATCTTCTAACAAGGAAACACTACTTAGGCTTTTAGGATAAGNTTGCNGTTTAAGTTCTTATACTNAATCATACACATGACATCAAGTCATATTCGACTCCAAAACACTAANCAAGCTTCTTCTTGCTTCTCAAAGCTTTGATG") + obsConsensus = iAlignedBioseqDB.getConsensus(2, 0.6, isHeaderSAtannot=True) + + self.assertEquals(expConsensus, obsConsensus) + + + def test_getEntropy_equal_zero(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "AGAT") + iBioseq3 = Bioseq("seq3", "AGAT") + iBioseq4 = Bioseq("seq4", "AGAT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expStats = Stat() + expStats._min = 0 + expStats._max = 0 + expStats._sum = 0 + expStats._sumOfSquares = 0 + expStats._n = 4 + expStats._lValues = [0, 0, 0, 0] + + obsStats = iAlignedBioseqDB.getEntropy() + + self.assertEquals(expStats, obsStats) + + + def test_getEntropy_different_nucl(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "CTCA") + iBioseq3 = Bioseq("seq3", "TTCT") + iBioseq4 = Bioseq("seq4", "GTAT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expStats = Stat() + expStats._min = 0.81127812445913283 + expStats._max = 2.0 + expStats._sum = 4.62255624892 + expStats._sumOfSquares = 6.31634439045 + expStats._n = 4 + expStats._lValues = [2.0, 0.81127812445913283, 1.0, 0.81127812445913283] + + obsStats = iAlignedBioseqDB.getEntropy() + + self.assertEquals(expStats, obsStats) + + + def test_getEntropy_different_nucl_with_N(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "CNCA") + iBioseq3 = Bioseq("seq3", "TTNT") + iBioseq4 = Bioseq("seq4", "GTNT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expStats = Stat() + expStats._min = 0.811278124459 + expStats._max = 2 + expStats._sum = 6.11387090595 + expStats._sumOfSquares = 10.1629200457 + expStats._n = 4 + expStats._lValues = [2, 1.4913146570363986, 1.8112781244591329, 0.81127812445913283] + + obsStats = iAlignedBioseqDB.getEntropy() + + self.assertEquals(expStats, obsStats) + + + def test_getEntropy_different_nucl_with_gap(self): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "C-CA") + iBioseq3 = Bioseq("seq3", "T--T") + iBioseq4 = Bioseq("seq4", "-TNT") + + iAlignedBioseqDB = AlignedBioseqDB() + iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4]) + + expStats = Stat() + expStats._min = 0.811278124459 + expStats._max = 1.65002242165 + expStats._sum = 5.04626304683 + expStats._sumOfSquares = 6.89285231586 + expStats._n = 4 + expStats._lValues = [1.5849625007211561, 1.0, 1.6500224216483541, 0.81127812445913283] + + obsStats = iAlignedBioseqDB.getEntropy() + + self.assertEquals(expStats, obsStats) + + + def test_saveAsBinaryMatrix( self ): + iBioseq1 = Bioseq("seq1", "AGAT") + iBioseq2 = Bioseq("seq2", "C-CA") + iBioseq3 = Bioseq("seq3", "T--T") + iBioseq4 = Bioseq("seq4", "-TNT") + + self._i.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4 ] ) + + expFile = "dummyExpFile_%s" % ( self._uniqId ) + expFileHandler = open( expFile, "w" ) + expFileHandler.write( "seq1\t1\t1\t1\t1\n" ) + expFileHandler.write( "seq2\t1\t0\t1\t1\n" ) + expFileHandler.write( "seq3\t1\t0\t0\t1\n" ) + expFileHandler.write( "seq4\t0\t1\t1\t1\n" ) + expFileHandler.close() + + obsFile = "dummyObsFile_%s" % ( self._uniqId ) + self._i.saveAsBinaryMatrix( obsFile ) + + self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) ) + for f in [ expFile, obsFile ]: + os.remove( f ) + + + def test_getAlignList( self ): + iBioseq1 = Bioseq( "seq1", "AGAAT" ) + iBioseq2 = Bioseq( "seq2", "-G-AC" ) + self._i.setData( [ iBioseq1, iBioseq2 ] ) + + iAlign1 = Align( Range( "seq1", 2, 2 ), + Range( "seq2", 1, 1 ), + 0, 1, 100 ) + iAlign2 = Align( Range( "seq1", 4, 5 ), + Range( "seq2", 2, 3 ), + 0, 1, 50 ) + lExp = [ iAlign1, iAlign2 ] + + lObs = self._i.getAlignList( "seq1", "seq2" ) + self.assertEquals( lExp, lObs ) + + + def test_removeGaps(self): + iBioseq1 = Bioseq( "seq1", "AGAAT-" ) + iBioseq2 = Bioseq( "seq2", "AG-ACG" ) + self._i.setData( [ iBioseq1, iBioseq2 ] ) + + exp = AlignedBioseqDB() + exp.setData( [ Bioseq( "seq1", "AGAAT" ), + Bioseq( "seq2", "AGACG" ) ] ) + + self._i.removeGaps() + + self.assertEquals(exp, self._i) + + def test_computeMeanPcentIdentity_between_two_sequences_50pcent(self): + iBioseq1 = Bioseq( "seq1", "AGAAT-" ) + iBioseq2 = Bioseq( "seq2", "AG-ACG" ) + self._i.setData( [ iBioseq1, iBioseq2 ] ) + expIdentity = 50.0 + obsIdentity = self._i.computeMeanPcentIdentity() + self.assertEquals(expIdentity, obsIdentity) + + def test_computeMeanPcentIdentity_between_two_sequences_57pcent(self): + iBioseq1 = Bioseq( "seq1", "AGAAT-T" ) + iBioseq2 = Bioseq( "seq2", "AG-ACGT" ) + self._i.setData( [ iBioseq1, iBioseq2 ] ) + expIdentity = 57.0 + obsIdentity = self._i.computeMeanPcentIdentity() + self.assertEquals(expIdentity, obsIdentity) + + def test_compueNeamPcentIdentity_between_two_sequences_gaps_on_same_position(self): + iBioseq1 = Bioseq( "seq1", "AGAAT-T" ) + iBioseq2 = Bioseq( "seq2", "AG-AC-T" ) + self._i.setData( [ iBioseq1, iBioseq2 ] ) + expIdentity = 57.0 + obsIdentity = self._i.computeMeanPcentIdentity() + self.assertEquals(expIdentity, obsIdentity) + + def test_computeMeanPcentIdentity_between_three_sequences_50_pcent(self): + iBioseqRef = Bioseq( "seqRef", "AGAAT-" ) + iBioseq1 = Bioseq( "seq1", "AG-ACG" ) + iBioseq2 = Bioseq( "seq2", "AG-ACG" ) + + self._i.setData( [ iBioseqRef, iBioseq1, iBioseq2 ] ) + + expIdentity = 50.0 + obsIdentity = self._i.computeMeanPcentIdentity() + self.assertEquals(expIdentity, obsIdentity) + + def test_computeMeanPcentIdentity_between_three_sequences_42_pcent(self): + iBioseqRef = Bioseq( "seqRef", "AGAAT-" ) + iBioseq1 = Bioseq( "seq1", "AG-ACG" ) + iBioseq2 = Bioseq( "seq2", "AG-CC-" ) + + self._i.setData( [ iBioseqRef, iBioseq1, iBioseq2 ] ) + + expIdentity = 42.0 + obsIdentity = self._i.computeMeanPcentIdentity() + self.assertEquals(expIdentity, obsIdentity) + +#TODO: test with tthis data: +#>BlastclustCluster2Mb1_chunk7 (dbseq-nr 1) [101523,104351] +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#-------------------------------TAAATCCAACACTGAGAAGAATTATTTAA +#AGAAGGTTTTTATTTAACTTCTTTATTCGGATATCAGTTTAAGACTAAAA-TTCA-AATG +#TAAATTGGATTGAGGAGAAGCCTCAGTATTTTAACAATATTTGTATTTCGGGAGGGCCTC +#GCTTTTGGATTCTTAAGATTAGGAAAAGATTCAAAGAGAGCAGTCAAATCTGCTTATGTC +#AGGCTTTAGGATTTTTACAAGAACGGCTAAGTGCCGCTGCCAAAGAATTCTTTATTAGAA +#ACGGACGCTGCGCAGCTGGGATACATTATGGAAATAACTTAAGTGCATTACTGTTTTCTT +#TGAAATAATTGAAGTGGCCGATTTGGACCACCCAAATACGTACCTCCCCTTCCCTTAAAG +#TGAAACCTATACTTGGGCTGGGATTGATAGATATTGTATGGAAAATGGAGTTTGTTCTAT +#TTCTATTTGTTGTGGTATGTTTTGATTTTTTCTTATTTTAAGTTTTGCATACAGCAACAT +#AA--ATGGCATAA--ATTATACATA-TCTTAAATATAAGTACAATAAAATTAGTCA--AT +#TATTACAATGGTCATTAGTATG-TAAAGTGTAAT-ATTGTTCTC-TGA-AATCATCTTAA +#CATTGGCGTTGTGTTTTACAATTTTTATTTTTGTTATATTAAGTGGTGTGTCAAGTGGTG +#TCAAATCTATTTCTGGTTTTAACATATTTTCACTTAAAATTATACTATTGATTTCTATTT +#TGCATTGCATTACTCCTTTCATTTTGTTTTCTTCTATTTTTATATTATTAATTGAATTTG +#GGTAATTTTGGTTATGAATGGTTTGGGTTAAGTTCCAGTAATTTGTAATAATTTAAGTTT +#AGTAATTTTGGGTTAAACAAGTCAAGTCTGGAAAGCTGGCATAGCCATTCTAAATCTTCT +#ACGTATTCCGTAAACTGCATTAGCTAGAACAAAAGTGGTTATTTTCATAGTTCTGTTTTT +#TTGGTTTCTTAAGTAAT--TAAT--TTGTATGTTTGATCAAAAAGGGTAACTATTATTTT +#TGTAAT--TAATAACTGATTGGACATTTTTCAACAATTTTCTGTCAATTAGCAT----AT +#CGTAATTATTAGAA-AATACTATT--TCTGGGTAA-CATAATCAAGTCGTTCAAGGTAAT +#AGGGCCATT-TAATGTTAA-AACTTCACTTCTACTATTTTGTATGGGAAGA--CAAAATA +#TATTTTCAT--TGATCATATTAATTGTGGAAGCCATTTGTATGTGTACCCTTTGTATACT +#ATTTTTAAGCATTCGTGTTGTCCGGGGCTGTTGACCGAAAATTTTCGTTTAGTTCAGAAT +#TATTTTGATTTTGTTCATTATTAATGTGTACGGGTTGGACCTGTTGTTGCCCTTCATTAA +#TGAACCCATGATTTTGATATGGATTGTATTGATTTTGGTCATAATACTGTTGTTCGTAGG +#TTGGGTCTGAAATAATTAGGTACGTGCCACATTGGTGGGTTGTAAGTCGTCTGCTCTGAT +#AGCTAGGTCTAAATGGTTGAATGTATTGATTAAAATTAGGCTGGAAGGGTTGGGAATATT +#GTGGGTAACTTGGGCGAGTTTGAACGTTTGTATTGAATTTATGTGCTTTCGAATTCTGAT +#TAAAATTTGAATTTTTATTACGGTATTCTGGGTTTATAAAATTCAAAATTAATGAGTTTT +#TCATAAATTCCCTCATTTTGTGCAATGGTTATTAAGGATCTTAAGTCTGTAATA-TCGTG +#ATGAG-ATAA-AATAGTAAATATAGTAG-AGTCTTTA--ATAGCCTGAATAAAAATAAGA +#AAATCCGATTGGTTACCTTCCAGGTGTAGTTTCGAAATTAATAATTGACGTCGTCTTTCC +#CCTTCTTCGGAGAATGTTTTTAGGTTTCCTCTGTATGGTGTCTCCCTGAAGCTCTCCAGA +#AGTTTGTAGTTTGGTGTTTGAGTTTTAAATTCCGCGATGAGTCTTGCTTTGAGGGTAGGC +#CAATGCTCGGAAGTCCCAAAGATCGTATTATCCGTCCAAGTTACTTTTGATGGCTCCCAG +#TAGAATCCTCTGTTTACGAAAATTACGTAAATCCACTCTGCTGACGACGGTGTGAAGTGT +#TTCTGGATCACCCTTAAATGTCATAATGTCTTTAAGCTGCCGACGGGCTTCGGCAAGGTT +#GTTGTCTCTTAGCGCAACGATTTAAGGTGCGGCAATAATTATAATAATTGGTTGAGACAT +#TTTTATTGTAAAATTTTAAATTTTGTGTTTTATTGTTTTATTGTTTCGAATTCAATGCTA +#TTATTTATTTTTTTTTTTGTTTTCTAGTTTCACTTTTTTTTTTATTGTATTGCTTTATTG +#TTCTTATTTTATTTTTATATGTTGGTTTTAAAACTTAGTTGCCTTTGGACTTAATGTTTT +#TGTTTCGTATTTCACTTCCACTTTAAATTGGATAACAGAATTGGAATTAAAATCCAAGTT +#GAAGAGTTTCCACGAATTTATTTGGGAAATGTTTCGAGCACTGGAATCCAGTGACTGGAT +#TATAAAATTTAACTTATTTCC-ACTCGAAGGTTCTTTTTT--CGG--------ATACTTT +#TTGTATCAGT--TGACTAAGAGCAACACTGAGAAGAATTATTTAAAGAAGGTTTTTATTT +#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAA-TTCA-AATGTAAATTGGATTGAGG +#AGAAGCCTCAGTATT--TCAACAATATTTGTATT--TCGGGAGGGCCTCGCTCTTGGAT- +#-TCTTAAGATTAGGAAAAAATTCA-AAGAGAGCAGTCA-AATC-TGCTTATGTCAGGCTT +#TAGGATTTTTACAAGAAGGGC-TAAGTGCCGCTGAAACGGATGC---------------- +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------ +#>BlastclustCluster2Mb2_chunk7 (dbseq-nr 1) [99136,100579] +#GTAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA +#ATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATA +#ATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATC +#ATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA +#ATCATAATAATCATAATAATCATAATAATCATAATAATAATAATAATCATAATCATAATC +#ATAATAAGCGATAAAAAAATTAAAAAATAAAAATTAAAACCCACTGCAATCACGTTGGAC +#GGCGAGTCACAGACGTCAGAATAGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCA +#AGAAGGTTTTTATTGAACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATC +#ATAAT---AATCATAATAATCATAATAATCATAATAATCATAATAAT------------- +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#-----------------------------------------------CATA-ATAATCAT +#AATAAT--CATAATAATCATA-ATAATCATAATAATCATAATAATCATAATAATCATAAT +#AATCATAATAATCATAATAATCATAA----TAATCATAATAATCATAATAATCATAATAA +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------TCATAA-TAATCATAATAATCGTAA---TAATCATAA----TAATCATAATAAT +#CATAATAATCATAA-TAAT----CAT-----AATAATCAT-----AATAATCATAATAAT +#CATAATAATCATAATAATCATAATAATCATAATAATCATAAT-AA-TCAT--AA--TAAT +#-----CATAATAATCATAATAA--TCA----TAATAATC---AT---AATAATCATAATA +#-AT---CATAATAATCATAATAATC----------------------------------- +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#-----------------------------------ATAATAATCATAAT-AATCA----- +#TAATAA------TCATAAT----AATCATAAT-AATCATAATAA-TCA-TAATAATCATA +#ATAATCATAATAATCATAATAATAATAATAATCATAATCATAATCATAATAAGCATAAAA +#AAAT-------------------------------------------------------- +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#------------------------------------------------------------ +#TAAAAAATAAAAATTAAAACCCACTGCAA---TCACGTTGGACGGCGAGTCACAGACGTC +#A-GAAT-AGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCAAGAAGGTTTTTATTG +#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATCATAAT---AATCATA +#ATAA---TCA-TAATAATCAT-AATAATCATAATAATCATAA-----TAA-TCATA-ATA +#ATCATAATAATCATAATAA--TCATAATA-ATCA-TAATAATCATAATAATCATAATCAT +#CATAATAATCATAATAAT--CATAA-T-------AATC--ATAATAATCATAATAATCAT +#AATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAAT +#CATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAAT +#AATCATAATAAT + +test_suite = unittest.TestSuite() +test_suite.addTest( unittest.makeSuite( Test_AlignedBioseqDB ) ) +if __name__ == "__main__": + unittest.TextTestRunner(verbosity=2).run( test_suite )