Mercurial > repos > yufei-luo > s_mart
diff commons/core/coord/test/Test_ConvCoord.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
# Recovered from the changeset diff:
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/commons/core/coord/test/Test_ConvCoord.py Tue Apr 30 14:33:21 2013 -0400
# @@ -0,0 +1,205 @@
import unittest
import os
import time
from commons.core.coord.ConvCoord import ConvCoord
from commons.core.utils.FileUtils import FileUtils
from commons.core.sql.DbFactory import DbFactory
from commons.core.coord.Map import Map


class Test_ConvCoord( unittest.TestCase ):
    """Tests for ConvCoord.mergeCoordsOnChunkOverlaps.

    Each test loads a small 'path' table into the database obtained from
    DbFactory, runs the merge, exports the table to a file and compares that
    file with the expected content.
    """

    def setUp( self ):
        """Create the ConvCoord instance under test and give it a DB connection."""
        self._i = ConvCoord()
        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )
        self._inData = "dummyInData_%s" % ( self._uniqId )
        self._mapData = "dummyMapData_%s" % ( self._uniqId )
        self._expData = "dummyExpData_%s" % ( self._uniqId )
        self._obsData = "dummyObsData_%s" % ( self._uniqId )
        self._iDb = DbFactory.createInstance()
        # The instance under test shares the connection opened by the test.
        self._i._iDb = self._iDb

    def tearDown( self ):
        """Close the database connection opened in setUp."""
        self._iDb.close()

# TODO: handle duplicated matches for path
#    def test_convCoordsChkToChrFromFile_duplicated_matchs( self ):
#        dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),
#                             "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }
#        tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId
#        self._writePathFileCoordOnChunk(tmpPathFileName)
#
#        expPathFile = "dummyExpPathFile_%s" % self._uniqId
#        self._writePathFileCoordOnChrWithOutDoublons(expPathFile)
#
#        outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)
#
#        obsPathFile = "dummyObsPathFile_%s" % self._uniqId
#        self._iDb.exportDataToFile(outTableName, obsPathFile)
#
#        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))
#
#        for f in [ expPathFile, obsPathFile, tmpPathFileName ]:
#            os.remove( f )
#        self._iDb.dropTable(outTableName)

# TODO: handle matches out of chunk overlap ? For one side (=> path 128, remove path 152) ? For two sides (path 129, fusion with path 154) ?
#    def test_convCoordsChkToChrFromFile_matchs_out_of_overlap( self ):
#        dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),
#                             "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }
#        tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId
#        self._writePathFileCoordOnChunk_outOfOverlap(tmpPathFileName)
#
#        expPathFile = "dummyExpPathFile_%s" % self._uniqId
#        self._writePathFileCoordOnChrWithOutDoublons_outOfOverlap(expPathFile)
#
#        outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)
#
#        obsPathFile = "dummyObsPathFile_%s" % self._uniqId
#        self._iDb.exportDataToFile(outTableName, obsPathFile)
#
#        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))
#
#        for f in [ expPathFile, obsPathFile, tmpPathFileName ]:
#            os.remove( f )
#        self._iDb.dropTable(outTableName)

    def test_mergeCoordsOnChunkOverlaps( self ):
        """The same hit reported under two path ids in the overlap is expected once."""
        inputLines = [
            "1\tchromosome1\t21\t37\tTE1\t1\t27\t8e-58\t30\t97.8\n",  # hit within the 1st chunk
            "3\tchromosome1\t92\t99\tTE1\t1\t8\t8e-58\t11\t97.8\n",   # hit included within the chunk overlap, on the 2nd chunk
            "2\tchromosome1\t92\t99\tTE1\t1\t8\t8e-58\t11\t97.8\n",   # hit included within the chunk overlap, on the 1st chunk
        ]
        expectedLines = [
            "1\tchromosome1\t21\t37\tTE1\t1\t27\t8e-58\t30\t97.8\n",  # hit within the 1st chunk
            "2\tchromosome1\t92\t99\tTE1\t1\t8\t8e-58\t11\t97.8\n",   # hit included within the chunk overlap, on the 1st chunk
        ]
        self._checkMergeCoordsOnChunkOverlaps( inputLines, expectedLines )

    def test_mergeCoordsOnChunkOverlaps_withConnectedMatches( self ):
        """Two paths sharing a hit in the overlap are expected under a single path id."""
        inputLines = [
            "1\tchromosome1\t21\t37\tTE1\t1\t27\t8e-58\t30\t97.8\n",     # hit on the 1st chunk
            "1\tchromosome1\t92\t99\tTE1\t28\t36\t8e-58\t10\t97.8\n",    # hit included within the chunk overlap, on the 1st chunk, connected to the previous
            "2\tchromosome1\t92\t99\tTE1\t28\t36\t8e-58\t10\t97.8\n",    # hit included within the chunk overlap, on the 2nd chunk
            "2\tchromosome1\t111\t120\tTE1\t37\t46\t8e-58\t15\t97.8\n",  # hit on the 2nd chunk, connected to the previous
        ]
        expectedLines = [
            "1\tchromosome1\t21\t37\tTE1\t1\t27\t8e-58\t30\t97.8\n",     # hit within the 1st chunk
            "1\tchromosome1\t92\t99\tTE1\t28\t36\t8e-58\t10\t97.8\n",    # hit included within the chunk overlap, on the 1st chunk
            "1\tchromosome1\t111\t120\tTE1\t37\t46\t8e-58\t15\t97.8\n",  # hit on the 2nd chunk, connected to the previous
        ]
        self._checkMergeCoordsOnChunkOverlaps( inputLines, expectedLines )

    def _checkMergeCoordsOnChunkOverlaps( self, inputLines, expectedLines ):
        """Load inputLines as a path table, merge it and compare with expectedLines.

        Temporary files and the temporary table are removed even when the
        merge or the comparison fails, so that a failing test does not leave
        stale fixtures behind for the next one.

        @param inputLines: lines of the path file loaded into the table
        @param expectedLines: lines expected in the table export after merging
        """
        # chunk1 and chunk2 overlap on chromosome1 (91-100); chunk3 is on another chromosome.
        dChunks2CoordMaps = { "chunk1": Map( "chunk1", "chromosome1", 1, 100 ),
                              "chunk2": Map( "chunk2", "chromosome1", 91, 190 ),
                              "chunk3": Map( "chunk3", "chromosome2", 1, 100 ) }
        tmpPathTable = "dummyTmpPathTable"
        expPathFile = "dummyExpPathFile"
        obsPathFile = "dummyObsPathFile"

        # The input file has the same name as the table and is only needed while loading.
        FileUtils.writeLineListInFile( tmpPathTable, inputLines )
        try:
            self._iDb.createTable( tmpPathTable, "path", tmpPathTable, True )
        finally:
            os.remove( tmpPathTable )

        try:
            FileUtils.writeLineListInFile( expPathFile, expectedLines )
            self._i.mergeCoordsOnChunkOverlaps( dChunks2CoordMaps, tmpPathTable )
            self._iDb.exportDataToFile( tmpPathTable, obsPathFile )
            self.assertTrue( FileUtils.are2FilesIdentical( expPathFile, obsPathFile ) )
        finally:
            for f in [ expPathFile, obsPathFile ]:
                # A file may be missing if the failure happened before it was written.
                if os.path.exists( f ):
                    os.remove( f )
            self._iDb.dropTable( tmpPathTable )

    def _writePathFileCoordOnChrWithOutDoublons(self, pathFileName):
        """Write the path fixture with coordinates on dmel_chr4 to pathFileName.

        Only referenced by the tests that are currently commented out.
        """
        with open( pathFileName, "w" ) as pathFile:
            pathFile.write("123\tdmel_chr4\t868397\t868531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n")
            pathFile.write("123\tdmel_chr4\t868545\t869120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n")
            pathFile.write("124\tdmel_chr4\t819607\t819714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n")
            pathFile.write("124\tdmel_chr4\t819695\t820156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n")
            pathFile.write("125\tdmel_chr4\t953027\t953101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n")
            pathFile.write("126\tdmel_chr4\t862131\t862178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n")
            pathFile.write("127\tdmel_chr4\t819520\t819606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n")
#            pathFile.write("128\tdmel_chr4\t953866\t953889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n")
#            pathFile.write("129\tdmel_chr4\t953866\t953889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
            pathFile.write("150\tdmel_chr4\t971176\t971250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n")
            pathFile.write("151\tdmel_chr4\t1066603\t1066698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n")
            pathFile.write("152\tdmel_chr4\t953866\t953889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n")
            pathFile.write("153\tdmel_chr4\t953951\t954343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n")
            pathFile.write("154\tdmel_chr4\t953866\t953889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
            pathFile.write("155\tdmel_chr4\t953102\t953199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n")

    def _writePathFileCoordOnChunk(self, pathFileName):
        """Write the path fixture with coordinates on chunk1/chunk2 to pathFileName.

        Only referenced by the tests that are currently commented out.
        """
        with open( pathFileName, "w" ) as pathFile:
            pathFile.write("123\tchunk1\t108397\t108531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n")
            pathFile.write("123\tchunk1\t108545\t109120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n")
            pathFile.write("124\tchunk1\t59607\t59714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n")
            pathFile.write("124\tchunk1\t59695\t60156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n")
            pathFile.write("125\tchunk1\t193027\t193101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n")
            pathFile.write("126\tchunk1\t102131\t102178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n")
            pathFile.write("127\tchunk1\t59520\t59606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n")
            pathFile.write("128\tchunk1\t193866\t193889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n")
            pathFile.write("129\tchunk1\t193866\t193889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
            pathFile.write("150\tchunk2\t21176\t21250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n")
            pathFile.write("151\tchunk2\t116603\t116698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n")
            pathFile.write("152\tchunk2\t3866\t3889\tCR1-19_HM_1p:classI:LINE\t898\t891\t5e-21\t4\t34.98\n")
            pathFile.write("153\tchunk2\t3951\t4343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n")
            pathFile.write("154\tchunk2\t3866\t3889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
            pathFile.write("155\tchunk2\t3102\t3199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n")

#    def _writePathFileCoordOnChunk_outOfOverlap(self, pathFileName):
#        pathFile = open( pathFileName, "w" )
#        pathFile.write("123\tchunk1\t108397\t108531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n")
#        pathFile.write("123\tchunk1\t108545\t109120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n")
#        pathFile.write("124\tchunk1\t59607\t59714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n")
#        pathFile.write("124\tchunk1\t59695\t60156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n")
#        pathFile.write("125\tchunk1\t193027\t193101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n")
#        pathFile.write("126\tchunk1\t102131\t102178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n")
#        pathFile.write("127\tchunk1\t59520\t59606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n")
#        pathFile.write("128\tchunk1\t183866\t193889\tCR1-19_HM_1p:classI:LINE\t898\t1891\t5e-21\t4\t34.98\n")
#        pathFile.write("129\tchunk1\t183866\t200000\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
#        pathFile.write("150\tchunk2\t21176\t21250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n")
#        pathFile.write("151\tchunk2\t116603\t116698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n")
#        pathFile.write("152\tchunk2\t1\t3889\tCR1-19_HM_1p:classI:LINE\t898\t1891\t5e-21\t4\t34.98\n")
#        pathFile.write("153\tchunk2\t3951\t4343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n")
#        pathFile.write("154\tchunk2\t1\t13889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
#        pathFile.write("155\tchunk2\t3102\t3199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n")
#        pathFile.close()
#
#    def _writePathFileCoordOnChrWithOutDoublons_outOfOverlap(self, pathFileName):
#        file = open( pathFileName, "w" )
#        file.write("123\tdmel_chr4\t868397\t868531\tMariner2_AG_1p:classII:TIR\t53\t97\t8e-19\t28\t35.56\n")
#        file.write("123\tdmel_chr4\t868545\t869120\tMariner2_AG_1p:classII:TIR\t102\t333\t8e-19\t87\t27.97\n")
#        file.write("124\tdmel_chr4\t819607\t819714\tLINER1-2_NVi_2p:classI:?\t502\t537\t3e-20\t30\t36.11\n")
#        file.write("124\tdmel_chr4\t819695\t820156\tLINER1-2_NVi_2p:classI:?\t533\t725\t3e-20\t90\t36.79\n")
#        file.write("125\tdmel_chr4\t953027\t953101\tCR1-8_AG_1p:classI:LINE\t470\t448\t1e-27\t11\t28.57\n")
#        file.write("126\tdmel_chr4\t862131\t862178\tTc1-1_TCa_1p:classII:TIR\t288\t274\t5e-29\t18\t52.5\n")
#        file.write("127\tdmel_chr4\t819520\t819606\tNotoAg1_2p:classI:?\t482\t508\t1e-13\t14\t30.61\n")
#        file.write("128\tdmel_chr4\t943866\t953889\tCR1-19_HM_1p:classI:LINE\t898\t1891\t5e-21\t4\t34.98\n")
#        file.write("129\tdmel_chr4\t943866\t963889\tCR1-83_HM_1p:classI:LINE\t912\t905\t3e-21\t4\t34.62\n")
#        file.write("150\tdmel_chr4\t971176\t971250\tTc1-1_TCa_1p:classII:TIR\t135\t109\t8e-32\t21\t41.57\n")
#        file.write("151\tdmel_chr4\t1066603\t1066698\tMARWOLEN1_1p:classII:TIR\t285\t320\t7e-25\t28\t41.67\n")
#        file.write("153\tdmel_chr4\t953951\t954343\tCR1-1_DWil_1p:classI:LINE\t127\t2\t4e-18\t92\t37.59\n")
#        file.write("155\tdmel_chr4\t953102\t953199\tCR1-1_DWil_2p:classI:LINE\t869\t837\t2e-26\t38\t57.89\n")
#        file.close()


if __name__ == "__main__":
    unittest.main()