Mercurial > repos > yufei-luo > s_mart
diff smart_toolShed/commons/core/coord/test/Test_AlignUtils.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/commons/core/coord/test/Test_AlignUtils.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,777 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import unittest
import os
import time
import shutil
from commons.core.coord.AlignUtils import AlignUtils
from commons.core.coord.Align import Align
from commons.core.utils.FileUtils import FileUtils
from commons.core.coord.Range import Range


class Test_AlignUtils(unittest.TestCase):
    """Unit tests for the static helpers of AlignUtils.

    Most tests write small fixture files in the current working directory.
    Every path a test creates (or expects the code under test to create) is
    registered with _registerForCleanup() and removed in tearDown(), so that
    a failing assertion does not leave files behind for the next run.
    """

    def setUp(self):
        # Timestamp + PID keeps the fixture names of concurrent runs apart.
        self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S"), os.getpid())
        self._lPathsToRemove = []

    def tearDown(self):
        # Runs whether the test passed or failed, unlike clean-up code placed
        # after the assertions inside the test itself.
        for path in self._lPathsToRemove:
            if os.path.isdir(path):
                shutil.rmtree(path)
            elif os.path.exists(path):
                os.remove(path)
        self._lPathsToRemove = []
        self._uniqId = ""

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _registerForCleanup(self, *lPaths):
        """Record files or directories that tearDown() has to delete."""
        self._lPathsToRemove.extend(lPaths)

    def _writeLines(self, fileName, lLines):
        """Create 'fileName' holding the given raw lines (may be empty)."""
        self._registerForCleanup(fileName)
        with open(fileName, "w") as fileHandler:
            fileHandler.writelines(lLines)

    def _writeAligns(self, fileName, lAligns):
        """Create 'fileName' by calling Align.write() for each instance."""
        self._registerForCleanup(fileName)
        with open(fileName, "w") as fileHandler:
            for iAlign in lAligns:
                iAlign.write(fileHandler)

    def _writeAlignStrings(self, fileName, lStrings):
        """Parse each string into an Align and write it through Align.write()."""
        lAligns = [self._alignFromString(s) for s in lStrings]
        self._writeAligns(fileName, lAligns)

    @staticmethod
    def _alignFromString(line):
        """Return a new Align initialised with Align.setFromString()."""
        iAlign = Align()
        iAlign.setFromString(line)
        return iAlign

    @staticmethod
    def _alignFromTuple(tFields):
        """Return a new Align initialised with Align.setFromTuple()."""
        iAlign = Align()
        iAlign.setFromTuple(tFields)
        return iAlign

    def _assertFilesIdentical(self, lExpFiles, lObsFiles):
        """Assert pairwise identity of expected and observed files."""
        for expFile, obsFile in zip(lExpFiles, lObsFiles):
            self.assertTrue(
                FileUtils.are2FilesIdentical(expFile, obsFile),
                "files '%s' and '%s' differ" % (expFile, obsFile))

    def _writeFourGroupFixture(self):
        """Build the fixture shared by the 4-file createAlignFiles tests.

        Writes the expected files 'dummyExpAlignFile.align_1' to '_4' and
        returns (lExpFiles, lAlignList), where lAlignList is the list of
        Align lists handed to AlignUtils.createAlignFiles().
        """
        chr1Short = "chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n"
        chr1Long = "chr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n"
        chr2 = "chr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n"
        chr3 = "chr3\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n"
        chr4 = "chr4\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n"
        lStringGroups = [
            [chr1Short, chr1Long, chr1Long, chr1Short],
            [chr2, chr2, chr2],
            [chr3],
            [chr4],
        ]

        lAlignList = []
        lExpFiles = []
        for index, lStrings in enumerate(lStringGroups):
            lAligns = [self._alignFromString(s) for s in lStrings]
            expFile = "dummyExpAlignFile.align_%i" % (index + 1)
            self._writeAligns(expFile, lAligns)
            lAlignList.append(lAligns)
            lExpFiles.append(expFile)
        return lExpFiles, lAlignList

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test_getAlignListFromFile(self):
        a1 = self._alignFromTuple(
            ("chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26"))
        a2 = self._alignFromTuple(
            ("chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13"))

        inFileName = "dummyFile_%s" % (self._uniqId)
        self._writeAligns(inFileName, [a1, a2])

        lExp = [a1, a2]
        lObs = AlignUtils.getAlignListFromFile(inFileName)

        self.assertEqual(lExp, lObs)

    def test_getListOfScores(self):
        a1 = self._alignFromTuple(
            ("chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26"))
        a2 = self._alignFromTuple(
            ("chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13"))
        lAligns = [a1, a2]

        lExp = [89, 95]
        lObs = AlignUtils.getListOfScores(lAligns)

        self.assertEqual(lExp, lObs)

    def test_getScoreListFromFile(self):
        alignFile = "dummyAlignFile"
        self._writeLines(alignFile, [
            "chr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.2\n",
            "chr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.9\n",
        ])

        lExp = [133, 235]
        lObs = AlignUtils.getScoreListFromFile(alignFile)

        self.assertEqual(lExp, lObs)

    def test_getScoreListFromFile_empty_file(self):
        alignFile = "dummyAlignFile"
        self._writeLines(alignFile, [])

        lExp = []
        lObs = AlignUtils.getScoreListFromFile(alignFile)

        self.assertEqual(lExp, lObs)

    def test_getScoreListFromFile_with_endline_char(self):
        # Blank lines between and after the records must be ignored.
        alignFile = "dummyAlignFile"
        self._writeLines(alignFile, [
            "chr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.2\n",
            "\n",
            "chr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.9\n",
            "\n",
        ])

        lExp = [133, 235]
        lObs = AlignUtils.getScoreListFromFile(alignFile)

        self.assertEqual(lExp, lObs)

    def test_convertAlignFileIntoMapFileWithQueriesAndSubjects(self):
        alignFile = "dummyAlignFile_%s" % (self._uniqId)
        self._writeLines(alignFile, [
            "chr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.2\n",
            "chr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.9\n",
        ])

        expFile = "dummyExpFile_%s" % (self._uniqId)
        self._writeLines(expFile, [
            "repet\tchr3\t1\t100\n",
            "repet\tchr5\t11\t110\n",
            "repet\tchr7\t1\t200\n",
            "repet\tchr2\t11\t210\n",
        ])

        obsFile = "dummyObsFile_%s" % (self._uniqId)
        self._registerForCleanup(obsFile)

        AlignUtils.convertAlignFileIntoMapFileWithQueriesAndSubjects(alignFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

    def test_convertAlignFileIntoMapFileWithSubjectsOnQueries(self):
        alignFile = "dummyAlignFile_%s" % (self._uniqId)
        self._writeLines(alignFile, [
            "chr3\t1\t100\tTE1\t11\t110\t1e-52\t133\t87.2\n",
            "chr7\t1\t200\tTE1\t11\t210\t1e-78\t235\t98.9\n",
        ])

        expFile = "dummyExpFile_%s" % (self._uniqId)
        self._writeLines(expFile, [
            "TE1\tchr3\t1\t100\n",
            "TE1\tchr7\t1\t200\n",
        ])

        obsFile = "dummyObsFile_%s" % (self._uniqId)
        self._registerForCleanup(obsFile)

        AlignUtils.convertAlignFileIntoMapFileWithSubjectsOnQueries(alignFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

    def test_getAlignListSortedByDecreasingScoreThenLength(self):
        a1 = self._alignFromTuple(
            ("chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26"))
        a2 = self._alignFromTuple(
            ("chr7", "121", "200", "seq9", "21", "110", "1e-32", "95", "98.13"))
        a3 = self._alignFromTuple(
            ("chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13"))
        lAligns = [a1, a2, a3]

        lExp = [a3, a2, a1]
        lObs = AlignUtils.getAlignListSortedByDecreasingScoreThenLength(lAligns)

        self.assertEqual(lExp, lObs)

    def test_convertAlignFileIntoPathFile(self):
        alignFile = "dummyAlignFile_%s" % (self._uniqId)
        self._writeLines(alignFile, [
            "chr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.200000\n",
            "chr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.900000\n",
        ])

        expFile = "dummyExpFile_%s" % (self._uniqId)
        self._writeLines(expFile, [
            "1\tchr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.200000\n",
            "2\tchr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.900000\n",
        ])

        obsFile = "dummyObsFile_%s" % (self._uniqId)
        self._registerForCleanup(obsFile)

        AlignUtils.convertAlignFileIntoPathFile(alignFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

    def test_sortAlignFile(self):
        alignFile = "dummyAlignFile_%s" % (self._uniqId)
        self._writeLines(alignFile, [
            "chr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.900000\n",
            "chr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.200000\n",
            "chr8\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.200000\n",
            "chr8\t1\t100\tchr5\t15\t90\t1e-52\t133\t87.200000\n",
            "chr8\t1\t100\tchr5\t11\t100\t1e-52\t133\t87.200000\n",
        ])

        expFile = "dummyExpFile_%s" % (self._uniqId)
        self._writeLines(expFile, [
            "chr3\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.200000\n",
            "chr7\t1\t200\tchr2\t11\t210\t1e-78\t235\t98.900000\n",
            "chr8\t1\t100\tchr5\t11\t100\t1e-52\t133\t87.200000\n",
            "chr8\t1\t100\tchr5\t11\t110\t1e-52\t133\t87.200000\n",
            "chr8\t1\t100\tchr5\t15\t90\t1e-52\t133\t87.200000\n",
        ])

        obsFile = "dummyObsFile_%s" % (self._uniqId)
        self._registerForCleanup(obsFile)

        AlignUtils.sortAlignFile(alignFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

    def test_writeListInFile(self):
        expFileName = "expFileName.align"
        self._writeLines(expFileName, [
            "chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.200000\n",
            "chr1\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.200000\n",
            "chr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.200000\n",
        ])

        iAlign1 = self._alignFromString("chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign2 = self._alignFromString("chr1\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign3 = self._alignFromString("chr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")

        obsFileName = "obsFileName.align"
        self._registerForCleanup(obsFileName)
        obsPathList = [iAlign1, iAlign2, iAlign3]

        AlignUtils.writeListInFile(obsPathList, obsFileName)

        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))

    def test_splitAlignListByQueryName_empty_list(self):
        lAlign = []

        obsLAlign = AlignUtils.splitAlignListByQueryName(lAlign)

        expLAlign = []

        self.assertEqual(expLAlign, obsLAlign)

    def test_splitAlignListByQueryName(self):
        iAlign1 = self._alignFromString("chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign2 = self._alignFromString("chr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign3 = self._alignFromString("chr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        lAlign = [iAlign1, iAlign2, iAlign3]

        obsLAlign = AlignUtils.splitAlignListByQueryName(lAlign)

        expLAlign = [[iAlign1, iAlign3],
                     [iAlign2]]

        self.assertEqual(expLAlign, obsLAlign)

    def test_splitAlignListByQueryName_last_align_alone(self):
        iAlign1 = self._alignFromString("chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign2 = self._alignFromString("chr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign3 = self._alignFromString("chr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign4 = self._alignFromString("chr3\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign5 = self._alignFromString("chr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign6 = self._alignFromString("chr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign7 = self._alignFromString("chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign8 = self._alignFromString("chr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        iAlign9 = self._alignFromString("chr4\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
        lAlign = [iAlign1, iAlign2, iAlign3, iAlign4, iAlign5,
                  iAlign6, iAlign7, iAlign8, iAlign9]

        obsLAlign = AlignUtils.splitAlignListByQueryName(lAlign)

        expLAlign = [[iAlign1, iAlign3, iAlign6, iAlign7],
                     [iAlign2, iAlign5, iAlign8],
                     [iAlign4],
                     [iAlign9]]

        self.assertEqual(expLAlign, obsLAlign)

    def test_createAlignFiles(self):
        lExpFiles, lAlignList = self._writeFourGroupFixture()
        lObsFiles = ["dummyAlignFile_%i.align" % i for i in range(1, 5)]
        self._registerForCleanup(*lObsFiles)

        AlignUtils.createAlignFiles(lAlignList, "dummyAlignFile")

        self._assertFilesIdentical(lExpFiles, lObsFiles)

    def test_createAlignFiles_eleven_output_files(self):
        # With more than nine groups the observed file names are expected to
        # carry a zero-padded, two-digit index.
        lAlignStrings = [
            "chr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n",
            "chr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n",
            "chr3\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n",
        ]
        for chrNumber in range(4, 12):
            lAlignStrings.append(
                "chr%i\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n" % chrNumber)

        lExpFiles = []
        lObsFiles = []
        lAlignList = []
        for index, alignString in enumerate(lAlignStrings):
            iAlign = self._alignFromString(alignString)
            expFile = "dummyExpAlignFile.align_%02i" % (index + 1)
            self._writeAligns(expFile, [iAlign])
            lExpFiles.append(expFile)
            lObsFiles.append("dummyAlignFile_%02i.align" % (index + 1))
            lAlignList.append([iAlign])
        self._registerForCleanup(*lObsFiles)

        AlignUtils.createAlignFiles(lAlignList, "dummyAlignFile")

        self._assertFilesIdentical(lExpFiles, lObsFiles)

    def test_createAlignFiles_dirName_specified(self):
        lExpFiles, lAlignList = self._writeFourGroupFixture()

        dirName = "dummyAlignDir"
        self._registerForCleanup(dirName)

        AlignUtils.createAlignFiles(lAlignList, "dummyAlignFile", dirName)

        lObsFiles = ["%s/dummyAlignFile_%i.align" % (dirName, i) for i in range(1, 5)]
        self._assertFilesIdentical(lExpFiles, lObsFiles)

    def test_createAlignFiles_dirName_specified_with_ended_slash(self):
        lExpFiles, lAlignList = self._writeFourGroupFixture()

        dirName = "dummyAlignDir/"
        self._registerForCleanup(dirName)

        AlignUtils.createAlignFiles(lAlignList, "dummyAlignFile", dirName)

        # dirName already ends with a slash, so no separator is added here.
        lObsFiles = ["%sdummyAlignFile_%i.align" % (dirName, i) for i in range(1, 5)]
        self._assertFilesIdentical(lExpFiles, lObsFiles)

    def test_sortList(self):
        iAlign1 = Align(Range("qry1", 3, 80), Range("sbj1", 3, 80), 1e-20, 103, 97.3)  # higher query start
        iAlign2 = Align(Range("qry1", 1, 100), Range("sbj1", 1, 100), 1e-20, 113, 97.3)  # higher score
        iAlign3 = Align(Range("qry2", 1, 100), Range("sbj1", 1, 100), 1e-20, 103, 97.3)  # different query
        iAlign4 = Align(Range("qry1", 1, 100), Range("sbj1", 1, 100), 1e-20, 103, 97.3)  # canonical
        iAlign5 = Align(Range("qry1", 1, 100), Range("sbj2", 1, 100), 1e-20, 103, 97.3)  # different subject
        iAlign6 = Align(Range("qry1", 201, 300), Range("sbj1", 100, 1), 1e-20, 103, 97.3)  # subject on reverse strand
        iAlign7 = Align(Range("qry1", 401, 500), Range("sbj1", 1, 100), 1e-20, 103, 97.3)  # higher query start
        lAligns = [iAlign1, iAlign2, iAlign3, iAlign4, iAlign5, iAlign6, iAlign7]

        lExp = [iAlign4, iAlign2, iAlign1, iAlign6, iAlign7, iAlign5, iAlign3]
        lObs = AlignUtils.sortList(lAligns)

        self.assertEqual(lExp, lObs)

    def test_isOverlapping(self):
        iAlign1 = Align(Range("chr1", 1, 100), Range("TE1", 11, 110), 1e-20, 90.2, 30)
        iAlign2 = Align(Range("chr1", 51, 80), Range("TE1", 61, 90), 1e-20, 90.2, 30)
        self.assertTrue(iAlign1.isOverlapping(iAlign2))

        # Same query overlap, but the subject ranges are disjoint.
        iAlign1 = Align(Range("chr1", 1, 100), Range("TE1", 11, 110), 1e-20, 90.2, 30)
        iAlign2 = Align(Range("chr1", 51, 80), Range("TE1", 161, 190), 1e-20, 90.2, 30)
        self.assertFalse(iAlign1.isOverlapping(iAlign2))

    def test_mergeList(self):
        iAlign1 = Align(Range("chr1", 81, 120), Range("TE1", 91, 130), 1e-20, 90.2, 30)
        iAlign2 = Align(Range("chr2", 51, 80), Range("TE1", 61, 90), 1e-20, 90.2, 30)  # different query
        iAlign3 = Align(Range("chr1", 1, 100), Range("TE1", 11, 110), 1e-20, 90.2, 30)  # to be merged with 1st line
        iAlign4 = Align(Range("chr1", 1, 200), Range("TE2", 11, 210), 1e-20, 90.2, 30)  # different subject
        iAlign5 = Align(Range("chr1", 1, 100), Range("TE1", 501, 600), 1e-20, 90.2, 30)  # non-overlapping subject
        lAligns = [iAlign1, iAlign2, iAlign3, iAlign4, iAlign5]

        iAlign6 = Align(Range("chr1", 1, 120), Range("TE1", 11, 130), 1e-20, 90.2, 30)
        lExp = [iAlign6, iAlign5, iAlign4, iAlign2]

        lObs = AlignUtils.mergeList(lAligns)

        self.assertEqual(lExp, lObs)

    def test_mergeFile_empty(self):
        inFile = "dummyInFile.align"
        self._writeLines(inFile, [])

        expFile = "dummyExpFile.align"
        self._writeLines(expFile, [])

        obsFile = "dummyObsFile.align"
        self._registerForCleanup(obsFile)

        AlignUtils.mergeFile(inFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

    def test_mergeFile(self):
        inFile = "dummyInFile.align"
        self._writeAlignStrings(inFile, [
            "chr1\t81\t120\tTE1\t91\t130\t1e-20\t30\t90.2\n",
            "chr2\t51\t80\tTE1\t61\t90\t1e-20\t30\t90.2\n",  # different query
            "chr1\t1\t100\tTE1\t11\t110\t1e-20\t30\t90.2\n",  # to be merged with 1st line
            "chr1\t1\t200\tTE2\t11\t210\t1e-20\t30\t90.2\n",  # different subject
        ])

        expFile = "dummyExpFile.align"
        self._writeAlignStrings(expFile, [
            "chr1\t1\t120\tTE1\t11\t130\t1e-20\t30\t90.2\n",
            "chr1\t1\t200\tTE2\t11\t210\t1e-20\t30\t90.2\n",
            "chr2\t51\t80\tTE1\t61\t90\t1e-20\t30\t90.2\n",
        ])

        obsFile = "dummyObsFile.align"
        self._registerForCleanup(obsFile)

        AlignUtils.mergeFile(inFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))

    def test_updateScoresInFile(self):
        inFile = "dummyInFile.align"
        self._writeAlignStrings(inFile, [
            "query1\t1\t100\tsubject1\t1\t95\t1e-180\t230\t90.2\n",
        ])

        # Expected score: query length (100 - 1 + 1) times identity (90.2 %),
        # truncated to an integer by the %i conversion.
        expScore = (100 - 1 + 1) * 90.2 / 100.0
        expFile = "dummyExpFile.align"
        self._writeAlignStrings(expFile, [
            "query1\t1\t100\tsubject1\t1\t95\t1e-180\t%i\t90.2\n" % (expScore),
        ])

        obsFile = "dummyObsFile.align"
        self._registerForCleanup(obsFile)

        AlignUtils.updateScoresInFile(inFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))


if __name__ == "__main__":
    unittest.main()