Mercurial > repos > yufei-luo > s_mart
diff smart_toolShed/commons/core/coord/test/Test_PathUtils.py @ 0:e0f8dcca02ed
Uploaded the S-MART tool, a toolbox for managing RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/commons/core/coord/test/Test_PathUtils.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,1667 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
+ + +import unittest +import os +import time +from commons.core.coord.PathUtils import PathUtils +from commons.core.coord.Path import Path +from commons.core.coord.Set import Set +from commons.core.utils.FileUtils import FileUtils +from commons.core.coord.Range import Range +from commons.core.coord.Align import Align + + +class Test_PathUtils ( unittest.TestCase ): + + def test_getSetListFromQueries( self ): + set1 = Set(1,"TE2","chr1",1,10) + set2 = Set(1,"TE2","chr1",10,1) + set3 = Set(1,"TE3","chr4",12,22) + + expList = [set1, set2, set3] + + tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") + tuple2 = ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2") + tuple3 = ("1","chr4","12","22","TE3","11","17","1e-20","30","90.2") + + pathList = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] ) + + obsList = PathUtils.getSetListFromQueries( pathList ) + + self.assertEquals( expList, obsList ) + + + def test_getSetListFromQueries_on_empty_list( self ): + expList = [] + obsList = PathUtils.getSetListFromQueries( [] ) + + self.assertEquals( expList, obsList ) + + + def test_getSetListFromQueries_on_list_size1( self ): + set1 = Set(1,"TE2","chr1",1,10) + + expList = [set1] + + tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") + path1 = Path() + path1.setFromTuple(tuple1) + + pathList = [path1] + obsList = PathUtils.getSetListFromQueries( pathList ) + + self.assertEquals( expList, obsList ) + + + def test_getRangeListFromSubjects_initiallyOrdered_directStrand( self ): + tuple1 = ("1","chr1","1","10","TE2","1","10","1e-20","30","90.2") + tuple2 = ("1","chr1","21","30","TE2","11","20","1e-20","30","90.2") + tuple3 = ("1","chr1","41","50","TE2","21","30","1e-20","30","90.2") + lPaths = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] ) + + iSet1 = Range( "TE2", 1, 10 ) + iSet2 = Range( "TE2", 11, 20 ) + iSet3 = Range( "TE2", 21, 30 ) + lExp = [ iSet1, iSet2, iSet3 ] + + lObs = PathUtils.getRangeListFromSubjects( 
lPaths ) + + self.assertEquals( lExp, lObs ) + + + def test_getRangeListFromSubjects_initiallyUnordered_directStrand( self ): + tuple1 = ("1","chr1","1","10","TE2","1","10","1e-20","30","90.2") + tuple2 = ("1","chr1","41","50","TE2","21","30","1e-20","30","90.2") + tuple3 = ("1","chr1","21","30","TE2","11","20","1e-20","30","90.2") + lPaths = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] ) + + iSet1 = Range( "TE2", 1, 10 ) + iSet2 = Range( "TE2", 11, 20 ) + iSet3 = Range( "TE2", 21, 30 ) + lExp = [ iSet1, iSet2, iSet3 ] + + lObs = PathUtils.getRangeListFromSubjects( lPaths ) + + self.assertEquals( lExp, lObs ) + + + def test_getRangeListFromSubjects_initiallyUnordered_reverseStrand( self ): + tuple1 = ("1","chr1","1","10","TE2","10","1","1e-20","30","90.2") + tuple2 = ("1","chr1","41","50","TE2","30","21","1e-20","30","90.2") + tuple3 = ("1","chr1","21","30","TE2","20","11","1e-20","30","90.2") + lPaths = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] ) + + iSet3 = Range( "TE2", 30, 21 ) + iSet2 = Range( "TE2", 20, 11 ) + iSet1 = Range( "TE2", 10, 1 ) + lExp = [ iSet1, iSet2, iSet3 ] + + lObs = PathUtils.getRangeListFromSubjects( lPaths ) + + self.assertEquals( lExp, lObs ) + + + def test_getQueryMinMaxFromPathList( self ): + tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") + tuple2 = ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2") + tuple3 = ("1","chr4","12","22","TE3","11","17","1e-20","30","90.2") + + pathList = self._makePathListFromTupleList([tuple1, tuple2, tuple3]) + + obsTuple = PathUtils.getQueryMinMaxFromPathList( pathList ) + expTuple = (1,22) + + self.assertEquals(expTuple, obsTuple) + + def test_getQueryMinMaxFromPathList_on_empty_list( self ): + obsTuple = PathUtils.getQueryMinMaxFromPathList( [] ) + expTuple = (-1,-1) + self.assertEquals( expTuple, obsTuple ) + + def test_getQueryMinMaxFromPathList_on_list_size1( self ): + tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") + path1 
= Path() + path1.setFromTuple(tuple1) + + pathList = [path1] + obsTuple = PathUtils.getQueryMinMaxFromPathList( pathList ) + + expTuple = (1,10) + + self.assertEquals(expTuple, obsTuple) + + def test_getSubjectMinMaxFromPathList( self ): + tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") + tuple2 = ("1","chr1","10","1","TE2","17","11","1e-20","30","90.2") + tuple3 = ("1","chr4","12","22","TE3","22","34","1e-20","30","90.2") + + pathList = self._makePathListFromTupleList([tuple1, tuple2, tuple3]) + obsTuple = PathUtils.getSubjectMinMaxFromPathList(pathList) + + expTuple = (11,34) + + self.assertEquals(expTuple, obsTuple) + + def test_getSubjectMinMaxFromPathList_on_empty_list( self ): + obsTuple = PathUtils.getSubjectMinMaxFromPathList([]) + expTuple = (-1,-1) + self.assertEquals(expTuple, obsTuple) + + def test_getSubjectMinMaxFromPathList_on_list_size1( self ): + tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") + path1 = Path() + path1.setFromTuple(tuple1) + + pathList = [path1] + obsTuple = PathUtils.getSubjectMinMaxFromPathList(pathList) + + expTuple = (11,17) + + self.assertEquals(expTuple, obsTuple) + + def test_areQueriesOverlappingBetweenPathLists_list2_empty( self ): + tuple1 = ("1","chr1","100","110","TE2","15","10","1e-20","30","90.2") + tuple2 = ("1","chr1","200","220","TE2","15","10","1e-20","30","90.2") + tuple3 = ("1","chr1","300","330","TE2","15","10","1e-20","30","90.2") + pathList1 = self._makePathListFromTupleList([tuple1, tuple2, tuple3]) + + pathList2 = [] + + expRes = False + obsRes = PathUtils.areQueriesOverlappingBetweenPathLists( pathList1, pathList2 ) + + self.assertEquals( expRes, obsRes ) + + def test_areQueriesOverlappingBetweenPathLists_list2_size1( self ): + tuple1 = ("1","chr1","9","11","TE2","150","200","1e-20","30","90.2") + tuple2 = ("1","chr1","20","22","TE2","150","200","1e-20","30","90.2") + tuple3 = ("1","chr1","30","33","TE2","150","200","1e-20","30","90.2") + pathList1 = 
self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] ) + + tuple11 = ("1","chr1","8","11","TE2","150","200","1e-20","30","90.2") + pathList2 = self._makePathListFromTupleList( [ tuple11 ] ) + + expRes = True + obsRes = PathUtils.areQueriesOverlappingBetweenPathLists( pathList1, pathList2 ) + + self.assertEquals( expRes, obsRes ) + + def test_areQueriesOverlappingBetweenPathLists_list1_greater_list2( self ): + tuple1 = ("1","chr1","100","110","TE2","15","10","1e-20","30","90.2") + tuple2 = ("1","chr1","200","220","TE2","15","10","1e-20","30","90.2") + tuple3 = ("1","chr1","300","330","TE2","15","10","1e-20","30","90.2") + pathList1 = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] ) + + tuple11 = ("1","chr1","10","11","TE2","150","200","1e-20","30","90.2") + tuple22 = ("1","chr1","20","22","TE2","150","200","1e-20","30","90.2") + tuple33 = ("1","chr1","30","33","TE2","150","200","1e-20","30","90.2") + pathList2 = self._makePathListFromTupleList( [ tuple11, tuple22, tuple33 ] ) + + expRes = False + obsRes = PathUtils.areQueriesOverlappingBetweenPathLists( pathList1, pathList2 ) + + self.assertEquals( expRes, obsRes ) + + def test_areQueriesOverlappingBetweenPathLists_unordered_first_item_of_list1_greater_second_item_smaller( self ): + tuple1 = ("1","chr1","400","440","TE2","15","10","1e-20","30","90.2") + tuple2 = ("1","chr1","1","11","TE2","15","10","1e-20","30","90.2") + pathList1 = self._makePathListFromTupleList( [ tuple1, tuple2 ] ) + + tuple11 = ("1","chr1","15","17","TE2","150","200","1e-20","30","90.2") + tuple22 = ("1","chr1","20","22","TE2","150","200","1e-20","30","90.2") + tuple33 = ("1","chr1","30","33","TE2","150","200","1e-20","30","90.2") + pathList2 = self._makePathListFromTupleList( [ tuple11, tuple22, tuple33 ] ) + + expRes = False + obsRes = PathUtils.areQueriesOverlappingBetweenPathLists( pathList1, pathList2 ) + + self.assertEquals( expRes, obsRes ) + + def 
test_areQueriesOverlappingBetweenPathLists_unorderd_second_item_of_list1_overlap_first_item( self ): + tuple1 = ("1","chr1","400","440","TE2","15","10","1e-20","30","90.2") + tuple2 = ("1","chr1","1","18","TE2","15","10","1e-20","30","90.2") + pathList1 = self._makePathListFromTupleList( [ tuple1, tuple2 ] ) + + tuple11 = ("1","chr1","15","17","TE2","150","200","1e-20","30","90.2") + tuple22 = ("1","chr1","20","22","TE2","150","200","1e-20","30","90.2") + tuple33 = ("1","chr1","30","33","TE2","150","200","1e-20","30","90.2") + pathList2 = self._makePathListFromTupleList( [ tuple11, tuple22, tuple33 ] ) + + expRes = True + obsRes = PathUtils.areQueriesOverlappingBetweenPathLists( pathList1, pathList2 ) + + self.assertEquals( expRes, obsRes ) + + def test_areQueriesOverlappingBetweenPathLists_last_item_list1_overlap_last_item_list2( self ): + tuple1 = ("1","chr1","400","440","TE2","15","10","1e-20","30","90.2") + tuple2 = ("1","chr1","320","340","TE2","15","10","1e-20","30","90.2") + pathList1 = self._makePathListFromTupleList( [ tuple1, tuple2 ] ) + + tuple11 = ("1","chr1","100","110","TE2","150","200","1e-20","30","90.2") + tuple22 = ("1","chr1","200","220","TE2","150","200","1e-20","30","90.2") + tuple33 = ("1","chr1","300","330","TE2","150","200","1e-20","30","90.2") + pathList2 = self._makePathListFromTupleList( [ tuple11, tuple22, tuple33 ] ) + + expRes = True + obsRes = PathUtils.areQueriesOverlappingBetweenPathLists( pathList1, pathList2 ) + + self.assertEquals( expRes, obsRes ) + + def test_writeListInFile( self ): + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line2 = ("1\tchr1\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line3 = ("1\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + + expFileName = "expFileName.path" + expFileHandle = open ( expFileName, 'w' ) + expFileHandle.write(line1) + expFileHandle.write(line2) + expFileHandle.write(line3) + expFileHandle.close() + + line1 = 
("1\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n") + line2 = ("1\tchr1\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n") + line3 = ("1\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n") + + obsFileName = "obsFileName.path" + obsPathList = self._makePathListFromStringList( [ line1, line2, line3 ] ) + + PathUtils.writeListInFile( obsPathList, obsFileName ) + + self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + + os.remove( obsFileName ) + os.remove( expFileName ) + + def test_writeListInFile_in_append_mode( self ): + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line2 = ("1\tchr1\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line3 = ("1\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line4 = ("1\tchr1\t400\t410\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line5 = ("1\tchr1\t500\t520\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line6 = ("1\tchr1\t600\t630\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + + expFileName = "expFileName.path" + expFileHandle = open ( expFileName, 'w' ) + expFileHandle.write(line1) + expFileHandle.write(line2) + expFileHandle.write(line3) + expFileHandle.write(line4) + expFileHandle.write(line5) + expFileHandle.write(line6) + expFileHandle.close() + + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line2 = ("1\tchr1\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line3 = ("1\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.200000\n") + line4 = ("1\tchr1\t400\t410\tTE2\t150\t200\t1e-20\t30\t90.2\n") + line5 = ("1\tchr1\t500\t520\tTE2\t150\t200\t1e-20\t30\t90.2\n") + line6 = ("1\tchr1\t600\t630\tTE2\t150\t200\t1e-20\t30\t90.2\n") + + obsFileName = "obsFileName.path" + obsFileHandle = open( obsFileName, 'w' ) + obsFileHandle.write(line1) + obsFileHandle.write(line2) + obsFileHandle.write(line3) + obsFileHandle.close() + + obsPathList = self._makePathListFromStringList( [ line4, line5, line6 ] ) + + PathUtils.writeListInFile( obsPathList, obsFileName, "a" ) + 
+ self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) ) + + os.remove(obsFileName) + os.remove(expFileName) + + def test_getPathListWithoutDuplicates_empty_list( self ): + pathList = [] + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = [] + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_size1( self ): + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList([line1]) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = pathList + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_only_doublons( self ): + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = self._makePathListFromStringList( [ line1 ] ) + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_doublons_at_start_and_at_end( self ): + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("1\tchr1\t300\t310\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = self._makePathListFromStringList( [ line1, line2, line3 ] ) + expPathList = 
PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( expPathList ) + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_contiguus_doublons( self ): + line1 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = ("1\tchr1\t300\t310\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = self._makePathListFromStringList( [ line1, line2, line4 ] ) + expPathList = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( expPathList ) + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_one_doublon( self ): + line1 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("1\tchr1\t210\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line5 = ("1\tchr1\t300\t310\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line5 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = self._makePathListFromStringList( [ line1, line2, line3, line5 ] ) + expPathList = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( expPathList ) + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_two_doublons( self ): + line1 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("1\tchr1\t210\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = 
("1\tchr1\t230\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line5 = ("1\tchr1\t210\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line6 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line7 = ("1\tchr1\t300\t310\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line5, line6, line7 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line7 ] ) + expPathList = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( expPathList ) + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_two_doublons_useOnlyCoord_is_False_different_id( self ): + line1 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("2\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("3\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = ("4\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line5 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line6 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line7 = ("5\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line5, line6, line7 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList ) + + expPathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line7 ] ) + expPathList = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( expPathList ) + + self.assertEquals( expPathList, obsPathList ) + + def test_getPathListWithoutDuplicates_list_with_two_doublons_useOnlyCoord_is_True_different_id( self ): + line1 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("2\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = 
("3\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line4 = ("4\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line5 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line6 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line7 = ("5\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line5, line6, line7 ] ) + + obsPathList = PathUtils.getPathListWithoutDuplicates( pathList, True ) + + expPathList = self._makePathListFromStringList( [ line1 ] ) + expPathList = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( expPathList ) + + self.assertEquals( expPathList, obsPathList ) + + def test_path_getDictOfListsWithIdAsKey_empty_list( self ): + pathList = [] + + obsDict = PathUtils.getDictOfListsWithIdAsKey( pathList ) + expDict = {} + + self.assertEquals( expDict, obsDict ) + + def test_path_getDictOfListsWithIdAsKey_list_size1( self ): + line1 = ( "1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + pathList = self._makePathListFromStringList( [ line1 ] ) + + obsDict = PathUtils.getDictOfListsWithIdAsKey( pathList ) + + expPathInstance = Path() + expPathInstance.setFromString( line1 ) + expDict = { 1: [ expPathInstance ] } + + self.assertEquals( expDict, obsDict ) + + def test_getDictOfListsWithIdAsKey_ids_only_once( self ): + line1 = ( "1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line2 = ( "2\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line3 = ( "3\tchr1\t210\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + + pathList = self._makePathListFromStringList( [ line1, line2, line3 ] ) + + obsDict = PathUtils.getDictOfListsWithIdAsKey( pathList ) + + expPathInstance1 = Path() + expPathInstance1.setFromString( line1 ) + + expPathInstance2 = Path() + expPathInstance2.setFromString( line2 ) + + expPathInstance3 = Path() + expPathInstance3.setFromString( line3 ) + + 
expDict = { 1: [ expPathInstance1 ], 2: [ expPathInstance2 ], 3: [ expPathInstance3 ] } + + self.assertEquals( expDict, obsDict ) + + def test_getDictOfListsWithIdAsKey_ids_more_than_only_once( self ): + line1 = ( "1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line2 = ( "2\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line3 = ( "3\tchr1\t210\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + + line4 = ( "1\tchr1\t100\t120\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line5 = ( "2\tchr1\t200\t220\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line6 = ( "3\tchr1\t210\t260\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + + line7 = ( "1\tchr1\t110\t120\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line8 = ( "2\tchr1\t210\t220\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + line9 = ( "3\tchr1\t220\t260\tTE2\t150\t200\t0.000000\t30\t90.200000\n" ) + + pathList = self._makePathListFromStringList( [ line1, line2, line3, line4, line5, line6, line7, line8, line9 ] ) + + obsDict = PathUtils.getDictOfListsWithIdAsKey( pathList ) + + expPathInstance1 = Path() + expPathInstance1.setFromString( line1 ) + + expPathInstance2 = Path() + expPathInstance2.setFromString( line2 ) + + expPathInstance3 = Path() + expPathInstance3.setFromString( line3 ) + + expPathInstance4 = Path() + expPathInstance4.setFromString( line4 ) + + expPathInstance5 = Path() + expPathInstance5.setFromString( line5 ) + + expPathInstance6 = Path() + expPathInstance6.setFromString( line6 ) + + expPathInstance7 = Path() + expPathInstance7.setFromString( line7 ) + + expPathInstance8 = Path() + expPathInstance8.setFromString( line8 ) + + expPathInstance9 = Path() + expPathInstance9.setFromString( line9 ) + + expDict = { 1: [ expPathInstance1, expPathInstance4, expPathInstance7 ], 2 :[ expPathInstance2, expPathInstance5, expPathInstance8 ], 3: [ expPathInstance3, expPathInstance6, expPathInstance9 ] } + + self.assertEquals( expDict, obsDict ) + + def 
test_getPathListUnjoinedBasedOnQuery_listToKeep_empty_listToUnjoin_empty( self ): + pathListToKeep = [] + pathListToUnjoin = [] + + expList = [] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_empty_listToUnjoin_size1( self ): + pathListToKeep = [] + + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ line1 ] ) + + expList = [ pathListToUnjoin ] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_empty( self ): + lineKeep1 = ("1\tchr1\t1\t11\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + pathListToUnjoin = [] + + expList = [] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_empty( self ): + pathListToKeep = [] + + line1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line2 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + line3 = ("1\tchr1\t250\t280\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ line1, line2, line3 ] ) + + expList = [ pathListToUnjoin ] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_size1( self ): + lineKeep1 = ("1\tchr1\t1\t11\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + 
pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1 ] ) + + expList = [ pathListToUnjoin ] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_size2_noSplit_minKeep( self ): + lineKeep1 = ("1\tchr1\t1\t10\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2 ] ) + + expList = [ pathListToUnjoin ] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_size3_noSplit_minKeep( self ): + lineKeep1 = ("1\tchr1\t1\t10\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin3 = ("1\tchr1\t250\t280\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2, lineUnjoin3 ] ) + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + expList = [ pathListToUnjoin ] + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_size2_noSplit_minUnjoin( self ): + lineKeep1 = ("1\tchr1\t101\t150\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t1\t10\tTE2\t150\t200\t0.000000\t30\t90.200000\n") 
+ lineUnjoin2 = ("1\tchr1\t21\t40\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2 ] ) + + expList = [ pathListToUnjoin ] + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size3_listToUnjoin_size2_oneSplit_minKeep( self ): + lineKeep1 = ("1\tchr1\t1\t10\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + lineKeep2 = ("1\tchr1\t21\t30\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + lineKeep3 = ("1\tchr1\t61\t70\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1, lineKeep2, lineKeep3 ] ) + + lineUnjoin1 = ("1\tchr1\t41\t50\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t81\t90\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2 ] ) + + expList = [] + expList.append( self._makePathListFromStringList( [ lineUnjoin1 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin2 ] ) ) + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size3_listToUnjoin_size3_twoSplits_minUnjoin( self ): + lineKeep1 = ("1\tchr1\t21\t30\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + lineKeep2 = ("1\tchr1\t41\t50\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineKeep3 = ("1\tchr1\t81\t90\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1, lineKeep2, lineKeep3 ] ) + + lineUnjoin1 = ("1\tchr1\t1\t10\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t61\t70\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + lineUnjoin3 = ("1\tchr1\t101\t110\tTE2\t150\t90\t0.000000\t30\t90.200000\n") + pathListToUnjoin = 
self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2, lineUnjoin3 ] ) + + expList = [] + expList.append( self._makePathListFromStringList( [ lineUnjoin1 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin2 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin3 ] ) ) + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_size2_split( self ): + lineKeep1 = ("1\tchr1\t51\t80\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t21\t40\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t101\t150\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2 ] ) + + expList = [] + expList.append( self._makePathListFromStringList( [ lineUnjoin1 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin2 ] ) ) + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size2_listToUnjoin_size2_split( self ): + lineKeep1 = ("1\tchr1\t1\t15\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineKeep2 = ("1\tchr1\t81\t130\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1, lineKeep2 ] ) + + lineUnjoin1 = ("1\tchr1\t21\t40\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t201\t250\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2 ] ) + + expList = [] + expList.append( self._makePathListFromStringList( [ lineUnjoin1 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin2 ] ) ) + + obsList = 
PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_ordered_OneSplit( self ): + lineKeep1 = ("1\tchr1\t120\t180\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin3 = ("1\tchr1\t250\t280\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2, lineUnjoin3 ] ) + + expList = [] + expList.append( self._makePathListFromStringList( [ lineUnjoin1 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin2, lineUnjoin3 ] ) ) + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def test_getPathListUnjoinedBasedOnQuery_listToKeep_size1_listToUnjoin_unordered_OneSplit( self ): + lineKeep1 = ("1\tchr1\t120\t180\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToKeep = self._makePathListFromStringList( [ lineKeep1 ] ) + + lineUnjoin1 = ("1\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin2 = ("1\tchr1\t250\t280\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + lineUnjoin3 = ("1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n") + pathListToUnjoin = self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2, lineUnjoin3 ] ) + + expList = [] + expList.append( self._makePathListFromStringList( [ lineUnjoin3 ] ) ) + expList.append( self._makePathListFromStringList( [ lineUnjoin1, lineUnjoin2 ] ) ) + + obsList = PathUtils.getPathListUnjoinedBasedOnQuery( pathListToKeep, pathListToUnjoin ) + + self.assertEquals( expList, obsList ) + + def 
def test_changeIdInList_empty_list(self):
    """changeIdInList on an empty list leaves it empty."""
    pathList = []

    PathUtils.changeIdInList(pathList, 1)

    obsList = pathList
    expList = []

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(expList, obsList)


def test_changeIdInList_list_size1(self):
    """changeIdInList on a single path replaces its identifier (1 -> 2)."""
    line1 = "1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n"
    line2 = "2\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n"

    pathList = self._makePathListFromStringList([line1])
    PathUtils.changeIdInList(pathList, 2)

    # the list modified in place is the observed value (the original had the
    # exp/obs names swapped)
    obsPathList = pathList
    expPathList = self._makePathListFromStringList([line2])

    self.assertEqual(expPathList, obsPathList)


def test_changeIdInList(self):
    """changeIdInList sets the same identifier (2) on every path of the list."""
    line1 = "1\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n"
    line2 = "2\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n"
    line3 = "3\tchr1\t300\t310\tTE2\t150\t200\t0.000000\t30\t90.200000\n"

    pathList = self._makePathListFromStringList([line1, line2, line3])
    PathUtils.changeIdInList(pathList, 2)
    obsPathList = pathList

    line11 = "2\tchr1\t100\t110\tTE2\t150\t200\t0.000000\t30\t90.200000\n"
    line22 = "2\tchr1\t200\t210\tTE2\t150\t200\t0.000000\t30\t90.200000\n"
    line33 = "2\tchr1\t300\t310\tTE2\t150\t200\t0.000000\t30\t90.200000\n"

    expPathList = self._makePathListFromStringList([line11, line22, line33])

    self.assertEqual(expPathList, obsPathList)


def test_getIdentityFromPathList(self):
    """Identity of two non-overlapping paths is expected to be the query-length-weighted mean."""
    p1 = Path()
    p1.setFromTuple(("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"))
    lPaths = [p1, p2]
    exp = ( 90.0 * ( 100-1+1) + 91.2 * (350-121+1) ) / ( (100-1+1) + (350-121+1) )  # 90.836363636363643
    obs = PathUtils.getIdentityFromPathList(lPaths)
    self.assertEqual(exp, obs)


def test_getIdentityFromPathList_withOverlap(self):
    """Identity when a second path (21-80) lies inside the first one (1-100) on the query."""
    p1 = Path()
    p1.setFromTuple(("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "21", "80", "sbj1", "21", "80", "0.0", "176", "91.2"))
    p3 = Path()
    p3.setFromTuple(("2", "qry1", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"))
    lPaths = [p1, p2, p3]
    exp = ( 91.2 * ( 100-1+1) + 91.2 * (350-121+1) ) / ( (100-1+1) + (350-121+1) )
    obs = PathUtils.getIdentityFromPathList(lPaths)
    self.assertEqual(exp, obs)


def test_getIdentityFromPathList_diffQueries(self):
    """Call getIdentityFromPathList on paths with different query names; an error is tolerated."""
    p1 = Path()
    p1.setFromTuple(("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry2", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"))
    lPaths = [p1, p2]
    try:
        PathUtils.getIdentityFromPathList(lPaths)
    except Exception:
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        # NOTE(review): presumably mixed queries are expected to raise -- confirm
        # and switch to assertRaises; for now the error is tolerated, not required.
        pass


def test_getIdentityFromPathList_diffSubjects_check(self):
    """Call getIdentityFromPathList with the subject check on and two subjects; an error is tolerated."""
    p1 = Path()
    p1.setFromTuple(("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"))
    p2 = Path()
    p2.setFromTuple(("1", "qry1", "121", "350", "sbj2", "101", "200", "0.0", "176", "91.2"))
    lPaths = [p1, p2]
    try:
        PathUtils.getIdentityFromPathList(lPaths, True)
    except Exception:
        # NOTE(review): presumably different subjects are expected to raise when
        # the check is enabled -- confirm and switch to assertRaises.
        pass
def test_getIdentityFromPathList_diffSubjects_noCheck(self):
    """With the subject check off, two subjects still give the length-weighted identity."""
    first = Path()
    first.setFromTuple(("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"))
    second = Path()
    second.setFromTuple(("1", "qry1", "121", "350", "sbj2", "101", "200", "0.0", "176", "91.2"))
    firstLength = 100 - 1 + 1
    secondLength = 350 - 121 + 1
    # 90.836363636363643
    expected = (90.0 * firstLength + 91.2 * secondLength) / (firstLength + secondLength)
    observed = PathUtils.getIdentityFromPathList([first, second], False)
    self.assertEqual(expected, observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQuery_alreadyOrdered_diffIdentifier(self):
    """Two paths already in increasing query order keep their order."""
    first = Path()
    first.setFromTuple(("1", "qry1", "1", "10", "sbj1", "1", "100", "0.0", "239", "90.0"))
    second = Path()
    second.setFromTuple(("2", "qry1", "21", "30", "sbj1", "101", "200", "0.0", "176", "91.2"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery([first, second])

    self.assertEqual([first, second], observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQuery_unordered_diffIdentifier(self):
    """Two paths with different identifiers given in reverse query order are swapped."""
    first = Path()
    first.setFromTuple(("2", "qry1", "21", "30", "sbj1", "101", "200", "0.0", "176", "91.2"))
    second = Path()
    second.setFromTuple(("1", "qry1", "1", "10", "sbj1", "1", "100", "0.0", "239", "90.0"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery([first, second])

    self.assertEqual([second, first], observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQuery_unordered_sameIdentifier(self):
    """Two paths sharing an identifier given in reverse query order are swapped."""
    first = Path()
    first.setFromTuple(("1", "qry1", "21", "30", "sbj1", "101", "200", "0.0", "176", "91.2"))
    second = Path()
    second.setFromTuple(("1", "qry1", "1", "10", "sbj1", "1", "100", "0.0", "239", "90.0"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery([first, second])

    self.assertEqual([second, first], observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQuery_unordered_overlapping(self):
    """Overlapping paths (6-15 and 1-10) are ordered by their query start."""
    first = Path()
    first.setFromTuple(("1", "qry1", "6", "15", "sbj1", "101", "200", "0.0", "176", "91.2"))
    second = Path()
    second.setFromTuple(("2", "qry1", "1", "10", "sbj1", "1", "100", "0.0", "239", "90.0"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery([first, second])

    self.assertEqual([second, first], observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQuery_unordered_sameMin_threeSets(self):
    """Three paths starting at 1 are ordered by increasing query end (10, 12, 15)."""
    first = Path()
    first.setFromTuple(("1", "qry1", "1", "15", "sbj1", "1", "100", "0.0", "239", "90.0"))
    second = Path()
    second.setFromTuple(("2", "qry1", "1", "10", "sbj1", "101", "200", "0.0", "176", "91.2"))
    third = Path()
    third.setFromTuple(("2", "qry1", "1", "12", "sbj1", "101", "200", "0.0", "176", "91.2"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery([first, second, third])

    self.assertEqual([second, third, first], observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQuery_unordered_included(self):
    """A path (2-4) included in another (1-5) comes after the including one."""
    first = Path()
    first.setFromTuple(("1", "qry1", "2", "4", "sbj1", "101", "200", "0.0", "176", "91.2"))
    second = Path()
    second.setFromTuple(("2", "qry1", "1", "5", "sbj1", "1", "100", "0.0", "239", "90.0"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery([first, second])

    self.assertEqual([second, first], observed)


def test_getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier_sameCoord_diffId(self):
    """Paths with identical coordinates are ordered by identifier (1 before 2)."""
    lowId = Path()
    lowId.setFromTuple(("1", "qry1", "1", "5", "sbj1", "101", "200", "0.0", "176", "91.2"))
    highId = Path()
    highId.setFromTuple(("2", "qry1", "1", "5", "sbj1", "101", "200", "0.0", "176", "91.2"))

    observed = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier(
        [highId, lowId])

    self.assertEqual([lowId, highId], observed)
def test_getListOfDistinctIdentifiers(self):
    """Three paths carrying identifiers 1, 2, 2 yield the distinct identifiers 1 and 2."""
    rows = [
        ("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"),
        ("2", "qry1", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"),
        ("2", "qry1", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"),
    ]
    paths = []
    for row in rows:
        iPath = Path()
        iPath.setFromTuple(row)
        paths.append(iPath)
    expected = [1, 2]
    observed = PathUtils.getListOfDistinctIdentifiers(paths)
    # order is not part of the contract being checked: compare sorted lists
    expected.sort()
    observed.sort()
    self.assertEqual(observed, expected)


def test_getListOfDistinctQueryNames(self):
    """Three paths on queries qry1, qry2, qry2 yield the distinct names qry1 and qry2."""
    rows = [
        ("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"),
        ("2", "qry2", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"),
        ("2", "qry2", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"),
    ]
    paths = []
    for row in rows:
        iPath = Path()
        iPath.setFromTuple(row)
        paths.append(iPath)
    expected = ["qry1", "qry2"]
    observed = PathUtils.getListOfDistinctQueryNames(paths)
    expected.sort()
    observed.sort()
    self.assertEqual(observed, expected)


def test_getListOfDistinctSubjectNames(self):
    """Three paths on subjects sbj1, sbj1, sbj2 yield the distinct names sbj1 and sbj2."""
    rows = [
        ("1", "qry1", "1", "100", "sbj1", "1", "100", "0.0", "239", "90.0"),
        ("2", "qry2", "121", "350", "sbj1", "101", "200", "0.0", "176", "91.2"),
        ("2", "qry2", "121", "350", "sbj2", "101", "200", "0.0", "176", "91.2"),
    ]
    paths = []
    for row in rows:
        iPath = Path()
        iPath.setFromTuple(row)
        paths.append(iPath)
    expected = ["sbj1", "sbj2"]
    observed = PathUtils.getListOfDistinctSubjectNames(paths)
    expected.sort()
    observed.sort()
    self.assertEqual(observed, expected)
def test_getListOfJoinCoordinatesOnQuery_returnCoord(self):
    """Two paths separated by a gap on the query (500 / 701) yield the join [501, 700]."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "500", "sbj1", "1", "500", "0.0", "532", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "701", "900", "sbj1", "501", "700", "0.0", "232", "95.3"))
    lExp = [[501, 700]]
    lPaths = [p1a, p1b]
    lObs = PathUtils.getListOfJoinCoordinatesOnQuery(lPaths)
    lExp.sort()
    lObs.sort()
    self.assertEqual(lObs, lExp)


def test_getListOfJoinCoordinatesOnQuery_overlap(self):
    """Two paths overlapping on the query (1-500, 491-900) yield no join coordinates."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "500", "sbj1", "1", "500", "0.0", "532", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "491", "900", "sbj1", "501", "770", "0.0", "232", "95.3"))
    lExp = []
    lPaths = [p1a, p1b]
    minLength = 100
    lObs = PathUtils.getListOfJoinCoordinatesOnQuery(lPaths, minLength)
    lExp.sort()
    lObs.sort()
    self.assertEqual(lObs, lExp)


def test_getListOfJoinCoordinates_tooShort(self):
    """A 50-bp gap (501-550) with minLength=100 yields no join coordinates."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "500", "sbj1", "1", "500", "0.0", "532", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "551", "900", "sbj1", "501", "750", "0.0", "232", "95.3"))
    lExp = []
    lPaths = [p1a, p1b]
    minLength = 100
    lObs = PathUtils.getListOfJoinCoordinatesOnQuery(lPaths, minLength)
    lExp.sort()
    lObs.sort()
    self.assertEqual(lObs, lExp)


def test_getLengthOnQueryFromPathList(self):
    """Paths covering 1-70 and 51-90 on the query give a length of 90."""
    p1 = Path()
    p1.setFromTuple(("1", "qry1", "1", "70", "sbj1", "1", "70", "0.0", "132", "95.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))
    lPaths = [p1, p2]
    exp = 90
    obs = PathUtils.getLengthOnQueryFromPathList(lPaths)
    self.assertEqual(obs, exp)


def test_convertPathFileIntoAlignFile(self):
    """Convert a one-line path file and compare the result with the expected align file."""
    stamp = (time.strftime("%Y%m%d%H%M%S"), os.getpid())
    pathFile = "dummyPathFile_%s_%s" % stamp
    expFile = "dummyExpFile_%s_%s" % stamp
    obsFile = "dummyObsFile_%s_%s" % stamp
    try:
        with open(pathFile, "w") as pathFileHandler:
            pathFileHandler.write("3\tchr2\t250\t151\tseq5\t1\t100\t1e-31\t147\t98.3\n")
        with open(expFile, "w") as expFileHandler:
            expFileHandler.write("chr2\t151\t250\tseq5\t100\t1\t1e-31\t147\t98.300000\n")

        PathUtils.convertPathFileIntoAlignFile(pathFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # remove the scratch files even when the conversion or the assertion fails
        for f in [pathFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)


def test_convertPathFileIntoMapFileWithQueryCoordsOnly(self):
    """Convert a three-line path file and compare the result with the expected map file."""
    stamp = (time.strftime("%Y%m%d%H%M%S"), os.getpid())
    pathFile = "dummyPathFile_%s_%s" % stamp
    expFile = "dummyExpFile_%s_%s" % stamp
    obsFile = "dummyObsFile_%s_%s" % stamp
    try:
        with open(pathFile, "w") as pathFileHandler:
            pathFileHandler.write("3\tchr2\t250\t151\tseq5\t1\t100\t1e-31\t147\t98.3\n")
            pathFileHandler.write("4\tchr2\t191\t230\tseq8\t237\t387\t1e-11\t187\t95.3\n")
            pathFileHandler.write("3\tchr2\t500\t301\tseq5\t101\t300\t1e-81\t247\t96.2\n")
        with open(expFile, "w") as expFileHandler:
            expFileHandler.write("seq5\tchr2\t250\t151\n")
            expFileHandler.write("seq8\tchr2\t191\t230\n")
            expFileHandler.write("seq5\tchr2\t500\t301\n")

        PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly(pathFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        for f in [pathFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)


def test_mergeMatchesOnQueries(self):
    """Run mergeMatchesOnQueries on a three-line path file and compare with the expected file."""
    stamp = (time.strftime("%Y%m%d%H%M%S"), os.getpid())
    pathFile = "dummyPathFile_%s_%s" % stamp
    expFile = "dummyExpFile_%s_%s" % stamp
    obsFile = "dummyObsFile_%s_%s" % stamp
    try:
        with open(pathFile, "w") as pathFileHandler:
            pathFileHandler.write("3\tchr2\t250\t151\tseq5\t1\t100\t1e-31\t147\t98.3\n")
            pathFileHandler.write("4\tchr2\t230\t191\tseq8\t237\t387\t1e-11\t187\t95.3\n")
            pathFileHandler.write("3\tchr2\t500\t301\tseq5\t101\t300\t1e-81\t247\t96.2\n")
        with open(expFile, "w") as expFileHandler:
            expFileHandler.write("0\tchr2\t151\t250\tseq5\t0\t0\t0.0\t0\t0\n")
            expFileHandler.write("0\tchr2\t301\t500\tseq5\t0\t0\t0.0\t0\t0\n")

        PathUtils.mergeMatchesOnQueries(pathFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        for f in [pathFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)


def test_filterPathListOnChainLength(self):
    """With a threshold of 20, chains 1 and 2 are kept and the 12-bp chain 3 is dropped."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "12", "sbj1", "1", "12", "0.0", "132", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "15", "30", "sbj1", "13", "28", "0.0", "132", "95.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))
    p3 = Path()
    p3.setFromTuple(("3", "qry2", "1", "12", "sbj3", "15", "1", "0.0", "132", "95.0"))
    lPaths = [p1a, p1b, p2, p3]
    lExp = [p1a, p1b, p2]
    lObs = PathUtils.filterPathListOnChainLength(lPaths, 20)
    self.assertEqual(lExp, lObs)


def test_getPathListFromFile(self):
    """Read a two-line path file and compare the loaded paths with the expected ones."""
    # renamed from "file", which shadowed the builtin
    fileName = "dummyFile_%s_%s" % (time.strftime("%Y%m%d%H%M%S"), os.getpid())
    with open(fileName, "w") as fileHandler:
        fileHandler.write("1\tchr2\t151\t250\tseq5\t0\t0\t0.0\t0\t0\n")
        fileHandler.write("2\tchr2\t301\t500\tseq5\t0\t0\t0.0\t0\t0\n")
    p1 = Path()
    p1.setFromTuple(("1", "chr2", "151", "250", "seq5", "0", "0", "0.0", "0", "0"))
    p2 = Path()
    p2.setFromTuple(("2", "chr2", "301", "500", "seq5", "0", "0", "0.0", "0", "0"))
    expLPath = [p1, p2]
    try:
        obsLPath = PathUtils.getPathListFromFile(fileName)
    finally:
        # the scratch file is removed even when loading fails
        os.remove(fileName)
    expLPathSorted = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(expLPath)
    obsLPathSorted = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(obsLPath)
    self.assertEqual(expLPathSorted, obsLPathSorted)
def test_getPathListFromFile_empty_file(self):
    """Reading an empty path file yields an empty list."""
    # renamed from "file", which shadowed the builtin
    fileName = "dummyFile_%s_%s" % (time.strftime("%Y%m%d%H%M%S"), os.getpid())
    # create the empty file
    with open(fileName, "w"):
        pass
    expLPath = []
    try:
        obsLPath = PathUtils.getPathListFromFile(fileName)
    finally:
        # the scratch file is removed even when loading fails
        os.remove(fileName)
    expLPathSorted = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(expLPath)
    obsLPathSorted = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(obsLPath)
    self.assertEqual(expLPathSorted, obsLPathSorted)


def test_convertPathFileIntoAlignFileViaPathrange_sortedInput(self):
    """Paths written in order: chain 1 is expected as one align (1-30), chain 2 unchanged."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "12", "sbj1", "1", "12", "0.0", "132", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "16", "30", "sbj1", "13", "28", "1e-270", "150", "97.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))
    a1 = Align()
    a1.setFromTuple(("qry1", "1", "30", "sbj1", "1", "28", "0.0", "282", str((95*12+97*15)/float(12+15))))
    a2 = Align()
    a2.setFromTuple(("qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))

    inFile = "dummyInFile"
    expFile = "dummyExpFile"
    obsFile = "dummyObsFile"
    try:
        with open(inFile, "w") as inF:
            for iPath in [p1a, p1b, p2]:
                iPath.write(inF)
        with open(expFile, "w") as expF:
            for iAlign in [a1, a2]:
                iAlign.write(expF)

        PathUtils.convertPathFileIntoAlignFileViaPathrange(inFile, obsFile, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # remove the scratch files even when the conversion or the assertion fails
        for f in [inFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)


def test_convertPathFileIntoAlignFileViaPathrange_unsortedInput(self):
    """Same expectation as the sorted case, with the paths written out of order."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "12", "sbj1", "1", "12", "0.0", "132", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "16", "30", "sbj1", "13", "28", "0.0", "150", "97.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))
    a1 = Align()
    a1.setFromTuple(("qry1", "1", "30", "sbj1", "1", "28", "0.0", "282", str((95*12+97*15)/float(12+15))))
    a2 = Align()
    a2.setFromTuple(("qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))

    inFile = "dummyInFile"
    expFile = "dummyExpFile"
    obsFile = "dummyObsFile"
    try:
        with open(inFile, "w") as inF:
            for iPath in [p1b, p2, p1a]:
                iPath.write(inF)
        with open(expFile, "w") as expF:
            for iAlign in [a1, a2]:
                iAlign.write(expF)

        PathUtils.convertPathFileIntoAlignFileViaPathrange(inFile, obsFile, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        for f in [inFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)


def test_convertPathFileIntoAlignFileViaPathrange_sortedInput_subjectReverseStrand(self):
    """Chain 1 given with reversed subject coordinates: the expected align keeps them (28 -> 1)."""
    p1a = Path()
    p1a.setFromTuple(("1", "qry1", "1", "12", "sbj1", "12", "1", "0.0", "132", "95.0"))
    p1b = Path()
    p1b.setFromTuple(("1", "qry1", "16", "30", "sbj1", "28", "13", "0.0", "150", "97.0"))
    p2 = Path()
    p2.setFromTuple(("2", "qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))
    a1 = Align()
    a1.setFromTuple(("qry1", "1", "30", "sbj1", "28", "1", "0.0", "282", str((95*12+97*15)/float(12+15))))
    a2 = Align()
    a2.setFromTuple(("qry1", "51", "90", "sbj2", "40", "1", "0.0", "132", "95.0"))

    inFile = "dummyInFile"
    expFile = "dummyExpFile"
    obsFile = "dummyObsFile"
    try:
        with open(inFile, "w") as inF:
            for iPath in [p1a, p1b, p2]:
                iPath.write(inF)
        with open(expFile, "w") as expF:
            for iAlign in [a1, a2]:
                iAlign.write(expF)

        PathUtils.convertPathFileIntoAlignFileViaPathrange(inFile, obsFile, 0)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        for f in [inFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)
def test_splitPathListByQueryName_empty_list(self):
    """Splitting an empty path list returns an empty list."""
    lPath = []

    obsLPath = PathUtils.splitPathListByQueryName(lPath)

    expLPath = []

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(expLPath, obsLPath)


def test_splitPathListByQueryName(self):
    """Paths on chr1, chr2, chr1 are split into [chr1 paths] and [chr2 path]."""
    iPath1 = Path()
    iPath1.setFromString("1\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath2 = Path()
    iPath2.setFromString("2\tchr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath3 = Path()
    iPath3.setFromString("3\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    lPath = [iPath1, iPath2, iPath3]

    obsLPath = PathUtils.splitPathListByQueryName(lPath)

    expLPath = [[iPath1, iPath3],
                [iPath2]]

    self.assertEqual(expLPath, obsLPath)


def test_splitPathListByQueryName_last_align_alone(self):
    """Nine paths on four queries; the last path is the only one on its query (chr4)."""
    iPath1 = Path()
    iPath1.setFromString("1\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath2 = Path()
    iPath2.setFromString("2\tchr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath3 = Path()
    iPath3.setFromString("3\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath4 = Path()
    iPath4.setFromString("4\tchr3\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath5 = Path()
    iPath5.setFromString("5\tchr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath6 = Path()
    iPath6.setFromString("6\tchr1\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath7 = Path()
    iPath7.setFromString("7\tchr1\t100\t110\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath8 = Path()
    iPath8.setFromString("8\tchr2\t200\t220\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    iPath9 = Path()
    iPath9.setFromString("9\tchr4\t300\t330\tTE2\t150\t200\t1e-20\t30\t90.2\n")
    lPath = [iPath1, iPath2, iPath3, iPath4, iPath5, iPath6, iPath7, iPath8, iPath9]

    obsLPath = PathUtils.splitPathListByQueryName(lPath)

    expLPath = [[iPath1, iPath3, iPath6, iPath7],
                [iPath2, iPath5, iPath8],
                [iPath4],
                [iPath9]]

    self.assertEqual(expLPath, obsLPath)
def test_getPathListSortedByIncreasingQueryMinThenInvQueryLength_alreadyOrdered_diffIdentifier(self):
    """Two paths already in increasing query order keep their order."""
    iPath1 = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    iPath2 = Path(2, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)
    lPaths = [iPath1, iPath2]

    lExp = [Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7),
            Path(2, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)]

    lObs = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(lExp, lObs)


def test_getPathListSortedByIncreasingQueryMinThenInvQueryLength_unordered_diffIdentifier(self):
    """Two paths with different identifiers given in reverse order are sorted by query start."""
    iPath1 = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    iPath2 = Path(2, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)
    lPaths = [iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7),
            Path(2, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)]

    lObs = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    self.assertEqual(lExp, lObs)


def test_getPathListSortedByIncreasingQueryMinThenInvQueryLength_unordered_sameIdentifier(self):
    """Two paths sharing identifier 1 given in reverse order are sorted by query start."""
    iPath1a = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    iPath1b = Path(1, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)
    lPaths = [iPath1b, iPath1a]

    lExp = [Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7),
            Path(1, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)]

    lObs = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    self.assertEqual(lExp, lObs)


def test_getPathListSortedByIncreasingQueryMinThenInvQueryLength_unordered_overlapping(self):
    """Overlapping paths (1-6 and 5-10) given in reverse order are sorted by query start."""
    iPath1 = Path(1, Range("qry1", 1, 6), Range("sbj1", 1, 6), 0.0, 10, 98.7)
    iPath2 = Path(2, Range("qry1", 5, 10), Range("sbj1", 5, 10), 0.0, 10, 98.7)
    lPaths = [iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 1, 6), Range("sbj1", 1, 6), 0.0, 10, 98.7),
            Path(2, Range("qry1", 5, 10), Range("sbj1", 5, 10), 0.0, 10, 98.7)]

    lObs = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    self.assertEqual(lExp, lObs)
def test_getPathListSortedByIncreasingQueryMinThenInvQueryLength_threePaths_2sameMin(self):
    """Two paths starting at 5: the longer one (5-12) is expected before the shorter (5-10)."""
    iPath1 = Path(1, Range("qry1", 1, 6), Range("sbj1", 1, 6), 0.0, 10, 98.7)
    iPath2 = Path(2, Range("qry1", 5, 12), Range("sbj1", 5, 12), 0.0, 10, 98.7)
    iPath3 = Path(3, Range("qry1", 5, 10), Range("sbj1", 5, 10), 0.0, 10, 98.7)
    lPaths = [iPath3, iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 1, 6), Range("sbj1", 1, 6), 0.0, 10, 98.7),
            Path(2, Range("qry1", 5, 12), Range("sbj1", 5, 12), 0.0, 10, 98.7),
            Path(3, Range("qry1", 5, 10), Range("sbj1", 5, 10), 0.0, 10, 98.7)]

    lObs = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(lExp, lObs)


def test_getPathListSortedByIncreasingQueryMinThenInvQueryLength_unordered_included(self):
    """A path (2-5) included in another (1-6) is expected after the including one."""
    iPath1 = Path(1, Range("qry1", 1, 6), Range("sbj1", 1, 6), 0.0, 10, 98.7)
    iPath2 = Path(2, Range("qry1", 2, 5), Range("sbj1", 2, 5), 0.0, 10, 98.7)
    lPaths = [iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 1, 6), Range("sbj1", 1, 6), 0.0, 10, 98.7),
            Path(2, Range("qry1", 2, 5), Range("sbj1", 2, 5), 0.0, 10, 98.7)]

    lObs = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    self.assertEqual(lExp, lObs)


def test_mergePathsInList_emptyList(self):
    """Merging an empty list returns an empty list."""
    lPaths = []
    lExp = []
    lObs = PathUtils.mergePathsInList(lPaths)
    self.assertEqual(lExp, lObs)


def test_mergePathsInList_onePath(self):
    """Merging a single path returns an equal path."""
    iPath1 = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    lPaths = [iPath1]
    lExp = [Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)]
    lObs = PathUtils.mergePathsInList(lPaths)
    self.assertEqual(lExp, lObs)
def test_mergePathsInList_noOverlap(self):
    """Two paths that do not overlap are both returned unmerged."""
    iPath1 = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    iPath2 = Path(1, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)
    lPaths = [iPath1, iPath2]

    lExp = [Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7),
            Path(1, Range("qry1", 21, 30), Range("sbj1", 11, 20), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInList(lPaths)

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(lExp, lObs)


def test_mergePathsInList_withOverlap(self):
    """Two overlapping paths (1-10, 6-15) are merged into one path spanning 1-15."""
    iPath1 = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    iPath2 = Path(1, Range("qry1", 6, 15), Range("sbj1", 6, 15), 0.0, 10, 98.7)
    lPaths = [iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 1, 15), Range("sbj1", 1, 15), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInList(lPaths)

    self.assertEqual(lExp, lObs)


def test_mergePathsInList_withOverlap_reverseOnly(self):
    """Two overlapping paths given with reversed coordinates merge into one spanning 15-1."""
    iPath1 = Path(1, Range("qry1", 10, 1), Range("sbj1", 10, 1), 0.0, 10, 98.7)
    iPath2 = Path(1, Range("qry1", 15, 6), Range("sbj1", 15, 6), 0.0, 10, 98.7)
    lPaths = [iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 15, 1), Range("sbj1", 15, 1), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInList(lPaths)

    self.assertEqual(lExp, lObs)


def test_mergePathsInList_withOverlap_directAndReverse(self):
    """Two reversed paths plus a direct one (2-5) merge into one path spanning 15-1."""
    iPath1 = Path(1, Range("qry1", 10, 1), Range("sbj1", 10, 1), 0.0, 10, 98.7)
    iPath2 = Path(1, Range("qry1", 15, 6), Range("sbj1", 15, 6), 0.0, 10, 98.7)
    iPath3 = Path(1, Range("qry1", 2, 5), Range("sbj1", 2, 5), 0.0, 10, 98.7)
    lPaths = [iPath3, iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 15, 1), Range("sbj1", 15, 1), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInList(lPaths)

    self.assertEqual(lExp, lObs)


def test_mergePathsInList_diffQueries_withOverlap(self):
    """Paths with overlapping coordinates but different queries are not merged."""
    iPath1 = Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7)
    iPath2 = Path(2, Range("qry2", 6, 15), Range("sbj1", 6, 15), 0.0, 10, 98.7)
    lPaths = [iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 1, 10), Range("sbj1", 1, 10), 0.0, 10, 98.7),
            Path(2, Range("qry2", 6, 15), Range("sbj1", 6, 15), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInList(lPaths)

    self.assertEqual(lExp, lObs)
def test_mergePathsInList_nonOverlappingSubjects(self):
    """Paths overlapping on the query but not on the subject are returned unmerged."""
    iPath1 = Path(1, Range("qry1", 398, 491), Range("sbj1", 10, 112), 0.0, 10, 98.7)
    iPath2 = Path(1, Range("qry1", 451, 492), Range("sbj1", 124, 169), 0.0, 10, 98.7)
    iPath3 = Path(1, Range("qry1", 493, 531), Range("sbj1", 249, 294), 0.0, 10, 98.7)
    lPaths = [iPath3, iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 398, 491), Range("sbj1", 10, 112), 0.0, 10, 98.7),
            Path(1, Range("qry1", 451, 492), Range("sbj1", 124, 169), 0.0, 10, 98.7),
            Path(1, Range("qry1", 493, 531), Range("sbj1", 249, 294), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInList(lPaths)

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(lExp, lObs)


def test_mergePathsInListUsingQueryCoordsOnly(self):
    """Same input as above: the two paths overlapping on the query are merged (398-492)."""
    iPath1 = Path(1, Range("qry1", 398, 491), Range("sbj1", 10, 112), 0.0, 10, 98.7)
    iPath2 = Path(1, Range("qry1", 451, 492), Range("sbj1", 124, 169), 0.0, 10, 98.7)
    iPath3 = Path(1, Range("qry1", 493, 531), Range("sbj1", 249, 294), 0.0, 10, 98.7)
    lPaths = [iPath3, iPath2, iPath1]

    lExp = [Path(1, Range("qry1", 398, 492), Range("sbj1", 10, 169), 0.0, 10, 98.7),
            Path(1, Range("qry1", 493, 531), Range("sbj1", 249, 294), 0.0, 10, 98.7)]

    lObs = PathUtils.mergePathsInListUsingQueryCoordsOnly(lPaths)

    self.assertEqual(lExp, lObs)


def test_convertPathFileIntoGffFile(self):
    """Convert a path file with a single-path chain and a two-path chain; compare with the expected GFF."""
    p1 = Path()
    p1.setFromTuple(("1", "qry1", "12", "1", "sbj1", "1", "12", "0.0", "132", "95.0"))
    p2a = Path()
    p2a.setFromTuple(("2", "qry1", "16", "30", "sbj2", "1", "15", "1e-270", "150", "97.0"))
    p2b = Path()
    p2b.setFromTuple(("2", "qry1", "51", "90", "sbj2", "21", "60", "0.0", "132", "95.9"))

    inFile = "dummyInFile"
    expFile = "dummyExpFile"
    obsFile = "dummyObsFile"
    try:
        PathUtils.writeListInFile([p1, p2a, p2b], inFile, "w")

        with open(expFile, "w") as expF:
            expF.write("qry1\tREPET\tmatch\t1\t12\t0\t-\t.\tID=1;Target=sbj1 1 12\n")
            expF.write("qry1\tREPET\tmatch\t16\t90\t0\t+\t.\tID=ms2;Target=sbj2 1 60\n")
            expF.write("qry1\tREPET\tmatch_part\t16\t30\t1e-270\t+\t.\tID=mp2-1;Parent=ms2;Target=sbj2 1 15\n")
            expF.write("qry1\tREPET\tmatch_part\t51\t90\t0\t+\t.\tID=mp2-2;Parent=ms2;Target=sbj2 21 60\n")

        PathUtils.convertPathFileIntoGffFile(inFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # remove the scratch files even when the conversion or the assertion fails
        for f in [inFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)
def test_convertPathFileIntoSetFile(self):
    """Convert a one-line path file and compare the result with the expected set file."""
    stamp = (time.strftime("%Y%m%d%H%M%S"), os.getpid())
    pathFile = "dummyPathFile_%s_%s" % stamp
    expFile = "dummyExpFile_%s_%s" % stamp
    obsFile = "dummyObsFile_%s_%s" % stamp
    try:
        with open(pathFile, "w") as pathFileHandler:
            pathFileHandler.write("3\tchr2\t250\t151\tseq5\t1\t100\t1e-31\t147\t98.3\n")
        with open(expFile, "w") as expFileHandler:
            expFileHandler.write("3\tseq5\tchr2\t250\t151\n")

        PathUtils.convertPathFileIntoSetFile(pathFile, obsFile)

        self.assertTrue(FileUtils.are2FilesIdentical(expFile, obsFile))
    finally:
        # remove the scratch files even when the conversion or the assertion fails
        for f in [pathFile, expFile, obsFile]:
            if os.path.exists(f):
                os.remove(f)


def test_removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(self):
    """Filter a 17-line path file; 13 lines are expected to remain.

    The four lines absent from the expected file repeat the query name,
    query coordinates and subject name of an earlier input line.
    """
    pathFile = "dummyPathFile"
    obsPathFile = "obsDummyPathFile"
    expPathFile = "expDummyPathFile"
    try:
        with open(pathFile, "w") as f:
            f.write("1\tG4\t1\t3856\tAtha5Chr4_Pals_Piler_3590_69_MAP_3\t1\t3856\t0\t7642\t99.9741\n")
            f.write("1\tG4\t1\t3856\tAtha5Chr4_Pals_Piler_3590_69_MAP_3\t100\t3956\t0\t7642\t99.9741\n")
            f.write("2\trooA\t1\t386\tAtha5Chr4_Pals_Piler_3589_69_MAP_3\t1\t386\t6.3e-220\t758\t99.4819\n")
            f.write("3\trooA\t7236\t7621\tAtha5Chr4_Pals_Piler_3536_69_MAP_3\t1\t386\t6.3e-220\t758\t99.4819\n")
            f.write("4\trooA\t387\t7235\tAtha5Chr4_Pals_Piler_3596_69_MAP_3\t1\t6849\t0\t13580\t99.9854\n")
            f.write("5\taurora-element\t4046\t4257\tAtha5Chr4_Pals_Piler_3540_69_MAP_3\t1\t204\t6.1e-80\t300\t96.5686\n")
            f.write("6\taurora-element\t274\t381\tAtha5Chr4_Pals_Piler_3595_23_MAP_3\t177\t284\t0\t191\t97.2222\n")
            f.write("6\taurora-element\t116\t287\tAtha5Chr4_Pals_Piler_3595_30_MAP_3\t3\t170\t0\t290\t98.8095\n")
            f.write("7\taurora-element\t393\t902\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t1467\t1945\t0\t873\t97.2441\n")
            f.write("7\taurora-element\t393\t902\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t276\t100784\t0\t869\t98.1211\n")
            f.write("7\taurora-element\t1387\t2271\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t276\t10780\t0\t1576\t97.6244\n")
            f.write("8\taurora-element\t2486\t2828\tAtha5Chr4_Pals_Piler_3595_50_MAP_3\t4301\t4641\t0\t585\t97.3607\n")
            f.write("9\taurora-element\t2265\t2483\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\t3999\t4218\t0\t361\t96.347\n")
            f.write("10\taurora-element\t2834\t4045\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\t4800\t6011\t0\t2074\t97.0248\n")
            f.write("11\taurora-element\t2\t113\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\t205\t317\t8.5e-37\t157\t93.75\n")
            f.write("11\taurora-element\t2\t113\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\t305\t417\t8.5e-37\t157\t93.75\n")
            f.write("11\taurora-element\t2\t113\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\t305\t417\t8.5e-37\t157\t93.75\n")

        PathUtils.removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(pathFile, obsPathFile)

        with open(expPathFile, "w") as f:
            f.write("1\tG4\t1\t3856\tAtha5Chr4_Pals_Piler_3590_69_MAP_3\t1\t3856\t0\t7642\t99.974100\n")
            f.write("2\trooA\t1\t386\tAtha5Chr4_Pals_Piler_3589_69_MAP_3\t1\t386\t6.3e-220\t758\t99.481900\n")
            f.write("3\trooA\t7236\t7621\tAtha5Chr4_Pals_Piler_3536_69_MAP_3\t1\t386\t6.3e-220\t758\t99.481900\n")
            f.write("4\trooA\t387\t7235\tAtha5Chr4_Pals_Piler_3596_69_MAP_3\t1\t6849\t0\t13580\t99.985400\n")
            f.write("5\taurora-element\t4046\t4257\tAtha5Chr4_Pals_Piler_3540_69_MAP_3\t1\t204\t6.1e-80\t300\t96.568600\n")
            f.write("6\taurora-element\t274\t381\tAtha5Chr4_Pals_Piler_3595_23_MAP_3\t177\t284\t0\t191\t97.222200\n")
            f.write("6\taurora-element\t116\t287\tAtha5Chr4_Pals_Piler_3595_30_MAP_3\t3\t170\t0\t290\t98.809500\n")
            f.write("7\taurora-element\t393\t902\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t1467\t1945\t0\t873\t97.244100\n")
            f.write("7\taurora-element\t1387\t2271\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t276\t10780\t0\t1576\t97.624400\n")
            f.write("8\taurora-element\t2486\t2828\tAtha5Chr4_Pals_Piler_3595_50_MAP_3\t4301\t4641\t0\t585\t97.360700\n")
            f.write("9\taurora-element\t2265\t2483\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\t3999\t4218\t0\t361\t96.347000\n")
            f.write("10\taurora-element\t2834\t4045\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\t4800\t6011\t0\t2074\t97.024800\n")
            f.write("11\taurora-element\t2\t113\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\t205\t317\t8.5e-37\t157\t93.750000\n")

        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))
    finally:
        # remove the scratch files even when the filtering or the assertion fails
        for scratch in [pathFile, expPathFile, obsPathFile]:
            if os.path.exists(scratch):
                os.remove(scratch)
f.write("3\trooA\t7236\t7621\tAtha5Chr4_Pals_Piler_3536_69_MAP_3\t1\t386\t6.3e-220\t758\t99.481900\n") + f.write("4\trooA\t387\t7235\tAtha5Chr4_Pals_Piler_3596_69_MAP_3\t1\t6849\t0\t13580\t99.985400\n") + f.write("5\taurora-element\t4046\t4257\tAtha5Chr4_Pals_Piler_3540_69_MAP_3\t1\t204\t6.1e-80\t300\t96.568600\n") + f.write("6\taurora-element\t274\t381\tAtha5Chr4_Pals_Piler_3595_23_MAP_3\t177\t284\t0\t191\t97.222200\n") + f.write("6\taurora-element\t116\t287\tAtha5Chr4_Pals_Piler_3595_30_MAP_3\t3\t170\t0\t290\t98.809500\n") + f.write("7\taurora-element\t393\t902\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t1467\t1945\t0\t873\t97.244100\n") + f.write("7\taurora-element\t1387\t2271\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\t276\t10780\t0\t1576\t97.624400\n") + f.write("8\taurora-element\t2486\t2828\tAtha5Chr4_Pals_Piler_3595_50_MAP_3\t4301\t4641\t0\t585\t97.360700\n") + f.write("9\taurora-element\t2265\t2483\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\t3999\t4218\t0\t361\t96.347000\n") + f.write("10\taurora-element\t2834\t4045\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\t4800\t6011\t0\t2074\t97.024800\n") + f.write("11\taurora-element\t2\t113\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\t205\t317\t8.5e-37\t157\t93.750000\n") + f.close() + + self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile)) + + os.remove(pathFile) + os.remove(expPathFile) + os.remove(obsPathFile) + + + def test_getPathListWithoutDuplicatesOnQueryCoord(self): + iPath1 = Path(1, Range("qry1",398,491), Range("sbj1",10,112), 0.0, 10, 98.7) + iPath2 = Path(1, Range("qry1",451,492), Range("sbj1",124,169), 0.0, 10, 98.7) + iPath3 = Path(1, Range("qry1",451,492), Range("sbj1",249,294), 0.0, 10, 98.7) + lPaths = [iPath3, iPath2, iPath1] + + obslPaths = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths) + + explPaths = [iPath1, iPath3] + + self.assertEquals(explPaths, obslPaths) + + + def _makePathListFromTupleList ( self, tupleList ): + pathList = [] + for tuple in tupleList: + path = Path() + path.setFromTuple(tuple) 
+ pathList.append(path) + return pathList + + def _makePathListFromStringList (self, stringList): + pathList = [] + for string in stringList: + path = Path() + path.setFromString(string) + pathList.append(path) + return pathList + + def _show (self, list): + for item in list: + print item.toString() + + +test_suite = unittest.TestSuite() +test_suite.addTest( unittest.makeSuite( Test_PathUtils ) ) +if __name__ == "__main__": + unittest.TextTestRunner(verbosity=2).run( test_suite )