diff TEisotools-1.1.a/commons/core/coord/SetUtils.py @ 16:836ce3d9d47a draft default tip

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:42:47 -0400
parents 255c852351c5
children
line wrap: on
line diff
--- a/TEisotools-1.1.a/commons/core/coord/SetUtils.py	Thu Jul 21 07:36:44 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,553 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.coord.Set import Set
-
-## Static methods for the manipulation of Set instances
-#
-class SetUtils( object ):
-    
-    ## Change the identifier of each Set instance in the given list
-    #
-    # @param lSets list of Set instances
-    # @param newId new identifier
-    #
-    def changeIdInList(lSets, newId):
-        for iSet in lSets:
-            iSet.id = newId
-            
-    changeIdInList = staticmethod( changeIdInList )
-    
-    ## Return the length of the overlap between two lists of Set instances
-    #
-    # @param lSets1 list of Set instances
-    # @param lSets2 list of Set instances
-    # @return length of overlap
-    # @warning sequence names are supposed to be identical
-    #
-    def getOverlapLengthBetweenLists(lSets1, lSets2):
-        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
-        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
-        osize = 0
-        i = 0
-        j = 0
-        while i!= len(lSet1Sorted):
-            while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\
-                and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):
-                j+=1
-            jj=j
-            while jj!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[jj]):
-                osize+=lSet1Sorted[i].getOverlapLength(lSet2Sorted[jj])
-                jj+=1
-            i+=1
-        return osize
-    
-    getOverlapLengthBetweenLists = staticmethod( getOverlapLengthBetweenLists )
-    
-    ## Return True if the two lists of Set instances overlap, False otherwise    
-    #
-    # @param lSets1 list of Set instances
-    # @param lSets2 list of Set instances
-    #    
-    def areSetsOverlappingBetweenLists( lSets1, lSets2 ):
-        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
-        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
-        i=0
-        j=0
-        while i!= len(lSet1Sorted):
-            while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\
-                and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):
-                j+=1
-            if j!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[j]):
-                return True
-            i+=1
-        return False
-    
-    areSetsOverlappingBetweenLists = staticmethod( areSetsOverlappingBetweenLists )
-    
-    ## Merge all overlapping Set instances between two lists of Set and give the next identifier 
-    #
-    # @param lSets1 list of Set instances
-    # @param lSets2 list of Set instances
-    # @param max_id start id value for inserting new Set
-    # @return a new list of the merged Set instances and the next identifier
-    # 
-    def getListOfMergedSetsAndNextId(lSets1, lSets2, max_id=0):
-        lSets_merged = []
-        list2merge = SetUtils.getListOfIdListOfOverlappingSets ( lSets1,lSets2 )
-        idlist1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
-        idlist2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
-        if max_id == 0:
-            max_id = max(idlist1.keys()) + 1
-        for i in list2merge:
-            if i == []:
-                continue
-            l = []
-            min_id = max(i)
-            for j in i:
-                if j>0:
-                    if min_id>j:
-                        min_id=j
-                    l.extend(idlist1[j])
-                    del idlist1[j]
-                else:
-                    l.extend(idlist2[j*-1])
-                    del idlist2[j*-1]
-            l = SetUtils.mergeSetsInList(l)
-            SetUtils.changeIdInList(l, min_id)
-            lSets_merged.extend(l)
-        for id, alist in idlist1.items():
-            lSets_merged.extend(alist)
-        for id,alist in idlist2.items():
-            SetUtils.changeIdInList(alist,max_id)
-            lSets_merged.extend(alist)
-            max_id+=1
-        return lSets_merged, max_id
-    
-    getListOfMergedSetsAndNextId = staticmethod ( getListOfMergedSetsAndNextId )
-    
-#    ## Concatenate two Set instance lists and give the next identifier 
-#    #
-#    # @param lSets1 list of Set instances
-#    # @param lSets2 list of Set instances
-#    # @param maxId start id value for inserting new Set
-#    # @return a new list of Set instances and the next identifier
-#    # 
-#    @staticmethod
-#    def getSetsListOfTwoConcatenatedSetsListAndNextId(lSets1, lSets2, maxId = 0):
-#        lOutSets = lSets1
-#        dId2SetsList2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
-#        if maxId == 0:
-#            dId2SetsList1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
-#            maxId = max(dId2SetsList1.keys())
-#        for lSets in dId2SetsList2.values():
-#            SetUtils.changeIdInList(lSets, maxId)
-#            lOutSets.extend(lSets)
-#            maxId += 1
-#        return lOutSets, maxId
-    
-    ## Return the sum of the length of each Set instance in the given list
-    #
-    # @param lSets: list of Set instances
-    #
-    def getCumulLength(lSets):
-        length = 0
-        for i in lSets:
-            length += i.getLength()
-        return length
-    
-    getCumulLength = staticmethod( getCumulLength )
-    
-    ## Return a tuple with min and max coordinates of Set instances in the given list
-    #
-    # @param lSets list of Set instances
-    # 
-    def getListBoundaries(lSets):
-        qmin = -1
-        qmax = -1
-        for iSet in lSets:
-            if qmin == -1:
-                qmin = iSet.start
-            qmin = min(qmin, iSet.getMin())
-            qmax = max(qmax, iSet.getMax())
-        return (qmin, qmax)
-    
-    getListBoundaries = staticmethod( getListBoundaries )
-    
-    ## Show Set instances contained in the given list
-    #
-    # @param lSets list of Set instances
-    # 
-    def showList(lSets):
-        for iSet in lSets:
-            iSet.show()
-            
-    showList = staticmethod( showList )
-    
-    ## Write Set instances contained in the given list
-    #
-    # @param lSets list of Set instances
-    # @param fileName a file name
-    # @param mode the open mode of the file '"w"' or '"a"' 
-    #
-    def writeListInFile(lSets, fileName, mode="w"):
-        fileHandler = open(fileName, mode)
-        for iSet in lSets:
-            iSet.write(fileHandler)
-        fileHandler.close()
-        
-    writeListInFile = staticmethod( writeListInFile )
-    
-    ## Split a Set list in several Set lists according to the identifier
-    #
-    # @param lSets list of Set instances
-    # @return a dictionary which keys are identifiers and values Set lists
-    #
-    def getDictOfListsWithIdAsKey(lSets):
-        dId2SetList = {}
-        for iSet in lSets:
-            if dId2SetList.has_key(iSet.id):
-                dId2SetList[iSet.id].append(iSet)
-            else:
-                dId2SetList[iSet.id] = [iSet]
-        return dId2SetList
-    
-    getDictOfListsWithIdAsKey = staticmethod( getDictOfListsWithIdAsKey )
-    
-    
-    ## Split a Set list in several Set lists according to the identifier
-    #
-    # @param lSets list of Set instances
-    # @return a dictionary which keys are identifiers and values Set lists
-    #
-    def getDictOfListsWithIdAsKeyFromFile( setFile ):
-        dId2SetList = {}
-        setFileHandler = open( setFile, "r" )
-        while True:
-            line = setFileHandler.readline()
-            if line == "":
-                break
-            iSet = Set()
-            iSet.setFromTuple( line[:-1].split("\t") )
-            if not dId2SetList.has_key( iSet.id ):
-                dId2SetList[ iSet.id ] = []
-            dId2SetList[ iSet.id ].append( iSet )
-        setFileHandler.close()
-        return dId2SetList
-    
-    getDictOfListsWithIdAsKeyFromFile = staticmethod( getDictOfListsWithIdAsKeyFromFile )
-    
-    
-    ## Return a Map list from the given Set List
-    #
-    # @param lSets list of Set instances
-    # 
-    def getMapListFromSetList(lSets):
-        lMaps = []
-        for iSet in lSets:
-            lMaps.append(iSet.set2map())
-        return lMaps
-    
-    getMapListFromSetList = staticmethod( getMapListFromSetList )
-    
-    ## Construct a Set list from a Map list
-    #
-    # @param lMaps list of Map instances
-    # 
-    def getSetListFromMapList(lMaps):
-        lSets = []
-        c = 0
-        for iMap in lMaps:
-            c += 1
-            lSets.append( Set(c, iMap.name, iMap.seqname, iMap.start, iMap.end) )
-        return lSets
-    
-    getSetListFromMapList = staticmethod( getSetListFromMapList )
-    
-    ## Merge all overlapping Set instances in a list without considering the identifiers.
-    #  Start by sorting Set instances by their increasing Min coordinate.
-    #
-    # @return: a new list of the merged Set instances
-    #
-    def mergeSetsInList(lSets):
-        l=[]
-        if len(lSets)==0:
-            return l
-        
-        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenInvLength( lSets )
-        
-        prev_count = 0
-        for iSet in lSortedSets[0:]:
-            if prev_count != len(lSortedSets):
-                for i in lSortedSets[ prev_count + 1: ]:
-                    if iSet.isOverlapping( i ):
-                        iSet.merge( i )
-                IsAlreadyInList = False
-                for newSet in l:
-                    if newSet.isOverlapping( iSet ):
-                        IsAlreadyInList = True
-                        newSet.merge( iSet )
-                        l [ l.index( newSet ) ] = newSet
-                if not IsAlreadyInList:
-                    l.append( iSet )
-                prev_count += 1
-        return l
-    
-    mergeSetsInList = staticmethod( mergeSetsInList )
-    
-    ## Unjoin a Set list according to another
-    #
-    # @param lToKeep: a list of Set instances to keep 
-    # @param lToUnjoin: a list of Set instances to unjoin
-    # @return: lToUnjoin split in several list
-    #    
-    def getSetListUnjoined(lToKeep, lToUnjoin):
-        lSortedToKeep = SetUtils.getSetListSortedByIncreasingMinThenMax( lToKeep )
-        lSortedToUnjoin = SetUtils.getSetListSortedByIncreasingMinThenMax( lToUnjoin )
-        if lSortedToUnjoin == []:
-            return []
-        if lSortedToKeep == []:
-            return [ lSortedToUnjoin ]
-        
-        i=0
-        resultListSet=[]
-        while i<len(lSortedToKeep):
-            j1=0
-            while j1<len(lSortedToUnjoin) and lSortedToKeep[i].getMin() > lSortedToUnjoin[j1].getMax():
-                j1+=1
-            if j1==len(lSortedToUnjoin):
-                break
-            if j1!=0:
-                resultListSet.append(lSortedToUnjoin[:j1])
-                del lSortedToUnjoin[:j1]
-                j1=0
-            if i+1==len(lSortedToKeep):
-                break
-            j2=j1
-            if j2<len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
-                while j2<len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
-                    j2+=1
-                resultListSet.append(lSortedToUnjoin[j1:j2])
-                del lSortedToUnjoin[j1:j2]
-            i+=1
-    
-        if resultListSet!=[] or i == 0:
-            resultListSet.append(lSortedToUnjoin)
-        return resultListSet
-    
-    getSetListUnjoined = staticmethod(getSetListUnjoined)
-      
-    ## Return new list of Set instances with no duplicate
-    #
-    # @param lSets list of Set instances
-    #
-    def getSetListWithoutDuplicates( lSets ):
-        if len(lSets) < 2:
-            return lSets
-        lSortedSet = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
-        lUniqSet = [ lSortedSet[0] ]
-        for iSet in lSortedSet[1:]:
-            if iSet != lUniqSet[-1]:
-                lUniqSet.append( iSet )
-        return lUniqSet
-    
-    getSetListWithoutDuplicates = staticmethod( getSetListWithoutDuplicates )
-    
-    ## Return a list of Set instances sorted in increasing order according to the Min, then the Max, and finally their initial order
-    #
-    # @param lSets: list of Set instances
-    #
-    def getSetListSortedByIncreasingMinThenMax( lSets ):
-        return sorted( lSets, key=lambda iSet: ( iSet.getMin(), iSet.getMax() ) )
-    
-    getSetListSortedByIncreasingMinThenMax = staticmethod( getSetListSortedByIncreasingMinThenMax )
-    
-    ## Return a list of Set instances sorted in increasing order according to the min, then the inverse of the length, and finally their initial order
-    #
-    # @param lSets: list of Set instances
-    #
-    def getSetListSortedByIncreasingMinThenInvLength( lSets ):
-        return sorted( lSets, key=lambda iSet: ( iSet.getMin(), 1 / float(iSet.getLength()) ) )
-    
-    getSetListSortedByIncreasingMinThenInvLength = staticmethod( getSetListSortedByIncreasingMinThenInvLength )
- 
-    ## Return a list of Set instances sorted in increasing order according to the SeqName, then the Name, then the Min, then the Max and finally their initial order
-    #
-    # @param lSets: list of Set instances
-    #   
-    def getSetListSortedBySeqThenRegionThenMinThenMax(lSets):
-        return sorted(lSets, key=lambda iSet: (iSet.getSeqname(), iSet.getName(), iSet.getMin(), iSet.getMax()))
-    
-    getSetListSortedBySeqThenRegionThenMinThenMax = staticmethod(getSetListSortedBySeqThenRegionThenMinThenMax)
-    
-    ## Return a list of identifier lists of overlapping Sets from the subject list, according to the reference list
-    #
-    # @param lRef list of Set instances
-    # @param lSubject list of Set instances
-    #
-    def getListOfIdListOfOverlappingSets(lRef,lSubject):
-        lSortedRef = SetUtils.getSetListSortedByIncreasingMinThenMax( lRef )
-        lSortedSubject = SetUtils.getSetListSortedByIncreasingMinThenMax( lSubject )
-
-        lOverlappingSet = []
-        lOverlappingSetCounter = 0
-
-        id2LOverlappingSet_pos = {}
-    
-        i = 0
-        j = 0
-        while i!= len(lSortedRef):
-            while j!= len(lSortedSubject) and lSortedRef[i].getMin()>lSortedSubject[j].getMax()\
-                and not(lSortedRef[i].isOverlapping(lSortedSubject[j])\
-                      and lSortedRef[i].isOnDirectStrand()==lSortedSubject[j].isOnDirectStrand()):
-                j+=1
-            jj=j
-            while jj!= len(lSortedSubject) and lSortedRef[i].isOverlapping(lSortedSubject[jj])\
-                  and lSortedRef[i].isOnDirectStrand()==lSortedSubject[jj].isOnDirectStrand():
-                id1=lSortedRef[i].id
-                id2=lSortedSubject[jj].id*-1
-                if id2LOverlappingSet_pos.has_key(id1) \
-                   and not id2LOverlappingSet_pos.has_key(id2):
-                    lOverlappingSet[id2LOverlappingSet_pos[id1]].append(id2)
-                    id2LOverlappingSet_pos[id2]=id2LOverlappingSet_pos[id1]
-                if id2LOverlappingSet_pos.has_key(id2) \
-                   and not id2LOverlappingSet_pos.has_key(id1):
-                    lOverlappingSet[id2LOverlappingSet_pos[id2]].append(id1)
-                    id2LOverlappingSet_pos[id1]=id2LOverlappingSet_pos[id2]
-                if not id2LOverlappingSet_pos.has_key(id2) \
-                   and not id2LOverlappingSet_pos.has_key(id1):
-                    lOverlappingSet.append([id1,id2])
-                    id2LOverlappingSet_pos[id1]=lOverlappingSetCounter
-                    id2LOverlappingSet_pos[id2]=lOverlappingSetCounter
-                    lOverlappingSetCounter+=1
-                jj+=1
-            i+=1
-    
-        return lOverlappingSet
-    
-    getListOfIdListOfOverlappingSets = staticmethod (getListOfIdListOfOverlappingSets)
-    
-    ## Return a list of sets without overlapping between two lists of sets
-    #
-    # @param lSet1 and lSet2 
-    #
-    def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2):
-        for i in lSet1:
-            for idx,j in enumerate(lSet2):
-                n=j.diff(i)
-                if not n.isEmpty() and n.getLength()>=20:
-                    lSet2.append(n)
-        lSet2WithoutOverlaps=[]
-        for i in lSet2:
-            if not i.isEmpty() and i.getLength()>=20:
-                lSet2WithoutOverlaps.append(i)
-        return lSet2WithoutOverlaps
-        
-    getListOfSetWithoutOverlappingBetweenTwoListOfSet = staticmethod (getListOfSetWithoutOverlappingBetweenTwoListOfSet)
-
-    ## Return a Set list from a Set file
-    #
-    # @param setFile string name of a Set file
-    # @return a list of Set instances
-    #
-    def getSetListFromFile( setFile ):
-        lSets = []
-        setFileHandler = open( setFile, "r" )
-        while True:
-            line = setFileHandler.readline()
-            if line == "":
-                break
-            iSet = Set()
-            iSet.setFromString( line )
-            lSets.append( iSet )
-        setFileHandler.close()
-        return lSets
-    
-    getSetListFromFile = staticmethod( getSetListFromFile )
-    
-    
-    def convertSetFileIntoMapFile( setFile, mapFile ):
-        setFileHandler = open( setFile, "r" )
-        mapFileHandler = open( mapFile, "w" )
-        iSet = Set()
-        while True:
-            line = setFileHandler.readline()
-            if line == "":
-                break
-            iSet.setFromString( line )
-            iMap = iSet.getMapInstance()
-            iMap.write( mapFileHandler )
-        setFileHandler.close()
-        mapFileHandler.close()
-        
-    convertSetFileIntoMapFile = staticmethod( convertSetFileIntoMapFile )
-
-
-    def getDictOfListsWithSeqnameAsKey( lSets ):
-        dSeqnamesToSetList = {}
-        for iSet in lSets:
-            if not dSeqnamesToSetList.has_key( iSet.seqname ):
-                dSeqnamesToSetList[ iSet.seqname ] = []
-            dSeqnamesToSetList[ iSet.seqname ].append( iSet )
-        return dSeqnamesToSetList
-    
-    getDictOfListsWithSeqnameAsKey = staticmethod( getDictOfListsWithSeqnameAsKey )
-    
-    
-    def filterOnLength( lSets, minLength=0, maxLength=10000000000 ):
-        if minLength == 0 and maxLength == 0:
-            return lSets
-        lFiltered = []
-        for iSet in lSets:
-            if minLength <= iSet.getLength() <= maxLength:
-                lFiltered.append( iSet )
-        return lFiltered
-    
-    filterOnLength = staticmethod( filterOnLength )
-    
-    
-    def getListOfNames( setFile ):
-        lNames = []
-        setFileHandler = open( setFile, "r" )
-        iSet = Set()
-        while True:
-            line = setFileHandler.readline()
-            if line == "":
-                break
-            iSet.setFromTuple( line[:-1].split("\t") )
-            if iSet.name not in lNames:
-                lNames.append( iSet.name )
-        setFileHandler.close()
-        return lNames
-    
-    getListOfNames = staticmethod( getListOfNames )
-
-
-    def getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile ):
-        dNames2DictsId = {}
-        setFileHandler = open( setFile, "r" )
-        while True:
-            line = setFileHandler.readline()
-            if line == "":
-                break
-            iSet = Set()
-            iSet.setFromTuple( line[:-1].split("\t") )
-            if not dNames2DictsId.has_key( iSet.name ):
-                dNames2DictsId[ iSet.name ] = { iSet.id: [ iSet ] }
-            else:
-                if not dNames2DictsId[ iSet.name ].has_key( iSet.id ):
-                    dNames2DictsId[ iSet.name ][ iSet.id ] = [ iSet ]
-                else:
-                    dNames2DictsId[ iSet.name ][ iSet.id ].append( iSet )
-        setFileHandler.close()
-        return dNames2DictsId
-    
-    getDictOfDictsWithNamesThenIdAsKeyFromFile = staticmethod( getDictOfDictsWithNamesThenIdAsKeyFromFile )