| 13 | 1 # Copyright INRA (Institut National de la Recherche Agronomique) | 
|  | 2 # http://www.inra.fr | 
|  | 3 # http://urgi.versailles.inra.fr | 
|  | 4 # | 
|  | 5 # This software is governed by the CeCILL license under French law and | 
|  | 6 # abiding by the rules of distribution of free software.  You can  use, | 
|  | 7 # modify and/ or redistribute the software under the terms of the CeCILL | 
|  | 8 # license as circulated by CEA, CNRS and INRIA at the following URL | 
|  | 9 # "http://www.cecill.info". | 
|  | 10 # | 
|  | 11 # As a counterpart to the access to the source code and  rights to copy, | 
|  | 12 # modify and redistribute granted by the license, users are provided only | 
|  | 13 # with a limited warranty  and the software's author,  the holder of the | 
|  | 14 # economic rights,  and the successive licensors  have only  limited | 
|  | 15 # liability. | 
|  | 16 # | 
|  | 17 # In this respect, the user's attention is drawn to the risks associated | 
|  | 18 # with loading,  using,  modifying and/or developing or reproducing the | 
|  | 19 # software by the user in light of its specific status of free software, | 
|  | 20 # that may mean  that it is complicated to manipulate,  and  that  also | 
|  | 21 # therefore means  that it is reserved for developers  and  experienced | 
|  | 22 # professionals having in-depth computer knowledge. Users are therefore | 
|  | 23 # encouraged to load and test the software's suitability as regards their | 
|  | 24 # requirements in conditions enabling the security of their systems and/or | 
|  | 25 # data to be ensured and,  more generally, to use and operate it in the | 
|  | 26 # same conditions as regards security. | 
|  | 27 # | 
|  | 28 # The fact that you are presently reading this means that you have had | 
|  | 29 # knowledge of the CeCILL license and that you accept its terms. | 
|  | 30 | 
|  | 31 | 
|  | 32 import os | 
|  | 33 import sys | 
|  | 34 import copy | 
|  | 35 from commons.core.coord.Map import Map | 
|  | 36 from commons.core.coord.Path import Path | 
|  | 37 from commons.core.coord.Align import Align | 
|  | 38 from commons.core.coord.Range import Range | 
|  | 39 from commons.core.coord.SetUtils import SetUtils | 
|  | 40 from commons.core.coord.AlignUtils import AlignUtils | 
|  | 41 from commons.core.checker.RepetException import RepetDataException | 
|  | 42 | 
|  | 43 ## Static methods for the manipulation of Path instances | 
|  | 44 # | 
|  | 45 class PathUtils ( object ): | 
|  | 46 | 
|  | 47     ## Change the identifier of each Set instance in the given list | 
|  | 48     # | 
|  | 49     # @param lPaths list of Path instances | 
|  | 50     # @param newId new identifier | 
|  | 51     # | 
|  | 52     @staticmethod | 
|  | 53     def changeIdInList(lPaths, newId): | 
|  | 54         for iPath in lPaths: | 
|  | 55             iPath.id = newId | 
|  | 56 | 
|  | 57 | 
|  | 58     ## Return a list of Set instances containing the query range from a list of Path instances | 
|  | 59     # | 
|  | 60     # @param lPaths a list of Path instances | 
|  | 61     # | 
|  | 62     @staticmethod | 
|  | 63     def getSetListFromQueries(lPaths): | 
|  | 64         lSets = [] | 
|  | 65         for iPath in lPaths: | 
|  | 66             lSets.append( iPath.getSubjectAsSetOfQuery() ) | 
|  | 67         return lSets | 
|  | 68 | 
|  | 69 | 
|  | 70     ## Return a list of Set instances containing the subject range from a list of Path instances | 
|  | 71     # | 
|  | 72     # @param lPaths a list of Path instances | 
|  | 73     # | 
|  | 74     @staticmethod | 
|  | 75     def getSetListFromSubjects(lPaths): | 
|  | 76         lSets = [] | 
|  | 77         for iPath in lPaths: | 
|  | 78             lSets.append( iPath.getQuerySetOfSubject() ) | 
|  | 79         return lSets | 
|  | 80 | 
|  | 81 | 
|  | 82     ## Return a sorted list of Range instances containing the subjects from a list of Path instances | 
|  | 83     # | 
|  | 84     # @param lPaths a list of Path instances | 
|  | 85     # @note meaningful only if all Path instances have same identifier | 
|  | 86     # | 
|  | 87     @staticmethod | 
|  | 88     def getRangeListFromSubjects( lPaths ): | 
|  | 89         lRanges = [] | 
|  | 90         for iPath in lPaths: | 
|  | 91             lRanges.append( iPath.range_subject ) | 
|  | 92         if lRanges[0].isOnDirectStrand(): | 
|  | 93             return sorted( lRanges, key=lambda iRange: ( iRange.getMin(), iRange.getMax() ) ) | 
|  | 94         else: | 
|  | 95             return sorted( lRanges, key=lambda iRange: ( iRange.getMax(), iRange.getMin() ) ) | 
|  | 96 | 
|  | 97 | 
|  | 98     ## Return a tuple with min and max of query coordinates from Path instances in the given list | 
|  | 99     # | 
|  | 100     # @param lPaths a list of Path instances | 
|  | 101     # | 
|  | 102     @staticmethod | 
|  | 103     def getQueryMinMaxFromPathList(lPaths): | 
|  | 104         qmin = -1 | 
|  | 105         qmax = -1 | 
|  | 106         for iPath in lPaths: | 
|  | 107             if qmin == -1: | 
|  | 108                 qmin = iPath.range_query.start | 
|  | 109             qmin = min(qmin, iPath.range_query.getMin()) | 
|  | 110             qmax = max(qmax, iPath.range_query.getMax()) | 
|  | 111         return (qmin, qmax) | 
|  | 112 | 
|  | 113 | 
|  | 114     ## Return a tuple with min and max of subject coordinates from Path instances in the given list | 
|  | 115     # | 
|  | 116     # @param lPaths lists of Path instances | 
|  | 117     # | 
|  | 118     @staticmethod | 
|  | 119     def getSubjectMinMaxFromPathList(lPaths): | 
|  | 120         smin = -1 | 
|  | 121         smax = -1 | 
|  | 122         for iPath in lPaths: | 
|  | 123             if smin == -1: | 
|  | 124                 smin = iPath.range_subject.start | 
|  | 125             smin = min(smin, iPath.range_subject.getMin()) | 
|  | 126             smax = max(smax, iPath.range_subject.getMax()) | 
|  | 127         return (smin, smax) | 
|  | 128 | 
|  | 129 | 
|  | 130     ## Returns a Path objects list where Paths query coordinates overlapping with | 
|  | 131     # any Path in a list are removed. | 
|  | 132     # | 
|  | 133     # WARNING: input Path lists are modified (sort) | 
|  | 134     # | 
|  | 135     # @param lRefPaths list of paths to check overlaps | 
|  | 136     # @param lPathsToClean list of paths to remove overlapping Paths on query coordinates | 
|  | 137     # @return path list | 
|  | 138     @staticmethod | 
|  | 139     def removeOverlappingPathsOnQueriesBetweenPathLists(lRefPaths, lPathsToClean): | 
|  | 140         if not lRefPaths: | 
|  | 141             print "WARNING: empty reference Paths list" | 
|  | 142             return lPathsToClean | 
|  | 143 | 
|  | 144         lRefQueries = PathUtils.getListOfDistinctQueryNames(lRefPaths) | 
|  | 145         lToCleanQueries = PathUtils.getListOfDistinctQueryNames(lPathsToClean) | 
|  | 146 | 
|  | 147         lCommonQueries = list(set(lRefQueries) & set(lToCleanQueries)) | 
|  | 148         lCommonQueries.sort() | 
|  | 149         lSpecificToCleanQueries = list(set(lToCleanQueries) - set(lCommonQueries)) | 
|  | 150         lSpecificToCleanQueries.sort() | 
|  | 151 | 
|  | 152         lRefPaths.sort(key=lambda iPath: (iPath.getQueryName(), iPath.getIdentifier(), iPath.getQueryMin(), iPath.getQueryMax())) | 
|  | 153         lPathsToClean.sort(key=lambda iPath: (iPath.getQueryName(), iPath.getIdentifier(), iPath.getQueryMin(), iPath.getQueryMax())) | 
|  | 154 | 
|  | 155         lCleanedPaths = [] | 
|  | 156         lSpecificToCleanQueries = list(set(lToCleanQueries) - set(lCommonQueries)) | 
|  | 157         lCleanedPaths.extend(PathUtils.extractPathsFromQueryNameList(lPathsToClean, lSpecificToCleanQueries)) | 
|  | 158 | 
|  | 159         dRefQueryToPathList = PathUtils.getDictOfListsWithQueryNameAsKey(lRefPaths) | 
|  | 160         dToCleanQueryToPathList = PathUtils.getDictOfListsWithQueryNameAsKey(lPathsToClean) | 
|  | 161 | 
|  | 162         for queryName in lCommonQueries: | 
|  | 163 | 
|  | 164             refQueryHash = PathUtils.getDictOfListsWithIdAsKey(dRefQueryToPathList[queryName]) | 
|  | 165             toCleanQueryHash = PathUtils.getDictOfListsWithIdAsKey(dToCleanQueryToPathList[queryName]) | 
|  | 166 | 
|  | 167             for lCleanPathById in toCleanQueryHash.values(): | 
|  | 168                 isOverlapping = False | 
|  | 169 | 
|  | 170                 for lRefPathById in refQueryHash.values(): | 
|  | 171                     if PathUtils.areQueriesOverlappingBetweenPathLists(lRefPathById, lCleanPathById, areListsAlreadySort = True): | 
|  | 172                         isOverlapping = True | 
|  | 173                         break | 
|  | 174 | 
|  | 175                 if not isOverlapping: | 
|  | 176                     lCleanedPaths.extend(lCleanPathById) | 
|  | 177 | 
|  | 178         return lCleanedPaths | 
|  | 179 | 
|  | 180 | 
|  | 181     ## Return True if the query range of any Path instance from the first list overlaps with the query range of any Path instance from the second list | 
|  | 182     # | 
|  | 183     #  @param lPaths1: list of Path instances | 
|  | 184     #  @param lPaths2: list of Path instances | 
|  | 185     #  @return boolean | 
|  | 186     # | 
|  | 187     @staticmethod | 
|  | 188     def areQueriesOverlappingBetweenPathLists( lPaths1, lPaths2, areListsAlreadySort = False): | 
|  | 189         if not areListsAlreadySort: | 
|  | 190             lSortedPaths1 = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lPaths1 ) | 
|  | 191             lSortedPaths2 = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lPaths2 ) | 
|  | 192         else: | 
|  | 193             lSortedPaths1 = lPaths1 | 
|  | 194             lSortedPaths2 = lPaths2 | 
|  | 195         i = 0 | 
|  | 196         j = 0 | 
|  | 197         while i != len(lSortedPaths1): | 
|  | 198             j = 0 | 
|  | 199             while j != len(lSortedPaths2): | 
|  | 200                 if not lSortedPaths1[i].range_query.isOverlapping( lSortedPaths2[j].range_query ): | 
|  | 201                     j += 1 | 
|  | 202                 else: | 
|  | 203                     return True | 
|  | 204             i += 1 | 
|  | 205         return False | 
|  | 206 | 
|  | 207 | 
|  | 208     ## Show Path instances contained in the given list | 
|  | 209     # | 
|  | 210     # @param lPaths a list of Path instances | 
|  | 211     # | 
|  | 212     @staticmethod | 
|  | 213     def showList(lPaths): | 
|  | 214         for iPath in lPaths: | 
|  | 215             iPath.show() | 
|  | 216 | 
|  | 217 | 
|  | 218     ## Write Path instances contained in the given list | 
|  | 219     # | 
|  | 220     # @param lPaths a list of Path instances | 
|  | 221     # @param fileName name of the file to write the Path instances | 
|  | 222     # @param mode the open mode of the file ""w"" or ""a"" | 
|  | 223     # | 
|  | 224     @staticmethod | 
|  | 225     def writeListInFile(lPaths, fileName, mode="w"): | 
|  | 226         AlignUtils.writeListInFile(lPaths, fileName, mode) | 
|  | 227 | 
|  | 228 | 
|  | 229     ## Return new list of Path instances with no duplicate | 
|  | 230     # | 
|  | 231     # @param lPaths a list of Path instances | 
|  | 232     # @param useOnlyCoord boolean if True, check only coordinates and sequence names | 
|  | 233     # @return lUniqPaths a path instances list | 
|  | 234     # | 
|  | 235     @staticmethod | 
|  | 236     def getPathListWithoutDuplicates(lPaths, useOnlyCoord = False): | 
|  | 237         if len(lPaths) < 2: | 
|  | 238             return lPaths | 
|  | 239         lSortedPaths = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier( lPaths ) | 
|  | 240         lUniqPaths = [ lSortedPaths[0] ] | 
|  | 241         if useOnlyCoord: | 
|  | 242             for iPath in lSortedPaths[1:]: | 
|  | 243                 if iPath.range_query.start != lUniqPaths[-1].range_query.start \ | 
|  | 244                 or iPath.range_query.end != lUniqPaths[-1].range_query.end \ | 
|  | 245                 or iPath.range_query.seqname != lUniqPaths[-1].range_query.seqname \ | 
|  | 246                 or iPath.range_subject.start != lUniqPaths[-1].range_subject.start \ | 
|  | 247                 or iPath.range_subject.end != lUniqPaths[-1].range_subject.end \ | 
|  | 248                 or iPath.range_subject.seqname != lUniqPaths[-1].range_subject.seqname: | 
|  | 249                     lUniqPaths.append( iPath ) | 
|  | 250         else: | 
|  | 251             for iPath in lSortedPaths[1:]: | 
|  | 252                 if iPath != lUniqPaths[-1]: | 
|  | 253                     lUniqPaths.append( iPath ) | 
|  | 254         return lUniqPaths | 
|  | 255 | 
|  | 256 | 
|  | 257     @staticmethod | 
|  | 258     def getPathListWithoutDuplicatesOnQueryCoord(lPaths): | 
|  | 259         if len(lPaths) < 2: | 
|  | 260             return lPaths | 
|  | 261         lSortedPaths = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier( lPaths ) | 
|  | 262         lUniqPaths = [ lSortedPaths[0] ] | 
|  | 263         for iPath in lSortedPaths[1:]: | 
|  | 264             if iPath.range_query.start != lUniqPaths[-1].range_query.start \ | 
|  | 265             or iPath.range_query.end != lUniqPaths[-1].range_query.end \ | 
|  | 266             or iPath.range_query.seqname != lUniqPaths[-1].range_query.seqname: | 
|  | 267                 lUniqPaths.append( iPath ) | 
|  | 268         return lUniqPaths | 
|  | 269 | 
|  | 270 | 
|  | 271     ##  Split a Path list in several Path lists according to the identifier | 
|  | 272     # | 
|  | 273     #  @param lPaths a list of Path instances | 
|  | 274     #  @return a dictionary which keys are identifiers and values Path lists | 
|  | 275     # | 
|  | 276     @staticmethod | 
|  | 277     def getDictOfListsWithIdAsKey(lPaths): | 
|  | 278         dId2PathList = dict((ident, []) for ident in PathUtils.getListOfDistinctIdentifiers(lPaths)) | 
|  | 279         for iPath in lPaths: | 
|  | 280             dId2PathList[iPath.id].append(iPath) | 
|  | 281         return dId2PathList | 
|  | 282 | 
|  | 283 | 
|  | 284     ##  Split a Path list in several Path lists according to the query name | 
|  | 285     # | 
|  | 286     #  @param lPaths a list of Path instances | 
|  | 287     #  @return a dictionary which keys are query_names and values Path lists | 
|  | 288     # | 
|  | 289     @staticmethod | 
|  | 290     def getDictOfListsWithQueryNameAsKey(lPaths): | 
|  | 291         dId2PathList = dict((qn, []) for qn in PathUtils.getListOfDistinctQueryNames(lPaths)) | 
|  | 292         for iPath in lPaths: | 
|  | 293             dId2PathList[iPath.getQueryName()].append(iPath) | 
|  | 294         return dId2PathList | 
|  | 295 | 
|  | 296 | 
|  | 297     ##  Split a Path file in several Path lists according to the identifier | 
|  | 298     # | 
|  | 299     #  @param pathFile name of the input Path file | 
|  | 300     #  @return a dictionary which keys are identifiers and values Path lists | 
|  | 301     # | 
|  | 302     @staticmethod | 
|  | 303     def getDictOfListsWithIdAsKeyFromFile( pathFile ): | 
|  | 304         dId2PathList = {} | 
|  | 305         pathFileHandler = open(pathFile, "r") | 
|  | 306         for line in pathFileHandler: | 
|  | 307             iPath = Path() | 
|  | 308             iPath.setFromString(line) | 
|  | 309             if dId2PathList.has_key(iPath.id): | 
|  | 310                 dId2PathList[ iPath.id ].append(iPath) | 
|  | 311             else: | 
|  | 312                 dId2PathList[ iPath.id ] = [ iPath ] | 
|  | 313         pathFileHandler.close() | 
|  | 314         return dId2PathList | 
|  | 315 | 
|  | 316 | 
|  | 317     ## Return a list of Path list(s) obtained while splitting a list of connected Path instances according to another based on query coordinates | 
|  | 318     #  Only the path instance of lToKeep between path instance of lToUnjoin are used to split lToUnjoin | 
|  | 319     # @param lToKeep: a list of Path instances to keep (reference) | 
|  | 320     # @param lToUnjoin: a list of Path instances to unjoin | 
|  | 321     # @return: list of Path list(s) (can be empty if one of the input lists is empty) | 
|  | 322     # @warning: all the path instances in a given list MUST be connected (i.e. same identifier) | 
|  | 323     # @warning: if the path instances in a given list overlap neither within each other nor with the Path instances of the other list, these path instances are not used to split the lToUnjoin | 
|  | 324     # | 
|  | 325     @staticmethod | 
|  | 326     def getPathListUnjoinedBasedOnQuery( lToKeep, lToUnjoin ): | 
|  | 327         lSortedToKeep = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lToKeep ) | 
|  | 328         length_lSortedToKeep = len(lSortedToKeep) | 
|  | 329 #        PathUtils.showList(lSortedToKeep) | 
|  | 330         lSortedToUnjoin = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lToUnjoin ) | 
|  | 331 #        PathUtils.showList(lSortedToUnjoin) | 
|  | 332         length_lSortedToUnjoin = len(lSortedToUnjoin) | 
|  | 333         if lToUnjoin == []: | 
|  | 334             return [] | 
|  | 335         if lToKeep == []: | 
|  | 336             return [ lToUnjoin ] | 
|  | 337 | 
|  | 338         lLists = [] | 
|  | 339         k = 0 | 
|  | 340         while k < length_lSortedToKeep: | 
|  | 341             j1 = 0 | 
|  | 342             while j1 < length_lSortedToUnjoin and lSortedToKeep[k].range_query.getMin() > lSortedToUnjoin[j1].range_query.getMax(): | 
|  | 343                 j1 += 1 | 
|  | 344             if j1 == length_lSortedToUnjoin: | 
|  | 345                 break | 
|  | 346             if j1 != 0: | 
|  | 347                 lLists.append( lSortedToUnjoin[:j1] ) | 
|  | 348                 del lSortedToUnjoin[:j1] | 
|  | 349                 j1 = 0 | 
|  | 350             if k+1 == len(lSortedToKeep): | 
|  | 351                 break | 
|  | 352             j2 = j1 | 
|  | 353             minQueryOf_lSortedToKeepKplus1 = lSortedToKeep[k+1].range_query.getMin() | 
|  | 354             maxQueryOf_lSortedToUnjoinJ2 = lSortedToUnjoin[j2].range_query.getMax() | 
|  | 355             if j2 < length_lSortedToUnjoin and minQueryOf_lSortedToKeepKplus1 > maxQueryOf_lSortedToUnjoinJ2: | 
|  | 356                 while j2 < len(lSortedToUnjoin) and minQueryOf_lSortedToKeepKplus1 > maxQueryOf_lSortedToUnjoinJ2: | 
|  | 357                     j2 += 1 | 
|  | 358                     maxQueryOf_lSortedToUnjoinJ2 = lSortedToUnjoin[j2].range_query.getMax() | 
|  | 359                 lLists.append( lSortedToUnjoin[j1:j2] ) | 
|  | 360                 del lSortedToUnjoin[j1:j2] | 
|  | 361             k += 1 | 
|  | 362 | 
|  | 363         if lLists != [] or k == 0: | 
|  | 364             lLists.append( lSortedToUnjoin ) | 
|  | 365         else: | 
|  | 366             lLists = lSortedToUnjoin | 
|  | 367 | 
|  | 368         return lLists | 
|  | 369 | 
|  | 370 | 
|  | 371     ## Return the identity of the Path list, the identity of each instance being weighted by the length of each query range | 
|  | 372     #  All Paths should have the same query and subject. | 
|  | 373     #  The Paths are merged using query coordinates only. | 
|  | 374     # | 
|  | 375     # @param lPaths list of Path instances | 
|  | 376     # | 
|  | 377     @staticmethod | 
|  | 378     def getIdentityFromPathList( lPaths, checkSubjects=True ): | 
|  | 379         if len( PathUtils.getListOfDistinctQueryNames( lPaths ) ) > 1: | 
|  | 380             msg = "ERROR: try to compute identity from Paths with different queries" | 
|  | 381             sys.stderr.write( "%s\n" % msg ) | 
|  | 382             sys.stderr.flush() | 
|  | 383             raise Exception | 
|  | 384         if checkSubjects and len( PathUtils.getListOfDistinctSubjectNames( lPaths ) ) > 1: | 
|  | 385             msg = "ERROR: try to compute identity from Paths with different subjects" | 
|  | 386             sys.stderr.write( "%s\n" % msg ) | 
|  | 387             sys.stderr.flush() | 
|  | 388             raise Exception | 
|  | 389         identity = 0 | 
|  | 390         lMergedPaths = PathUtils.mergePathsInListUsingQueryCoordsOnly( lPaths ) | 
|  | 391         lQuerySets = PathUtils.getSetListFromQueries( lMergedPaths ) | 
|  | 392         lMergedQuerySets = SetUtils.mergeSetsInList( lQuerySets ) | 
|  | 393         totalLengthOnQry = SetUtils.getCumulLength( lMergedQuerySets ) | 
|  | 394         for iPath in lMergedPaths: | 
|  | 395             identity += iPath.identity * iPath.getLengthOnQuery() | 
|  | 396         weightedIdentity = identity / float(totalLengthOnQry) | 
|  | 397         if weightedIdentity < 0: | 
|  | 398             msg = "ERROR: weighted identity '%.2f' outside range" % weightedIdentity | 
|  | 399             sys.stderr.write("%s\n" % msg) | 
|  | 400             sys.stderr.flush() | 
|  | 401             raise Exception | 
|  | 402         elif weightedIdentity > 100: | 
|  | 403             msg = "ERROR: weighted identity '%.2f' outside range" % weightedIdentity | 
|  | 404             sys.stderr.write("%s\n" % msg) | 
|  | 405             sys.stderr.flush() | 
|  | 406             raise RepetDataException(msg) | 
|  | 407         return weightedIdentity | 
|  | 408 | 
|  | 409 | 
|  | 410     ## Return a list of Path instances sorted in increasing order according to the min of the query, then the max of the query, and finally their initial order. | 
|  | 411     # | 
|  | 412     # @param lPaths list of Path instances | 
|  | 413     # | 
|  | 414     @staticmethod | 
|  | 415     def getPathListSortedByIncreasingMinQueryThenMaxQuery(lPaths): | 
|  | 416         return sorted( lPaths, key=lambda iPath: ( iPath.getQueryMin(), iPath.getQueryMax() ) ) | 
|  | 417 | 
|  | 418 | 
|  | 419     ## Return a list of Path instances sorted in increasing order according to the min of the query, then the max of the query, then their identifier, and finally their initial order. | 
|  | 420     # | 
|  | 421     # @param lPaths list of Path instances | 
|  | 422     # | 
|  | 423     @staticmethod | 
|  | 424     def getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier(lPaths): | 
|  | 425         return sorted( lPaths, key=lambda iPath: ( iPath.getQueryMin(), iPath.getQueryMax(), iPath.getIdentifier() ) ) | 
|  | 426 | 
|  | 427 | 
|  | 428     ## Return a list of Path instances sorted in increasing order according to the min of the query, then the max of the query, then the min of the subject, then the max of the subject and finally their initial order. | 
|  | 429     # | 
|  | 430     # @param lPaths list of Path instances | 
|  | 431     # | 
|  | 432     @staticmethod | 
|  | 433     def getPathListSortedByIncreasingMinQueryThenMaxQueryThenMinSubjectThenMaxSubject(lPaths): | 
|  | 434         return sorted(lPaths, key=lambda iPath: (iPath.getQueryMin(), iPath.getQueryMax(), iPath.getSubjectMin(), iPath.getSubjectMax())) | 
|  | 435 | 
|  | 436 | 
|  | 437     ## Return a list of Path instances sorted in increasing order according to the min, then the inverse of the query length, and finally their initial order | 
|  | 438     # | 
|  | 439     # @param lPaths: list of Path instances | 
|  | 440     # | 
|  | 441     @staticmethod | 
|  | 442     def getPathListSortedByIncreasingQueryMinThenInvQueryLength( lPaths ): | 
|  | 443         return sorted( lPaths, key=lambda iPath: ( iPath.getQueryMin(), 1 / float(iPath.getLengthOnQuery()) ) ) | 
|  | 444 | 
|  | 445 | 
|  | 446     ## Return a list of the distinct identifiers | 
|  | 447     # | 
|  | 448     # @param lPaths list of Path instances | 
|  | 449     # | 
|  | 450     @staticmethod | 
|  | 451     def getListOfDistinctIdentifiers( lPaths ): | 
|  | 452         sDistinctIdentifiers = set([iPath.id for iPath in lPaths]) | 
|  | 453         return list(sDistinctIdentifiers) | 
|  | 454 | 
|  | 455 | 
|  | 456     ## Return a list of the distinct query names present in the collection | 
|  | 457     # | 
|  | 458     # @param lPaths list of Path instances | 
|  | 459     # | 
|  | 460     @staticmethod | 
|  | 461     def getListOfDistinctQueryNames( lPaths ): | 
|  | 462         sDistinctQueryNames = set([iPath.range_query.seqname for iPath in lPaths]) | 
|  | 463         return list(sDistinctQueryNames) | 
|  | 464 | 
|  | 465 | 
|  | 466     ## Return a list of the distinct subject names present in the collection | 
|  | 467     # | 
|  | 468     # @param lPaths list of Path instances | 
|  | 469     # | 
|  | 470     @staticmethod | 
|  | 471     def getListOfDistinctSubjectNames( lPaths ): | 
|  | 472         sDistinctSubjectNames = set([iPath.range_subject.seqname for iPath in lPaths]) | 
|  | 473         return list(sDistinctSubjectNames) | 
|  | 474 | 
|  | 475 | 
|  | 476     ## Return a list of paths with matching query names | 
|  | 477     # | 
|  | 478     # @param lPaths list of Path instances | 
|  | 479     # @param  queryName query name to extract | 
|  | 480     @staticmethod | 
|  | 481     def extractPathsFromQueryName(lPaths, queryName): | 
|  | 482         return [iPath for iPath in lPaths if iPath.getQueryName() == queryName] | 
|  | 483 | 
|  | 484 | 
|  | 485     ## Return a list of paths with matching query names | 
|  | 486     # | 
|  | 487     # @param lPaths list of Path instances | 
|  | 488     # @param  lQueryName query name list to extract | 
|  | 489     @staticmethod | 
|  | 490     def extractPathsFromQueryNameList(lPaths, lQueryNames): | 
|  | 491         d = dict.fromkeys(lQueryNames) | 
|  | 492         return [iPath for iPath in lPaths if iPath.getQueryName() in d] | 
|  | 493 | 
|  | 494 | 
|  | 495     ## Return a list of paths with matching subject names | 
|  | 496     # | 
|  | 497     # @param lPaths list of Path instances | 
|  | 498     # @param  subjectName subject name to extract | 
|  | 499     @staticmethod | 
|  | 500     def extractPathsFromSubjectName(lPaths, subjectName): | 
|  | 501         return [iPath for iPath in lPaths if iPath.getSubjectName() == subjectName] | 
|  | 502 | 
|  | 503 | 
|  | 504     ## Return a list of paths with coordinates overlap a given range | 
|  | 505     # | 
|  | 506     # @param lPaths list of Path instances | 
|  | 507     # @param  queryName query name to extract | 
|  | 508     # @param start starting position | 
|  | 509     # @param end ending position | 
|  | 510     # @return list of Path instance | 
|  | 511     @staticmethod | 
|  | 512     def extractPathsFromQueryCoord(lPaths, queryName, start, end): | 
|  | 513         lExtractedPaths = [] | 
|  | 514         iAlign = Align(range_q = Range(queryName, start, end)) | 
|  | 515 | 
|  | 516         for path in PathUtils.extractPathsFromQueryName(lPaths, queryName): | 
|  | 517             if path.isQueryOverlapping(iAlign): | 
|  | 518                 lExtractedPaths.append(path) | 
|  | 519 | 
|  | 520         return lExtractedPaths | 
|  | 521 | 
|  | 522 | 
|  | 523     ## Return a list of lists containing query coordinates of the connections sorted in increasing order. | 
|  | 524     # | 
|  | 525     # @param lConnectedPaths: list of Path instances having the same identifier | 
|  | 526     # @param minLength: threshold below which connections are not reported (default= 0 bp) | 
|  | 527     # @note: return only connections longer than threshold | 
|  | 528     # @note: if coordinate on query ends at 100, return 101 | 
|  | 529     # @warning: Path instances MUST be sorted in increasing order according to query coordinates | 
|  | 530     # @warning: Path instances MUST be on direct query strand (and maybe on reverse subject strand) | 
|  | 531     # | 
|  | 532     @staticmethod | 
|  | 533     def getListOfJoinCoordinatesOnQuery(lConnectedPaths, minLength=0): | 
|  | 534         lJoinCoordinates = [] | 
|  | 535         for i in xrange(1,len(lConnectedPaths)): | 
|  | 536             startJoin = lConnectedPaths[i-1].range_query.end | 
|  | 537             endJoin = lConnectedPaths[i].range_query.start | 
|  | 538             if endJoin - startJoin + 1 > minLength: | 
|  | 539                 lJoinCoordinates.append( [ startJoin + 1, endJoin - 1 ] ) | 
|  | 540         return lJoinCoordinates | 
|  | 541 | 
|  | 542 | 
|  | 543     ## Return the length on the query of all Path instance in the given list | 
|  | 544     # | 
|  | 545     # @param lPaths list of Path instances | 
|  | 546     # @note overlapping ranges are not summed but truncated. | 
|  | 547     # | 
|  | 548     @staticmethod | 
|  | 549     def getLengthOnQueryFromPathList( lPaths ): | 
|  | 550         lSets = PathUtils.getSetListFromQueries( lPaths ) | 
|  | 551         lMergedSets = SetUtils.mergeSetsInList( lSets ) | 
|  | 552         length = SetUtils.getCumulLength( lMergedSets ) | 
|  | 553         return length | 
|  | 554 | 
|  | 555 | 
|  | 556     ## Convert a Path file into an Align file | 
|  | 557     # | 
|  | 558     # @param pathFile: name of the input Path file | 
|  | 559     # @param alignFile: name of the output Align file | 
|  | 560     # | 
|  | 561     @staticmethod | 
|  | 562     def convertPathFileIntoAlignFile(pathFile, alignFile): | 
|  | 563         pathFileHandler = open(pathFile, "r") | 
|  | 564         alignFileHandler = open(alignFile, "w") | 
|  | 565         iPath = Path() | 
|  | 566         for line in pathFileHandler: | 
|  | 567             iPath.setFromString(line) | 
|  | 568             iAlign = iPath.getAlignInstance() | 
|  | 569             iAlign.write(alignFileHandler) | 
|  | 570         pathFileHandler.close() | 
|  | 571         alignFileHandler.close() | 
|  | 572 | 
|  | 573 | 
|  | 574     #TODO: duplicated method => to rename with the name of the next method (which is called) ? | 
|  | 575     ## Convert a Path File into a Map file with query coordinates only | 
|  | 576     # | 
|  | 577     # @param pathFile: name of the input Path file | 
|  | 578     # @param mapFile: name of the output Map file | 
|  | 579     # | 
|  | 580     @staticmethod | 
|  | 581     def convertPathFileIntoMapFileWithQueryCoordsOnly( pathFile, mapFile ): | 
|  | 582         pathFileHandler = open(pathFile, "r") | 
|  | 583         mapFileHandler = open(mapFile, "w") | 
|  | 584         p = Path() | 
|  | 585         for line in pathFileHandler: | 
|  | 586             p.reset() | 
|  | 587             p.setFromTuple(line.split("\t")) | 
|  | 588             p.writeSubjectAsMapOfQuery(mapFileHandler) | 
|  | 589         pathFileHandler.close() | 
|  | 590         mapFileHandler.close() | 
|  | 591 | 
|  | 592 | 
|  | 593     ## for each line of a given Path file, write the coordinates of the subject on the query as one line in a Map file | 
|  | 594     # | 
|  | 595     # @param pathFile: name of the input Path file | 
|  | 596     # @param mapFile: name of the output Map file | 
|  | 597     # | 
|  | 598     @staticmethod | 
|  | 599     def convertPathFileIntoMapFileWithSubjectsOnQueries( pathFile, mapFile ): | 
|  | 600         PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly( pathFile, mapFile ) | 
|  | 601 | 
|  | 602 | 
|  | 603     ## Merge matches on queries | 
|  | 604     # | 
|  | 605     # @param inFile: name of the input Path file | 
|  | 606     # @param outFile: name of the output Path file | 
|  | 607     # | 
|  | 608     @staticmethod | 
|  | 609     def mergeMatchesOnQueries(inFile, outFile): | 
|  | 610         mapFile = "%s.map" % inFile | 
|  | 611         PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly(inFile, mapFile) | 
|  | 612         cmd = "mapOp" | 
|  | 613         cmd += " -q %s" % mapFile | 
|  | 614         cmd += " -m" | 
|  | 615         cmd += " 2>&1 > /dev/null" | 
|  | 616         exitStatus = os.system(cmd) | 
|  | 617         if exitStatus != 0: | 
|  | 618             print "ERROR: mapOp returned %i" % exitStatus | 
|  | 619             sys.exit(1) | 
|  | 620         os.remove(mapFile) | 
|  | 621         mergeFile = "%s.merge" % mapFile | 
|  | 622         mergeFileHandler = open(mergeFile, "r") | 
|  | 623         outFileHandler = open(outFile, "w") | 
|  | 624         m = Map() | 
|  | 625         for line in mergeFileHandler: | 
|  | 626             m.reset() | 
|  | 627             m.setFromString(line, "\t") | 
|  | 628             m.writeAsQueryOfPath(outFileHandler) | 
|  | 629         mergeFileHandler.close() | 
|  | 630         os.remove(mergeFile) | 
|  | 631         outFileHandler.close() | 
|  | 632 | 
|  | 633 | 
|  | 634     ## Filter chains of Path(s) which length is below a given threshold | 
|  | 635     # | 
|  | 636     # @param lPaths: list of Path instances | 
|  | 637     # @param minLengthChain: minimum length of a chain to be kept | 
|  | 638     # @note: a chain may contain a single Path instance | 
|  | 639     # @return: a list of Path instances | 
|  | 640     # | 
|  | 641     @staticmethod | 
|  | 642     def filterPathListOnChainLength( lPaths, minLengthChain ): | 
|  | 643         lFilteredPaths = [] | 
|  | 644         dPathnum2Paths = PathUtils.getDictOfListsWithIdAsKey( lPaths ) | 
|  | 645         for pathnum in dPathnum2Paths.keys(): | 
|  | 646             length = PathUtils.getLengthOnQueryFromPathList( dPathnum2Paths[ pathnum ] ) | 
|  | 647             if length >= minLengthChain: | 
|  | 648                 lFilteredPaths += dPathnum2Paths[ pathnum ] | 
|  | 649         return lFilteredPaths | 
|  | 650 | 
|  | 651 | 
|  | 652     ## Return a Path list from a Path file | 
|  | 653     # | 
|  | 654     # @param pathFile string name of a Path file | 
|  | 655     # @return a list of Path instances | 
|  | 656     # | 
|  | 657     @staticmethod | 
|  | 658     def getPathListFromFile( pathFile ): | 
|  | 659         lPaths = [] | 
|  | 660         with open(pathFile, "r") as pathFileHandler: | 
|  | 661             for line in pathFileHandler: | 
|  | 662                 iPath = Path() | 
|  | 663                 iPath.setFromString(line) | 
|  | 664                 lPaths.append(iPath) | 
|  | 665         return lPaths | 
|  | 666 | 
|  | 667 | 
|  | 668     ## Convert a chain into a 'pathrange' | 
|  | 669     # | 
|  | 670     # @param lPaths a list of Path instances with the same identifier | 
|  | 671     # @note: the min and max of each Path is used | 
|  | 672     # | 
|  | 673     @staticmethod | 
|  | 674     def convertPathListToPathrange( lPaths ): | 
|  | 675         if len(lPaths) == 0: | 
|  | 676             return | 
|  | 677         if len(lPaths) == 1: | 
|  | 678             return lPaths[0] | 
|  | 679         iPathrange = copy.deepcopy( lPaths[0] ) | 
|  | 680         iPathrange.identity = lPaths[0].identity * lPaths[0].getLengthOnQuery() | 
|  | 681         cumulQueryLength = iPathrange.getLengthOnQuery() | 
|  | 682         for iPath in lPaths[1:]: | 
|  | 683             if iPath.id != iPathrange.id: | 
|  | 684                 msg = "ERROR: two Path instances in the chain have different identifiers" | 
|  | 685                 sys.stderr.write( "%s\n" % ( msg ) ) | 
|  | 686                 sys.exit(1) | 
|  | 687             if iPathrange.range_subject.isOnDirectStrand() != iPath.range_subject.isOnDirectStrand(): | 
|  | 688                 msg = "ERROR: two Path instances in the chain are on different strands" | 
|  | 689                 sys.stderr.write( "%s\n" % ( msg ) ) | 
|  | 690                 sys.exit(1) | 
|  | 691             iPathrange.range_query.start = min( iPathrange.range_query.start, iPath.range_query.start ) | 
|  | 692             iPathrange.range_query.end = max( iPathrange.range_query.end, iPath.range_query.end ) | 
|  | 693             if iPathrange.range_subject.isOnDirectStrand(): | 
|  | 694                 iPathrange.range_subject.start = min( iPathrange.range_subject.start, iPath.range_subject.start ) | 
|  | 695                 iPathrange.range_subject.end = max( iPathrange.range_subject.end, iPath.range_subject.end ) | 
|  | 696             else: | 
|  | 697                 iPathrange.range_subject.start = max( iPathrange.range_subject.start, iPath.range_subject.start ) | 
|  | 698                 iPathrange.range_subject.end = min( iPathrange.range_subject.end, iPath.range_subject.end ) | 
|  | 699             iPathrange.e_value = min( iPathrange.e_value, iPath.e_value ) | 
|  | 700             iPathrange.score += iPath.score | 
|  | 701             iPathrange.identity += iPath.identity * iPath.getLengthOnQuery() | 
|  | 702             cumulQueryLength += iPath.getLengthOnQuery() | 
|  | 703         iPathrange.identity = iPathrange.identity / float(cumulQueryLength) | 
|  | 704         return iPathrange | 
|  | 705 | 
|  | 706 | 
|  | 707     ## Convert a Path file into an Align file via 'pathrange' | 
|  | 708     # | 
|  | 709     # @param pathFile: name of the input Path file | 
|  | 710     # @param alignFile: name of the output Align file | 
|  | 711     # @param verbose integer verbosity level | 
|  | 712     # @note: the min and max of each Path is used | 
|  | 713     # | 
|  | 714     @staticmethod | 
|  | 715     def convertPathFileIntoAlignFileViaPathrange( pathFile, alignFile, verbose=0 ): | 
|  | 716         lPaths = PathUtils.getPathListFromFile( pathFile ) | 
|  | 717         dId2PathList = PathUtils.getDictOfListsWithIdAsKey( lPaths ) | 
|  | 718         lIds = dId2PathList.keys() | 
|  | 719         lIds.sort() | 
|  | 720         if verbose > 0: | 
|  | 721             msg = "number of chains: %i" % ( len(lIds) ) | 
|  | 722             sys.stdout.write( "%s\n" % ( msg ) ) | 
|  | 723             sys.stdout.flush() | 
|  | 724         alignFileHandler = open( alignFile, "w" ) | 
|  | 725         for identifier in lIds: | 
|  | 726             iPath = PathUtils.convertPathListToPathrange( dId2PathList[ identifier ] ) | 
|  | 727             iAlign = iPath.getAlignInstance() | 
|  | 728             iAlign.write( alignFileHandler ) | 
|  | 729         alignFileHandler.close() | 
|  | 730 | 
|  | 731 | 
|  | 732     ## Split a list of Path instances according to the name of the query | 
|  | 733     # | 
|  | 734     # @param lInPaths list of align instances | 
|  | 735     # @return lOutPathLists list of align instances lists | 
|  | 736     # | 
|  | 737     @staticmethod | 
|  | 738     def splitPathListByQueryName( lInPaths ): | 
|  | 739         lInSortedPaths = sorted( lInPaths, key=lambda o: o.range_query.seqname ) | 
|  | 740         lOutPathLists = [] | 
|  | 741         if len(lInSortedPaths) != 0 : | 
|  | 742             lPathsForCurrentQuery = [] | 
|  | 743             previousQuery = lInSortedPaths[0].range_query.seqname | 
|  | 744             for iPath in lInSortedPaths : | 
|  | 745                 currentQuery = iPath.range_query.seqname | 
|  | 746                 if previousQuery != currentQuery : | 
|  | 747                     lOutPathLists.append( lPathsForCurrentQuery ) | 
|  | 748                     previousQuery = currentQuery | 
|  | 749                     lPathsForCurrentQuery = [] | 
|  | 750                 lPathsForCurrentQuery.append( iPath ) | 
|  | 751 | 
|  | 752             lOutPathLists.append(lPathsForCurrentQuery) | 
|  | 753 | 
|  | 754         return lOutPathLists | 
|  | 755 | 
|  | 756 | 
|  | 757     ## Create an Path file from each list of Path instances in the input list | 
|  | 758     # | 
|  | 759     # @param lPathList list of lists with Path instances | 
|  | 760     # @param pattern string | 
|  | 761     # @param dirName string | 
|  | 762     # | 
|  | 763     @staticmethod | 
|  | 764     def createPathFiles( lPathList, pattern, dirName="" ): | 
|  | 765         nbFiles = len(lPathList) | 
|  | 766         countFile = 1 | 
|  | 767         if dirName != "" : | 
|  | 768             if dirName[-1] != "/": | 
|  | 769                 dirName = dirName + '/' | 
|  | 770             os.mkdir( dirName ) | 
|  | 771 | 
|  | 772         for lPath in lPathList: | 
|  | 773             fileName = dirName + pattern  + "_%s.path" % ( str(countFile).zfill( len(str(nbFiles)) ) ) | 
|  | 774             PathUtils.writeListInFile( lPath, fileName ) | 
|  | 775             countFile += 1 | 
|  | 776 | 
|  | 777 | 
|  | 778     ## Merge all overlapping Path instances in a list without considering the identifiers | 
|  | 779     #  Start by sorting the Path instances by their increasing min coordinate | 
|  | 780     # | 
|  | 781     # @return: a new list with the merged Path instances | 
|  | 782     # | 
|  | 783     @staticmethod | 
|  | 784     def mergePathsInList( lPaths ): | 
|  | 785         lMergedPaths = [] | 
|  | 786         if len(lPaths)==0: | 
|  | 787             return lMergedPaths | 
|  | 788 | 
|  | 789         lSortedPaths = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength( lPaths ) | 
|  | 790 | 
|  | 791         prev_count = 0 | 
|  | 792         for iPath in lSortedPaths[0:]: | 
|  | 793             if prev_count != len(lSortedPaths): | 
|  | 794                 for i in lSortedPaths[ prev_count + 1: ]: | 
|  | 795                     if iPath.isOverlapping( i ): | 
|  | 796                         iPath.merge( i ) | 
|  | 797                 isAlreadyInList = False | 
|  | 798                 for newPath in lMergedPaths: | 
|  | 799                     if newPath.isOverlapping( iPath ): | 
|  | 800                         isAlreadyInList = True | 
|  | 801                         newPath.merge( iPath ) | 
|  | 802                         lMergedPaths [ lMergedPaths.index( newPath ) ] = newPath | 
|  | 803                 if not isAlreadyInList: | 
|  | 804                     lMergedPaths.append( iPath ) | 
|  | 805                 prev_count += 1 | 
|  | 806         return lMergedPaths | 
|  | 807 | 
|  | 808 | 
|  | 809     ## Merge all overlapping Path instances in a list without considering if subjects are overlapping. | 
|  | 810     #  Start by sorting the Path instances by their increasing min coordinate. | 
|  | 811     # | 
|  | 812     # @return: a new list with the merged Path instances | 
|  | 813     # | 
|  | 814     @staticmethod | 
|  | 815     def mergePathsInListUsingQueryCoordsOnly( lPaths ): | 
|  | 816         lMergedPaths = [] | 
|  | 817         if len(lPaths)==0: | 
|  | 818             return lMergedPaths | 
|  | 819 | 
|  | 820         lSortedPaths = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength( lPaths ) | 
|  | 821 | 
|  | 822         prev_count = 0 | 
|  | 823         for iPath in lSortedPaths[0:]: | 
|  | 824             if prev_count != len(lSortedPaths): | 
|  | 825                 for i in lSortedPaths[ prev_count + 1: ]: | 
|  | 826                     if iPath.isQueryOverlapping( i ): | 
|  | 827                         iPath.merge( i ) | 
|  | 828                 isAlreadyInList = False | 
|  | 829                 for newPath in lMergedPaths: | 
|  | 830                     if newPath.isQueryOverlapping( iPath ): | 
|  | 831                         isAlreadyInList = True | 
|  | 832                         newPath.merge( iPath ) | 
|  | 833                         lMergedPaths [ lMergedPaths.index( newPath ) ] = newPath | 
|  | 834                 if not isAlreadyInList: | 
|  | 835                     lMergedPaths.append( iPath ) | 
|  | 836                 prev_count += 1 | 
|  | 837         return lMergedPaths | 
|  | 838 | 
|  | 839 | 
|  | 840     ## Convert a Path file into a GFF file | 
|  | 841     # | 
|  | 842     # @param pathFile: name of the input Path file | 
|  | 843     # @param gffFile: name of the output GFF file | 
|  | 844     # @param source: source to write in the GFF file (column 2) | 
|  | 845     # | 
|  | 846     # @note the 'path' query is supposed to correspond to the 'gff' first column | 
|  | 847     # | 
|  | 848     @staticmethod | 
|  | 849     def convertPathFileIntoGffFile( pathFile, gffFile, source="REPET", verbose=0 ): | 
|  | 850         dId2PathList = PathUtils.getDictOfListsWithIdAsKeyFromFile( pathFile ) | 
|  | 851         if verbose > 0: | 
|  | 852             msg = "number of chains: %i" % ( len(dId2PathList.keys()) ) | 
|  | 853             sys.stdout.write( "%s\n" % msg ) | 
|  | 854             sys.stdout.flush() | 
|  | 855         gffFileHandler = open( gffFile, "w" ) | 
|  | 856         for id in dId2PathList.keys(): | 
|  | 857             if len( dId2PathList[ id ] ) == 1: | 
|  | 858                 iPath = dId2PathList[ id ][0] | 
|  | 859                 string = iPath.toStringAsGff( ID="%i" % iPath.getIdentifier(), | 
|  | 860                                               source=source ) | 
|  | 861                 gffFileHandler.write( "%s\n" % string ) | 
|  | 862             else: | 
|  | 863                 iPathrange = PathUtils.convertPathListToPathrange( dId2PathList[ id ] ) | 
|  | 864                 string = iPathrange.toStringAsGff( ID="ms%i" % iPathrange.getIdentifier(), | 
|  | 865                                                    source=source ) | 
|  | 866                 gffFileHandler.write( "%s\n" % string ) | 
|  | 867                 count = 0 | 
|  | 868                 for iPath in dId2PathList[ id ]: | 
|  | 869                     count += 1 | 
|  | 870                     string = iPath.toStringAsGff( type="match_part", | 
|  | 871                                                   ID="mp%i-%i" % ( iPath.getIdentifier(), count ), | 
|  | 872                                                   Parent="ms%i" % iPathrange.getIdentifier(), | 
|  | 873                                                   source=source ) | 
|  | 874                     gffFileHandler.write( "%s\n" % string ) | 
|  | 875         gffFileHandler.close() | 
|  | 876 | 
|  | 877 | 
|  | 878     ## Convert a Path file into a Set file | 
|  | 879     # replace old parser.pathrange2set | 
|  | 880     # @param pathFile: name of the input Path file | 
|  | 881     # @param setFile: name of the output Set file | 
|  | 882     # | 
|  | 883     @staticmethod | 
|  | 884     def convertPathFileIntoSetFile( pathFile, setFile ): | 
|  | 885         pathFileHandler = open(pathFile, "r") | 
|  | 886         setFileHandler = open(setFile, "w") | 
|  | 887         iPath = Path() | 
|  | 888         for line in pathFileHandler: | 
|  | 889             iPath.setFromString(line) | 
|  | 890             iSet = iPath.getSubjectAsSetOfQuery() | 
|  | 891             iSet.write(setFileHandler) | 
|  | 892         pathFileHandler.close() | 
|  | 893         setFileHandler.close() | 
|  | 894 | 
|  | 895     ## Write Path File without duplicated Path (same query, same subject and same coordinate) | 
|  | 896     # | 
|  | 897     # @param inputFile: name of the input Path file | 
|  | 898     # @param outputFile: name of the output Path file | 
|  | 899     # | 
|  | 900     @staticmethod | 
|  | 901     def removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(inputFile, outputFile): | 
|  | 902         f = open(inputFile, "r") | 
|  | 903         line = f.readline() | 
|  | 904         previousQuery = "" | 
|  | 905         previousSubject = "" | 
|  | 906         lPaths = [] | 
|  | 907         while line: | 
|  | 908             iPath = Path() | 
|  | 909             iPath.setFromString(line) | 
|  | 910             query = iPath.getQueryName() | 
|  | 911             subject = iPath.getSubjectName() | 
|  | 912             if (query != previousQuery or subject != previousSubject) and lPaths != []: | 
|  | 913                 lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths) | 
|  | 914                 PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a") | 
|  | 915                 lPaths = [] | 
|  | 916             lPaths.append(iPath) | 
|  | 917             previousQuery = query | 
|  | 918             previousSubject = subject | 
|  | 919             line = f.readline() | 
|  | 920         lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths) | 
|  | 921         PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a") | 
|  | 922         f.close() |