6
# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.


from commons.core.coord.Set import Set

## Static methods for the manipulation of Set instances
#
# Every method is static: call it as SetUtils.method( ... ).
# The Set objects are only used through their public interface (id, name,
# seqname, start, getMin(), getMax(), getLength(), isOverlapping(), merge(), ...).
#
class SetUtils( object ):

    ## Change the identifier of each Set instance in the given list
    #
    # @param lSets list of Set instances (modified in place)
    # @param newId new identifier
    #
    @staticmethod
    def changeIdInList(lSets, newId):
        for iSet in lSets:
            iSet.id = newId

    ## Return the length of the overlap between two lists of Set instances
    #
    # @param lSets1 list of Set instances
    # @param lSets2 list of Set instances
    # @return length of overlap
    # @warning sequence names are supposed to be identical
    #
    @staticmethod
    def getOverlapLengthBetweenLists(lSets1, lSets2):
        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
        nbSets2 = len(lSet2Sorted)
        osize = 0
        j = 0
        for iSet1 in lSet1Sorted:
            # skip the Sets of the second list that end before the current Set starts
            while j != nbSets2 and iSet1.getMin() > lSet2Sorted[j].getMax() \
                    and not iSet1.isOverlapping(lSet2Sorted[j]):
                j += 1
            # accumulate the overlap with each consecutive overlapping Set,
            # without moving 'j' so that the next Set of list 1 restarts from it
            jj = j
            while jj != nbSets2 and iSet1.isOverlapping(lSet2Sorted[jj]):
                osize += iSet1.getOverlapLength(lSet2Sorted[jj])
                jj += 1
        return osize

    ## Return True if the two lists of Set instances overlap, False otherwise
    #
    # @param lSets1 list of Set instances
    # @param lSets2 list of Set instances
    #
    @staticmethod
    def areSetsOverlappingBetweenLists( lSets1, lSets2 ):
        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
        nbSets2 = len(lSet2Sorted)
        j = 0
        for iSet1 in lSet1Sorted:
            # skip the Sets of the second list that end before the current Set starts
            while j != nbSets2 and iSet1.getMin() > lSet2Sorted[j].getMax() \
                    and not iSet1.isOverlapping(lSet2Sorted[j]):
                j += 1
            if j != nbSets2 and iSet1.isOverlapping(lSet2Sorted[j]):
                return True
        return False

    ## Merge all overlapping Set instances between two lists of Set and give the next identifier
    #
    # Identifiers are expected to be strictly positive: the identifiers of
    # lSets2 are negated by getListOfIdListOfOverlappingSets() to tell both
    # lists apart.
    #
    # @param lSets1 list of Set instances
    # @param lSets2 list of Set instances
    # @param max_id start id value for inserting new Set; 0 means "highest identifier of lSets1, plus one" (1 when lSets1 is empty)
    # @return a new list of the merged Set instances and the next identifier
    # @warning the Set instances given as input are modified (coordinates and identifiers)
    #
    @staticmethod
    def getListOfMergedSetsAndNextId(lSets1, lSets2, max_id=0):
        lSets_merged = []
        list2merge = SetUtils.getListOfIdListOfOverlappingSets( lSets1, lSets2 )
        idlist1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
        idlist2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
        if max_id == 0:
            # max() fails on an empty sequence: start at 1 when lSets1 is empty
            max_id = max(idlist1) + 1 if idlist1 else 1
        for lIds in list2merge:
            if lIds == []:
                continue
            lToMerge = []
            min_id = max(lIds)
            for currentId in lIds:
                if currentId > 0:
                    # positive identifier: comes from lSets1
                    if min_id > currentId:
                        min_id = currentId
                    lToMerge.extend(idlist1[currentId])
                    del idlist1[currentId]
                else:
                    # negative identifier: comes from lSets2
                    lToMerge.extend(idlist2[currentId * -1])
                    del idlist2[currentId * -1]
            lToMerge = SetUtils.mergeSetsInList(lToMerge)
            SetUtils.changeIdInList(lToMerge, min_id)
            lSets_merged.extend(lToMerge)
        # Sets of lSets1 without any overlap keep their identifier
        for alist in idlist1.values():
            lSets_merged.extend(alist)
        # Sets of lSets2 without any overlap get a brand new identifier
        for alist in idlist2.values():
            SetUtils.changeIdInList(alist, max_id)
            lSets_merged.extend(alist)
            max_id += 1
        return lSets_merged, max_id

#    ## Concatenate two Set instance lists and give the next identifier
#    #
#    # @param lSets1 list of Set instances
#    # @param lSets2 list of Set instances
#    # @param maxId start id value for inserting new Set
#    # @return a new list of Set instances and the next identifier
#    #
#    @staticmethod
#    def getSetsListOfTwoConcatenatedSetsListAndNextId(lSets1, lSets2, maxId = 0):
#        lOutSets = lSets1
#        dId2SetsList2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
#        if maxId == 0:
#            dId2SetsList1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
#            maxId = max(dId2SetsList1.keys())
#        for lSets in dId2SetsList2.values():
#            SetUtils.changeIdInList(lSets, maxId)
#            lOutSets.extend(lSets)
#            maxId += 1
#        return lOutSets, maxId

    ## Return the sum of the length of each Set instance in the given list
    #
    # @param lSets: list of Set instances
    # @return the cumulative length (0 for an empty list)
    #
    @staticmethod
    def getCumulLength(lSets):
        length = 0
        for iSet in lSets:
            length += iSet.getLength()
        return length

    ## Return a tuple with min and max coordinates of Set instances in the given list
    #
    # @param lSets list of Set instances
    # @return the tuple (min, max), or (-1, -1) if the list is empty
    #
    @staticmethod
    def getListBoundaries(lSets):
        qmin = -1
        qmax = -1
        for iSet in lSets:
            # -1 means "not initialised yet"
            if qmin == -1:
                qmin = iSet.start
            qmin = min(qmin, iSet.getMin())
            qmax = max(qmax, iSet.getMax())
        return (qmin, qmax)

    ## Show Set instances contained in the given list
    #
    # @param lSets list of Set instances
    #
    @staticmethod
    def showList(lSets):
        for iSet in lSets:
            iSet.show()

    ## Write Set instances contained in the given list
    #
    # @param lSets list of Set instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile(lSets, fileName, mode="w"):
        # 'with' closes the file even if a Set fails to write itself
        with open(fileName, mode) as fileHandler:
            for iSet in lSets:
                iSet.write(fileHandler)

    ## Split a Set list in several Set lists according to the identifier
    #
    # @param lSets list of Set instances
    # @return a dictionary which keys are identifiers and values Set lists
    #
    @staticmethod
    def getDictOfListsWithIdAsKey(lSets):
        dId2SetList = {}
        for iSet in lSets:
            dId2SetList.setdefault(iSet.id, []).append(iSet)
        return dId2SetList

    ## Read a Set file and split its content in several Set lists according to the identifier
    #
    # Each line is split on tabulations and given to Set.setFromTuple().
    #
    # @param setFile string name of a Set file
    # @return a dictionary which keys are identifiers and values Set lists
    #
    @staticmethod
    def getDictOfListsWithIdAsKeyFromFile( setFile ):
        dId2SetList = {}
        with open( setFile, "r" ) as setFileHandler:
            for line in setFileHandler:
                iSet = Set()
                # only strip the end-of-line, so that a last line without
                # trailing newline does not lose its last character
                iSet.setFromTuple( line.rstrip("\n").split("\t") )
                dId2SetList.setdefault( iSet.id, [] ).append( iSet )
        return dId2SetList

    ## Return a Map list from the given Set List
    #
    # @param lSets list of Set instances
    # @return the list of what Set.set2map() returns for each Set, in the same order
    #
    @staticmethod
    def getMapListFromSetList(lSets):
        return [ iSet.set2map() for iSet in lSets ]

    ## Construct a Set list from a Map list
    #
    # The Set identifiers are the 1-based ranks of the Map instances in the list.
    #
    # @param lMaps list of Map instances
    #
    @staticmethod
    def getSetListFromMapList(lMaps):
        lSets = []
        for rank, iMap in enumerate(lMaps, 1):
            lSets.append( Set(rank, iMap.name, iMap.seqname, iMap.start, iMap.end) )
        return lSets

    ## Merge all overlapping Set instances in a list without considering the identifiers.
    #  Start by sorting Set instances by their increasing Min coordinate.
    #
    # @param lSets list of Set instances
    # @return: a new list of the merged Set instances
    # @warning the Set instances given as input are modified by the calls to merge()
    #
    @staticmethod
    def mergeSetsInList(lSets):
        lMerged = []
        if len(lSets) == 0:
            return lMerged

        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenInvLength( lSets )

        for rank, iSet in enumerate(lSortedSets):
            # first extend the current Set with every following Set it overlaps
            for iNextSet in lSortedSets[ rank + 1: ]:
                if iSet.isOverlapping( iNextSet ):
                    iSet.merge( iNextSet )
            # then either absorb it into Sets already kept, or keep it
            isAlreadyInList = False
            for newSet in lMerged:
                if newSet.isOverlapping( iSet ):
                    isAlreadyInList = True
                    newSet.merge( iSet )
                    # kept from the original code; index() relies on Set equality
                    lMerged[ lMerged.index( newSet ) ] = newSet
            if not isAlreadyInList:
                lMerged.append( iSet )
        return lMerged

    ## Unjoin a Set list according to another
    #
    # @param lToKeep: a list of Set instances to keep
    # @param lToUnjoin: a list of Set instances to unjoin
    # @return: lToUnjoin split in several list
    #
    @staticmethod
    def getSetListUnjoined(lToKeep, lToUnjoin):
        lSortedToKeep = SetUtils.getSetListSortedByIncreasingMinThenMax( lToKeep )
        lSortedToUnjoin = SetUtils.getSetListSortedByIncreasingMinThenMax( lToUnjoin )
        if lSortedToUnjoin == []:
            return []
        if lSortedToKeep == []:
            return [ lSortedToUnjoin ]

        i = 0
        resultListSet = []
        while i < len(lSortedToKeep):
            # Sets to unjoin located entirely before the current Set to keep
            j1 = 0
            while j1 < len(lSortedToUnjoin) and lSortedToKeep[i].getMin() > lSortedToUnjoin[j1].getMax():
                j1 += 1
            if j1 == len(lSortedToUnjoin):
                break
            if j1 != 0:
                resultListSet.append(lSortedToUnjoin[:j1])
                del lSortedToUnjoin[:j1]
                j1 = 0
            if i + 1 == len(lSortedToKeep):
                break
            # Sets to unjoin located before the next Set to keep
            j2 = j1
            if j2 < len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
                while j2 < len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
                    j2 += 1
                resultListSet.append(lSortedToUnjoin[j1:j2])
                del lSortedToUnjoin[j1:j2]
            i += 1

        # what remains forms the last group
        if resultListSet != [] or i == 0:
            resultListSet.append(lSortedToUnjoin)
        return resultListSet

    ## Return new list of Set instances with no duplicate
    #
    # Duplicates are detected with the '!=' operator of the Set instances, after sorting.
    #
    # @param lSets list of Set instances
    # @return the input list itself if it has less than two elements, a new sorted list otherwise
    #
    @staticmethod
    def getSetListWithoutDuplicates( lSets ):
        if len(lSets) < 2:
            return lSets
        lSortedSet = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
        lUniqSet = [ lSortedSet[0] ]
        for iSet in lSortedSet[1:]:
            if iSet != lUniqSet[-1]:
                lUniqSet.append( iSet )
        return lUniqSet

    ## Return a list of Set instances sorted in increasing order according to the Min, then the Max, and finally their initial order
    #
    # @param lSets: list of Set instances
    # @return a new list (the input list is not modified)
    #
    @staticmethod
    def getSetListSortedByIncreasingMinThenMax( lSets ):
        return sorted( lSets, key=lambda iSet: ( iSet.getMin(), iSet.getMax() ) )

    ## Return a list of Set instances sorted in increasing order according to the min, then the inverse of the length, and finally their initial order
    #
    # @param lSets: list of Set instances
    # @return a new list (the input list is not modified)
    # @warning a Set whose getLength() is 0 raises ZeroDivisionError
    #
    @staticmethod
    def getSetListSortedByIncreasingMinThenInvLength( lSets ):
        return sorted( lSets, key=lambda iSet: ( iSet.getMin(), 1 / float(iSet.getLength()) ) )

    ## Return a list of Set instances sorted in increasing order according to the SeqName, then the Name, then the Min, then the Max and finally their initial order
    #
    # @param lSets: list of Set instances
    # @return a new list (the input list is not modified)
    #
    @staticmethod
    def getSetListSortedBySeqThenRegionThenMinThenMax(lSets):
        return sorted(lSets, key=lambda iSet: (iSet.getSeqname(), iSet.getName(), iSet.getMin(), iSet.getMax()))

    ## Return a list of identifier lists of overlapping Sets from the subject list, according to the reference list
    #
    # In the returned lists, identifiers from lRef are kept as they are whereas
    # identifiers from lSubject are multiplied by -1. Two Sets are grouped
    # only if they overlap and isOnDirectStrand() returns the same value for both.
    #
    # @param lRef list of Set instances
    # @param lSubject list of Set instances
    #
    @staticmethod
    def getListOfIdListOfOverlappingSets(lRef, lSubject):
        lSortedRef = SetUtils.getSetListSortedByIncreasingMinThenMax( lRef )
        lSortedSubject = SetUtils.getSetListSortedByIncreasingMinThenMax( lSubject )
        nbSubjects = len(lSortedSubject)

        lOverlappingSet = []
        # identifier -> index of its group in lOverlappingSet
        id2LOverlappingSet_pos = {}

        j = 0
        for iRef in lSortedRef:
            # skip the subjects that end before the current reference starts
            while j != nbSubjects and iRef.getMin() > lSortedSubject[j].getMax() \
                    and not ( iRef.isOverlapping(lSortedSubject[j]) \
                              and iRef.isOnDirectStrand() == lSortedSubject[j].isOnDirectStrand() ):
                j += 1
            jj = j
            while jj != nbSubjects and iRef.isOverlapping(lSortedSubject[jj]) \
                    and iRef.isOnDirectStrand() == lSortedSubject[jj].isOnDirectStrand():
                id1 = iRef.id
                id2 = lSortedSubject[jj].id * -1
                isId1Known = id1 in id2LOverlappingSet_pos
                isId2Known = id2 in id2LOverlappingSet_pos
                if isId1Known and not isId2Known:
                    # the subject joins the group of the reference
                    lOverlappingSet[id2LOverlappingSet_pos[id1]].append(id2)
                    id2LOverlappingSet_pos[id2] = id2LOverlappingSet_pos[id1]
                elif isId2Known and not isId1Known:
                    # the reference joins the group of the subject
                    lOverlappingSet[id2LOverlappingSet_pos[id2]].append(id1)
                    id2LOverlappingSet_pos[id1] = id2LOverlappingSet_pos[id2]
                elif not isId1Known and not isId2Known:
                    # none of them is known yet: open a new group
                    newPos = len(lOverlappingSet)
                    lOverlappingSet.append([id1, id2])
                    id2LOverlappingSet_pos[id1] = newPos
                    id2LOverlappingSet_pos[id2] = newPos
                jj += 1

        return lOverlappingSet

    ## Return a list of sets without overlapping between two lists of sets
    #
    # Every Set of lSet2 is compared to every Set of lSet1 with diff(); only
    # non-empty Sets of at least 20 in length are returned.
    #
    # @param lSet1 list of Set instances
    # @param lSet2 list of Set instances
    # @warning lSet2 is modified: non-empty results of diff() are appended to it
    #
    @staticmethod
    def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2):
        for iSet1 in lSet1:
            # lSet2 grows during the iteration on purpose: the Sets appended
            # here are visited by this very loop as well
            for iSet2 in lSet2:
                iNewSet = iSet2.diff(iSet1)
                if not iNewSet.isEmpty() and iNewSet.getLength() >= 20:
                    lSet2.append(iNewSet)
        lSet2WithoutOverlaps = []
        for iSet2 in lSet2:
            if not iSet2.isEmpty() and iSet2.getLength() >= 20:
                lSet2WithoutOverlaps.append(iSet2)
        return lSet2WithoutOverlaps

    ## Return a Set list from a Set file
    #
    # @param setFile string name of a Set file
    # @return a list of Set instances
    #
    @staticmethod
    def getSetListFromFile( setFile ):
        lSets = []
        with open( setFile, "r" ) as setFileHandler:
            for line in setFileHandler:
                iSet = Set()
                iSet.setFromString( line )
                lSets.append( iSet )
        return lSets

    ## Convert a Set file into a Map file
    #
    # Each line of the Set file is parsed with Set.setFromString(), converted
    # with Set.getMapInstance() and written in the Map file.
    #
    # @param setFile string name of the input Set file
    # @param mapFile string name of the output Map file (overwritten)
    #
    @staticmethod
    def convertSetFileIntoMapFile( setFile, mapFile ):
        # the input is opened first so that a missing Set file does not create an empty Map file
        with open( setFile, "r" ) as setFileHandler:
            with open( mapFile, "w" ) as mapFileHandler:
                iSet = Set()
                for line in setFileHandler:
                    iSet.setFromString( line )
                    iMap = iSet.getMapInstance()
                    iMap.write( mapFileHandler )

    ## Split a Set list in several Set lists according to the sequence name
    #
    # @param lSets list of Set instances
    # @return a dictionary which keys are sequence names and values Set lists
    #
    @staticmethod
    def getDictOfListsWithSeqnameAsKey( lSets ):
        dSeqnamesToSetList = {}
        for iSet in lSets:
            dSeqnamesToSetList.setdefault( iSet.seqname, [] ).append( iSet )
        return dSeqnamesToSetList

    ## Return the Set instances whose length is between the given bounds (included)
    #
    # @param lSets list of Set instances
    # @param minLength minimum length
    # @param maxLength maximum length
    # @return a new list, or the input list itself when both bounds equal 0 (no filtering)
    #
    @staticmethod
    def filterOnLength( lSets, minLength=0, maxLength=10000000000 ):
        if minLength == 0 and maxLength == 0:
            return lSets
        return [ iSet for iSet in lSets if minLength <= iSet.getLength() <= maxLength ]

    ## Return the list of the distinct names found in a Set file, in order of first appearance
    #
    # @param setFile string name of a Set file
    # @return a list of names
    #
    @staticmethod
    def getListOfNames( setFile ):
        lNames = []
        with open( setFile, "r" ) as setFileHandler:
            iSet = Set()
            for line in setFileHandler:
                # only strip the end-of-line (see getDictOfListsWithIdAsKeyFromFile)
                iSet.setFromTuple( line.rstrip("\n").split("\t") )
                if iSet.name not in lNames:
                    lNames.append( iSet.name )
        return lNames

    ## Read a Set file and group its content by name, then by identifier
    #
    # @param setFile string name of a Set file
    # @return a dictionary which keys are names and values are dictionaries which keys are identifiers and values Set lists
    #
    @staticmethod
    def getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile ):
        dNames2DictsId = {}
        with open( setFile, "r" ) as setFileHandler:
            for line in setFileHandler:
                iSet = Set()
                # only strip the end-of-line (see getDictOfListsWithIdAsKeyFromFile)
                iSet.setFromTuple( line.rstrip("\n").split("\t") )
                dId2SetList = dNames2DictsId.setdefault( iSet.name, {} )
                dId2SetList.setdefault( iSet.id, [] ).append( iSet )
        return dNames2DictsId