13
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 from commons.core.coord.Set import Set
|
|
33
|
|
34 ## Static methods for the manipulation of Set instances
|
|
35 #
|
|
36 class SetUtils( object ):
|
|
37
|
|
38 ## Change the identifier of each Set instance in the given list
|
|
39 #
|
|
40 # @param lSets list of Set instances
|
|
41 # @param newId new identifier
|
|
42 #
|
|
43 def changeIdInList(lSets, newId):
|
|
44 for iSet in lSets:
|
|
45 iSet.id = newId
|
|
46
|
|
47 changeIdInList = staticmethod( changeIdInList )
|
|
48
|
|
49 ## Return the length of the overlap between two lists of Set instances
|
|
50 #
|
|
51 # @param lSets1 list of Set instances
|
|
52 # @param lSets2 list of Set instances
|
|
53 # @return length of overlap
|
|
54 # @warning sequence names are supposed to be identical
|
|
55 #
|
|
56 def getOverlapLengthBetweenLists(lSets1, lSets2):
|
|
57 lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
|
|
58 lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
|
|
59 osize = 0
|
|
60 i = 0
|
|
61 j = 0
|
|
62 while i!= len(lSet1Sorted):
|
|
63 while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\
|
|
64 and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):
|
|
65 j+=1
|
|
66 jj=j
|
|
67 while jj!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[jj]):
|
|
68 osize+=lSet1Sorted[i].getOverlapLength(lSet2Sorted[jj])
|
|
69 jj+=1
|
|
70 i+=1
|
|
71 return osize
|
|
72
|
|
73 getOverlapLengthBetweenLists = staticmethod( getOverlapLengthBetweenLists )
|
|
74
|
|
75 ## Return True if the two lists of Set instances overlap, False otherwise
|
|
76 #
|
|
77 # @param lSets1 list of Set instances
|
|
78 # @param lSets2 list of Set instances
|
|
79 #
|
|
80 def areSetsOverlappingBetweenLists( lSets1, lSets2 ):
|
|
81 lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
|
|
82 lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
|
|
83 i=0
|
|
84 j=0
|
|
85 while i!= len(lSet1Sorted):
|
|
86 while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\
|
|
87 and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):
|
|
88 j+=1
|
|
89 if j!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[j]):
|
|
90 return True
|
|
91 i+=1
|
|
92 return False
|
|
93
|
|
94 areSetsOverlappingBetweenLists = staticmethod( areSetsOverlappingBetweenLists )
|
|
95
|
|
96 ## Merge all overlapping Set instances between two lists of Set and give the next identifier
|
|
97 #
|
|
98 # @param lSets1 list of Set instances
|
|
99 # @param lSets2 list of Set instances
|
|
100 # @param max_id start id value for inserting new Set
|
|
101 # @return a new list of the merged Set instances and the next identifier
|
|
102 #
|
|
103 def getListOfMergedSetsAndNextId(lSets1, lSets2, max_id=0):
|
|
104 lSets_merged = []
|
|
105 list2merge = SetUtils.getListOfIdListOfOverlappingSets ( lSets1,lSets2 )
|
|
106 idlist1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
|
|
107 idlist2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
|
|
108 if max_id == 0:
|
|
109 max_id = max(idlist1.keys()) + 1
|
|
110 for i in list2merge:
|
|
111 if i == []:
|
|
112 continue
|
|
113 l = []
|
|
114 min_id = max(i)
|
|
115 for j in i:
|
|
116 if j>0:
|
|
117 if min_id>j:
|
|
118 min_id=j
|
|
119 l.extend(idlist1[j])
|
|
120 del idlist1[j]
|
|
121 else:
|
|
122 l.extend(idlist2[j*-1])
|
|
123 del idlist2[j*-1]
|
|
124 l = SetUtils.mergeSetsInList(l)
|
|
125 SetUtils.changeIdInList(l, min_id)
|
|
126 lSets_merged.extend(l)
|
|
127 for id, alist in idlist1.items():
|
|
128 lSets_merged.extend(alist)
|
|
129 for id,alist in idlist2.items():
|
|
130 SetUtils.changeIdInList(alist,max_id)
|
|
131 lSets_merged.extend(alist)
|
|
132 max_id+=1
|
|
133 return lSets_merged, max_id
|
|
134
|
|
135 getListOfMergedSetsAndNextId = staticmethod ( getListOfMergedSetsAndNextId )
|
|
136
|
|
137 # ## Concatenate two Set instance lists and give the next identifier
|
|
138 # #
|
|
139 # # @param lSets1 list of Set instances
|
|
140 # # @param lSets2 list of Set instances
|
|
141 # # @param maxId start id value for inserting new Set
|
|
142 # # @return a new list of Set instances and the next identifier
|
|
143 # #
|
|
144 # @staticmethod
|
|
145 # def getSetsListOfTwoConcatenatedSetsListAndNextId(lSets1, lSets2, maxId = 0):
|
|
146 # lOutSets = lSets1
|
|
147 # dId2SetsList2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
|
|
148 # if maxId == 0:
|
|
149 # dId2SetsList1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
|
|
150 # maxId = max(dId2SetsList1.keys())
|
|
151 # for lSets in dId2SetsList2.values():
|
|
152 # SetUtils.changeIdInList(lSets, maxId)
|
|
153 # lOutSets.extend(lSets)
|
|
154 # maxId += 1
|
|
155 # return lOutSets, maxId
|
|
156
|
|
157 ## Return the sum of the length of each Set instance in the given list
|
|
158 #
|
|
159 # @param lSets: list of Set instances
|
|
160 #
|
|
161 def getCumulLength(lSets):
|
|
162 length = 0
|
|
163 for i in lSets:
|
|
164 length += i.getLength()
|
|
165 return length
|
|
166
|
|
167 getCumulLength = staticmethod( getCumulLength )
|
|
168
|
|
169 ## Return a tuple with min and max coordinates of Set instances in the given list
|
|
170 #
|
|
171 # @param lSets list of Set instances
|
|
172 #
|
|
173 def getListBoundaries(lSets):
|
|
174 qmin = -1
|
|
175 qmax = -1
|
|
176 for iSet in lSets:
|
|
177 if qmin == -1:
|
|
178 qmin = iSet.start
|
|
179 qmin = min(qmin, iSet.getMin())
|
|
180 qmax = max(qmax, iSet.getMax())
|
|
181 return (qmin, qmax)
|
|
182
|
|
183 getListBoundaries = staticmethod( getListBoundaries )
|
|
184
|
|
185 ## Show Set instances contained in the given list
|
|
186 #
|
|
187 # @param lSets list of Set instances
|
|
188 #
|
|
189 def showList(lSets):
|
|
190 for iSet in lSets:
|
|
191 iSet.show()
|
|
192
|
|
193 showList = staticmethod( showList )
|
|
194
|
|
195 ## Write Set instances contained in the given list
|
|
196 #
|
|
197 # @param lSets list of Set instances
|
|
198 # @param fileName a file name
|
|
199 # @param mode the open mode of the file '"w"' or '"a"'
|
|
200 #
|
|
201 def writeListInFile(lSets, fileName, mode="w"):
|
|
202 fileHandler = open(fileName, mode)
|
|
203 for iSet in lSets:
|
|
204 iSet.write(fileHandler)
|
|
205 fileHandler.close()
|
|
206
|
|
207 writeListInFile = staticmethod( writeListInFile )
|
|
208
|
|
209 ## Split a Set list in several Set lists according to the identifier
|
|
210 #
|
|
211 # @param lSets list of Set instances
|
|
212 # @return a dictionary which keys are identifiers and values Set lists
|
|
213 #
|
|
214 def getDictOfListsWithIdAsKey(lSets):
|
|
215 dId2SetList = {}
|
|
216 for iSet in lSets:
|
|
217 if dId2SetList.has_key(iSet.id):
|
|
218 dId2SetList[iSet.id].append(iSet)
|
|
219 else:
|
|
220 dId2SetList[iSet.id] = [iSet]
|
|
221 return dId2SetList
|
|
222
|
|
223 getDictOfListsWithIdAsKey = staticmethod( getDictOfListsWithIdAsKey )
|
|
224
|
|
225
|
|
226 ## Split a Set list in several Set lists according to the identifier
|
|
227 #
|
|
228 # @param lSets list of Set instances
|
|
229 # @return a dictionary which keys are identifiers and values Set lists
|
|
230 #
|
|
231 def getDictOfListsWithIdAsKeyFromFile( setFile ):
|
|
232 dId2SetList = {}
|
|
233 setFileHandler = open( setFile, "r" )
|
|
234 while True:
|
|
235 line = setFileHandler.readline()
|
|
236 if line == "":
|
|
237 break
|
|
238 iSet = Set()
|
|
239 iSet.setFromTuple( line[:-1].split("\t") )
|
|
240 if not dId2SetList.has_key( iSet.id ):
|
|
241 dId2SetList[ iSet.id ] = []
|
|
242 dId2SetList[ iSet.id ].append( iSet )
|
|
243 setFileHandler.close()
|
|
244 return dId2SetList
|
|
245
|
|
246 getDictOfListsWithIdAsKeyFromFile = staticmethod( getDictOfListsWithIdAsKeyFromFile )
|
|
247
|
|
248
|
|
249 ## Return a Map list from the given Set List
|
|
250 #
|
|
251 # @param lSets list of Set instances
|
|
252 #
|
|
253 def getMapListFromSetList(lSets):
|
|
254 lMaps = []
|
|
255 for iSet in lSets:
|
|
256 lMaps.append(iSet.set2map())
|
|
257 return lMaps
|
|
258
|
|
259 getMapListFromSetList = staticmethod( getMapListFromSetList )
|
|
260
|
|
261 ## Construct a Set list from a Map list
|
|
262 #
|
|
263 # @param lMaps list of Map instances
|
|
264 #
|
|
265 def getSetListFromMapList(lMaps):
|
|
266 lSets = []
|
|
267 c = 0
|
|
268 for iMap in lMaps:
|
|
269 c += 1
|
|
270 lSets.append( Set(c, iMap.name, iMap.seqname, iMap.start, iMap.end) )
|
|
271 return lSets
|
|
272
|
|
273 getSetListFromMapList = staticmethod( getSetListFromMapList )
|
|
274
|
|
275 ## Merge all overlapping Set instances in a list without considering the identifiers.
|
|
276 # Start by sorting Set instances by their increasing Min coordinate.
|
|
277 #
|
|
278 # @return: a new list of the merged Set instances
|
|
279 #
|
|
280 def mergeSetsInList(lSets):
|
|
281 l=[]
|
|
282 if len(lSets)==0:
|
|
283 return l
|
|
284
|
|
285 lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenInvLength( lSets )
|
|
286
|
|
287 prev_count = 0
|
|
288 for iSet in lSortedSets[0:]:
|
|
289 if prev_count != len(lSortedSets):
|
|
290 for i in lSortedSets[ prev_count + 1: ]:
|
|
291 if iSet.isOverlapping( i ):
|
|
292 iSet.merge( i )
|
|
293 IsAlreadyInList = False
|
|
294 for newSet in l:
|
|
295 if newSet.isOverlapping( iSet ):
|
|
296 IsAlreadyInList = True
|
|
297 newSet.merge( iSet )
|
|
298 l [ l.index( newSet ) ] = newSet
|
|
299 if not IsAlreadyInList:
|
|
300 l.append( iSet )
|
|
301 prev_count += 1
|
|
302 return l
|
|
303
|
|
304 mergeSetsInList = staticmethod( mergeSetsInList )
|
|
305
|
|
306 ## Unjoin a Set list according to another
|
|
307 #
|
|
308 # @param lToKeep: a list of Set instances to keep
|
|
309 # @param lToUnjoin: a list of Set instances to unjoin
|
|
310 # @return: lToUnjoin split in several list
|
|
311 #
|
|
312 def getSetListUnjoined(lToKeep, lToUnjoin):
|
|
313 lSortedToKeep = SetUtils.getSetListSortedByIncreasingMinThenMax( lToKeep )
|
|
314 lSortedToUnjoin = SetUtils.getSetListSortedByIncreasingMinThenMax( lToUnjoin )
|
|
315 if lSortedToUnjoin == []:
|
|
316 return []
|
|
317 if lSortedToKeep == []:
|
|
318 return [ lSortedToUnjoin ]
|
|
319
|
|
320 i=0
|
|
321 resultListSet=[]
|
|
322 while i<len(lSortedToKeep):
|
|
323 j1=0
|
|
324 while j1<len(lSortedToUnjoin) and lSortedToKeep[i].getMin() > lSortedToUnjoin[j1].getMax():
|
|
325 j1+=1
|
|
326 if j1==len(lSortedToUnjoin):
|
|
327 break
|
|
328 if j1!=0:
|
|
329 resultListSet.append(lSortedToUnjoin[:j1])
|
|
330 del lSortedToUnjoin[:j1]
|
|
331 j1=0
|
|
332 if i+1==len(lSortedToKeep):
|
|
333 break
|
|
334 j2=j1
|
|
335 if j2<len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
|
|
336 while j2<len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
|
|
337 j2+=1
|
|
338 resultListSet.append(lSortedToUnjoin[j1:j2])
|
|
339 del lSortedToUnjoin[j1:j2]
|
|
340 i+=1
|
|
341
|
|
342 if resultListSet!=[] or i == 0:
|
|
343 resultListSet.append(lSortedToUnjoin)
|
|
344 return resultListSet
|
|
345
|
|
346 getSetListUnjoined = staticmethod(getSetListUnjoined)
|
|
347
|
|
348 ## Return new list of Set instances with no duplicate
|
|
349 #
|
|
350 # @param lSets list of Set instances
|
|
351 #
|
|
352 def getSetListWithoutDuplicates( lSets ):
|
|
353 if len(lSets) < 2:
|
|
354 return lSets
|
|
355 lSortedSet = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
|
|
356 lUniqSet = [ lSortedSet[0] ]
|
|
357 for iSet in lSortedSet[1:]:
|
|
358 if iSet != lUniqSet[-1]:
|
|
359 lUniqSet.append( iSet )
|
|
360 return lUniqSet
|
|
361
|
|
362 getSetListWithoutDuplicates = staticmethod( getSetListWithoutDuplicates )
|
|
363
|
|
364 ## Return a list of Set instances sorted in increasing order according to the Min, then the Max, and finally their initial order
|
|
365 #
|
|
366 # @param lSets: list of Set instances
|
|
367 #
|
|
368 def getSetListSortedByIncreasingMinThenMax( lSets ):
|
|
369 return sorted( lSets, key=lambda iSet: ( iSet.getMin(), iSet.getMax() ) )
|
|
370
|
|
371 getSetListSortedByIncreasingMinThenMax = staticmethod( getSetListSortedByIncreasingMinThenMax )
|
|
372
|
|
373 ## Return a list of Set instances sorted in increasing order according to the min, then the inverse of the length, and finally their initial order
|
|
374 #
|
|
375 # @param lSets: list of Set instances
|
|
376 #
|
|
377 def getSetListSortedByIncreasingMinThenInvLength( lSets ):
|
|
378 return sorted( lSets, key=lambda iSet: ( iSet.getMin(), 1 / float(iSet.getLength()) ) )
|
|
379
|
|
380 getSetListSortedByIncreasingMinThenInvLength = staticmethod( getSetListSortedByIncreasingMinThenInvLength )
|
|
381
|
|
382 ## Return a list of Set instances sorted in increasing order according to the SeqName, then the Name, then the Min, then the Max and finally their initial order
|
|
383 #
|
|
384 # @param lSets: list of Set instances
|
|
385 #
|
|
386 def getSetListSortedBySeqThenRegionThenMinThenMax(lSets):
|
|
387 return sorted(lSets, key=lambda iSet: (iSet.getSeqname(), iSet.getName(), iSet.getMin(), iSet.getMax()))
|
|
388
|
|
389 getSetListSortedBySeqThenRegionThenMinThenMax = staticmethod(getSetListSortedBySeqThenRegionThenMinThenMax)
|
|
390
|
|
391 ## Return a list of identifier lists of overlapping Sets from the subject list, according to the reference list
|
|
392 #
|
|
393 # @param lRef list of Set instances
|
|
394 # @param lSubject list of Set instances
|
|
395 #
|
|
396 def getListOfIdListOfOverlappingSets(lRef,lSubject):
|
|
397 lSortedRef = SetUtils.getSetListSortedByIncreasingMinThenMax( lRef )
|
|
398 lSortedSubject = SetUtils.getSetListSortedByIncreasingMinThenMax( lSubject )
|
|
399
|
|
400 lOverlappingSet = []
|
|
401 lOverlappingSetCounter = 0
|
|
402
|
|
403 id2LOverlappingSet_pos = {}
|
|
404
|
|
405 i = 0
|
|
406 j = 0
|
|
407 while i!= len(lSortedRef):
|
|
408 while j!= len(lSortedSubject) and lSortedRef[i].getMin()>lSortedSubject[j].getMax()\
|
|
409 and not(lSortedRef[i].isOverlapping(lSortedSubject[j])\
|
|
410 and lSortedRef[i].isOnDirectStrand()==lSortedSubject[j].isOnDirectStrand()):
|
|
411 j+=1
|
|
412 jj=j
|
|
413 while jj!= len(lSortedSubject) and lSortedRef[i].isOverlapping(lSortedSubject[jj])\
|
|
414 and lSortedRef[i].isOnDirectStrand()==lSortedSubject[jj].isOnDirectStrand():
|
|
415 id1=lSortedRef[i].id
|
|
416 id2=lSortedSubject[jj].id*-1
|
|
417 if id2LOverlappingSet_pos.has_key(id1) \
|
|
418 and not id2LOverlappingSet_pos.has_key(id2):
|
|
419 lOverlappingSet[id2LOverlappingSet_pos[id1]].append(id2)
|
|
420 id2LOverlappingSet_pos[id2]=id2LOverlappingSet_pos[id1]
|
|
421 if id2LOverlappingSet_pos.has_key(id2) \
|
|
422 and not id2LOverlappingSet_pos.has_key(id1):
|
|
423 lOverlappingSet[id2LOverlappingSet_pos[id2]].append(id1)
|
|
424 id2LOverlappingSet_pos[id1]=id2LOverlappingSet_pos[id2]
|
|
425 if not id2LOverlappingSet_pos.has_key(id2) \
|
|
426 and not id2LOverlappingSet_pos.has_key(id1):
|
|
427 lOverlappingSet.append([id1,id2])
|
|
428 id2LOverlappingSet_pos[id1]=lOverlappingSetCounter
|
|
429 id2LOverlappingSet_pos[id2]=lOverlappingSetCounter
|
|
430 lOverlappingSetCounter+=1
|
|
431 jj+=1
|
|
432 i+=1
|
|
433
|
|
434 return lOverlappingSet
|
|
435
|
|
436 getListOfIdListOfOverlappingSets = staticmethod (getListOfIdListOfOverlappingSets)
|
|
437
|
|
438 ## Return a list of sets without overlapping between two lists of sets
|
|
439 #
|
|
440 # @param lSet1 and lSet2
|
|
441 #
|
|
442 def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2):
|
|
443 for i in lSet1:
|
|
444 for idx,j in enumerate(lSet2):
|
|
445 n=j.diff(i)
|
|
446 if not n.isEmpty() and n.getLength()>=20:
|
|
447 lSet2.append(n)
|
|
448 lSet2WithoutOverlaps=[]
|
|
449 for i in lSet2:
|
|
450 if not i.isEmpty() and i.getLength()>=20:
|
|
451 lSet2WithoutOverlaps.append(i)
|
|
452 return lSet2WithoutOverlaps
|
|
453
|
|
454 getListOfSetWithoutOverlappingBetweenTwoListOfSet = staticmethod (getListOfSetWithoutOverlappingBetweenTwoListOfSet)
|
|
455
|
|
456 ## Return a Set list from a Set file
|
|
457 #
|
|
458 # @param setFile string name of a Set file
|
|
459 # @return a list of Set instances
|
|
460 #
|
|
461 def getSetListFromFile( setFile ):
|
|
462 lSets = []
|
|
463 setFileHandler = open( setFile, "r" )
|
|
464 while True:
|
|
465 line = setFileHandler.readline()
|
|
466 if line == "":
|
|
467 break
|
|
468 iSet = Set()
|
|
469 iSet.setFromString( line )
|
|
470 lSets.append( iSet )
|
|
471 setFileHandler.close()
|
|
472 return lSets
|
|
473
|
|
474 getSetListFromFile = staticmethod( getSetListFromFile )
|
|
475
|
|
476
|
|
477 def convertSetFileIntoMapFile( setFile, mapFile ):
|
|
478 setFileHandler = open( setFile, "r" )
|
|
479 mapFileHandler = open( mapFile, "w" )
|
|
480 iSet = Set()
|
|
481 while True:
|
|
482 line = setFileHandler.readline()
|
|
483 if line == "":
|
|
484 break
|
|
485 iSet.setFromString( line )
|
|
486 iMap = iSet.getMapInstance()
|
|
487 iMap.write( mapFileHandler )
|
|
488 setFileHandler.close()
|
|
489 mapFileHandler.close()
|
|
490
|
|
491 convertSetFileIntoMapFile = staticmethod( convertSetFileIntoMapFile )
|
|
492
|
|
493
|
|
494 def getDictOfListsWithSeqnameAsKey( lSets ):
|
|
495 dSeqnamesToSetList = {}
|
|
496 for iSet in lSets:
|
|
497 if not dSeqnamesToSetList.has_key( iSet.seqname ):
|
|
498 dSeqnamesToSetList[ iSet.seqname ] = []
|
|
499 dSeqnamesToSetList[ iSet.seqname ].append( iSet )
|
|
500 return dSeqnamesToSetList
|
|
501
|
|
502 getDictOfListsWithSeqnameAsKey = staticmethod( getDictOfListsWithSeqnameAsKey )
|
|
503
|
|
504
|
|
505 def filterOnLength( lSets, minLength=0, maxLength=10000000000 ):
|
|
506 if minLength == 0 and maxLength == 0:
|
|
507 return lSets
|
|
508 lFiltered = []
|
|
509 for iSet in lSets:
|
|
510 if minLength <= iSet.getLength() <= maxLength:
|
|
511 lFiltered.append( iSet )
|
|
512 return lFiltered
|
|
513
|
|
514 filterOnLength = staticmethod( filterOnLength )
|
|
515
|
|
516
|
|
517 def getListOfNames( setFile ):
|
|
518 lNames = []
|
|
519 setFileHandler = open( setFile, "r" )
|
|
520 iSet = Set()
|
|
521 while True:
|
|
522 line = setFileHandler.readline()
|
|
523 if line == "":
|
|
524 break
|
|
525 iSet.setFromTuple( line[:-1].split("\t") )
|
|
526 if iSet.name not in lNames:
|
|
527 lNames.append( iSet.name )
|
|
528 setFileHandler.close()
|
|
529 return lNames
|
|
530
|
|
531 getListOfNames = staticmethod( getListOfNames )
|
|
532
|
|
533
|
|
534 def getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile ):
|
|
535 dNames2DictsId = {}
|
|
536 setFileHandler = open( setFile, "r" )
|
|
537 while True:
|
|
538 line = setFileHandler.readline()
|
|
539 if line == "":
|
|
540 break
|
|
541 iSet = Set()
|
|
542 iSet.setFromTuple( line[:-1].split("\t") )
|
|
543 if not dNames2DictsId.has_key( iSet.name ):
|
|
544 dNames2DictsId[ iSet.name ] = { iSet.id: [ iSet ] }
|
|
545 else:
|
|
546 if not dNames2DictsId[ iSet.name ].has_key( iSet.id ):
|
|
547 dNames2DictsId[ iSet.name ][ iSet.id ] = [ iSet ]
|
|
548 else:
|
|
549 dNames2DictsId[ iSet.name ][ iSet.id ].append( iSet )
|
|
550 setFileHandler.close()
|
|
551 return dNames2DictsId
|
|
552
|
|
553 getDictOfDictsWithNamesThenIdAsKeyFromFile = staticmethod( getDictOfDictsWithNamesThenIdAsKeyFromFile )
|