annotate TEisotools-1.1.a/commons/core/coord/SetUtils.py @ 15:255c852351c5 draft

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:36:44 -0400
parents feef9a0db09d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
2 # http://www.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
4 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
9 # "http://www.cecill.info".
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
10 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
15 # liability.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
16 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
26 # same conditions as regards security.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
27 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
30
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
31
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
32 from commons.core.coord.Set import Set
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
33
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
34 ## Static methods for the manipulation of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
35 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
36 class SetUtils( object ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
37
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
38 ## Change the identifier of each Set instance in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
39 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
40 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
41 # @param newId new identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
42 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def changeIdInList(lSets, newId):
    """Give the same identifier to every element of a list.

    @param lSets list of Set instances (modified in place)
    @param newId value stored in the 'id' attribute of each element
    """
    for member in lSets:
        member.id = newId
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
46
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
47 changeIdInList = staticmethod( changeIdInList )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
48
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
49 ## Return the length of the overlap between two lists of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
50 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
51 # @param lSets1 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
52 # @param lSets2 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
53 # @return length of overlap
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
54 # @warning sequence names are supposed to be identical
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
55 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getOverlapLengthBetweenLists(lSets1, lSets2):
    """Return the accumulated overlap length between two lists of Sets.

    Both lists are first sorted by increasing min then max.  For each Set
    of the first list, the leading Sets of the second list that end before
    it starts are skipped (the skip position is kept for the next Sets),
    then overlap lengths are added up over the consecutive Sets of the
    second list that overlap it, stopping at the first one that does not.

    @param lSets1 list of Set instances
    @param lSets2 list of Set instances
    @return sum of the values returned by getOverlapLength()
    """
    lSorted1 = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
    lSorted2 = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
    nbSets2 = len(lSorted2)
    total = 0
    first = 0
    for iSet1 in lSorted1:
        # Move past the Sets of list 2 located strictly before iSet1.
        while first != nbSets2:
            iSet2 = lSorted2[first]
            if not (iSet1.getMin() > iSet2.getMax()) or iSet1.isOverlapping(iSet2):
                break
            first += 1
        # Accumulate over the run of overlapping Sets starting there.
        cursor = first
        while cursor != nbSets2 and iSet1.isOverlapping(lSorted2[cursor]):
            total += iSet1.getOverlapLength(lSorted2[cursor])
            cursor += 1
    return total
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
72
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
73 getOverlapLengthBetweenLists = staticmethod( getOverlapLengthBetweenLists )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
74
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
75 ## Return True if the two lists of Set instances overlap, False otherwise
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
76 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
77 # @param lSets1 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
78 # @param lSets2 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
79 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def areSetsOverlappingBetweenLists( lSets1, lSets2 ):
    """Tell whether at least one Set of the first list overlaps one of the second.

    Both lists are sorted by increasing min then max before being scanned
    together; the position reached in the second list is kept from one Set
    of the first list to the next.

    @param lSets1 list of Set instances
    @param lSets2 list of Set instances
    @return True as soon as an overlap is found, False otherwise
    """
    lSorted1 = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
    lSorted2 = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
    nbSets2 = len(lSorted2)
    first = 0
    for iSet1 in lSorted1:
        # Move past the Sets of list 2 located strictly before iSet1.
        while first != nbSets2:
            iSet2 = lSorted2[first]
            if not (iSet1.getMin() > iSet2.getMax()) or iSet1.isOverlapping(iSet2):
                break
            first += 1
        if first != nbSets2 and iSet1.isOverlapping(lSorted2[first]):
            return True
    return False
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
93
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
94 areSetsOverlappingBetweenLists = staticmethod( areSetsOverlappingBetweenLists )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
95
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
96 ## Merge all overlapping Set instances between two lists of Set and give the next identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
97 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
98 # @param lSets1 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
99 # @param lSets2 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
100 # @param max_id start id value for inserting new Set
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
101 # @return a new list of the merged Set instances and the next identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
102 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getListOfMergedSetsAndNextId(lSets1, lSets2, max_id=0):
    """Merge the Sets overlapping between two lists and renumber the others.

    @param lSets1 list of Set instances
    @param lSets2 list of Set instances
    @param max_id first identifier given to the Sets of lSets2 that were
                  not merged; when 0, the largest identifier found in
                  lSets1 plus one is used (1 when lSets1 is empty)
    @return a tuple (list of Set instances, next identifier)
    """
    lSets_merged = []
    list2merge = SetUtils.getListOfIdListOfOverlappingSets(lSets1, lSets2)
    idlist1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
    idlist2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
    if max_id == 0:
        # max() raises ValueError on an empty sequence, so an empty lSets1
        # needs an explicit starting value.
        # NOTE(review): 1 assumes identifiers are positive integers -- confirm.
        max_id = max(idlist1) + 1 if idlist1 else 1
    for lIds in list2merge:
        if lIds == []:
            continue
        lToMerge = []
        # Entries > 0 are looked up in idlist1, the others in idlist2 under
        # their opposite value.  The merged group receives the smallest
        # positive entry, or max(lIds) when there is none.
        min_id = max(lIds)
        for signedId in lIds:
            if signedId > 0:
                if min_id > signedId:
                    min_id = signedId
                lToMerge.extend(idlist1[signedId])
                del idlist1[signedId]
            else:
                lToMerge.extend(idlist2[signedId * -1])
                del idlist2[signedId * -1]
        lToMerge = SetUtils.mergeSetsInList(lToMerge)
        SetUtils.changeIdInList(lToMerge, min_id)
        lSets_merged.extend(lToMerge)
    # Sets of list 1 that were not merged keep their identifier.
    for lRemaining in idlist1.values():
        lSets_merged.extend(lRemaining)
    # Each remaining group of list 2 gets a fresh identifier.
    for lRemaining in idlist2.values():
        SetUtils.changeIdInList(lRemaining, max_id)
        lSets_merged.extend(lRemaining)
        max_id += 1
    return lSets_merged, max_id
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
134
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
135 getListOfMergedSetsAndNextId = staticmethod ( getListOfMergedSetsAndNextId )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
136
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
137 # ## Concatenate two Set instance lists and give the next identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
138 # #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
139 # # @param lSets1 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
140 # # @param lSets2 list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
141 # # @param maxId start id value for inserting new Set
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
142 # # @return a new list of Set instances and the next identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
143 # #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
144 # @staticmethod
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
145 # def getSetsListOfTwoConcatenatedSetsListAndNextId(lSets1, lSets2, maxId = 0):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
146 # lOutSets = lSets1
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
147 # dId2SetsList2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
148 # if maxId == 0:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
149 # dId2SetsList1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
150 # maxId = max(dId2SetsList1.keys())
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
151 # for lSets in dId2SetsList2.values():
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
152 # SetUtils.changeIdInList(lSets, maxId)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
153 # lOutSets.extend(lSets)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
154 # maxId += 1
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
155 # return lOutSets, maxId
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
156
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
157 ## Return the sum of the length of each Set instance in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
158 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
159 # @param lSets: list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
160 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getCumulLength(lSets):
    """Add up the value returned by getLength() for every element.

    @param lSets list of Set instances
    @return the total length, 0 for an empty list
    """
    return sum(iSet.getLength() for iSet in lSets)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
166
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
167 getCumulLength = staticmethod( getCumulLength )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
168
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
169 ## Return a tuple with min and max coordinates of Set instances in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
170 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
171 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
172 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getListBoundaries(lSets):
    """Return the smallest min and the largest max found in a list of Sets.

    -1 is used as the "not set yet" marker for both bounds, so an empty
    list yields (-1, -1).

    @param lSets list of Set instances
    @return a tuple (min, max)
    """
    lowest = highest = -1
    for iSet in lSets:
        # While the lower bound still holds the marker, seed it from 'start'.
        seed = iSet.start if lowest == -1 else lowest
        lowest = min(seed, iSet.getMin())
        highest = max(highest, iSet.getMax())
    return (lowest, highest)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
182
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
183 getListBoundaries = staticmethod( getListBoundaries )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
184
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
185 ## Show Set instances contained in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
186 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
187 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
188 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def showList(lSets):
    """Call show() on every element of the list, in order.

    @param lSets list of Set instances
    """
    for member in lSets:
        member.show()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
192
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
193 showList = staticmethod( showList )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
194
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
195 ## Write Set instances contained in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
196 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
197 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
198 # @param fileName a file name
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
199 # @param mode the open mode of the file '"w"' or '"a"'
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
200 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def writeListInFile(lSets, fileName, mode="w"):
    """Write every element of the list into a file.

    Each element writes itself through its write() method, which receives
    the open file object.

    @param lSets list of Set instances
    @param fileName name of the output file
    @param mode open mode of the file, "w" or "a"
    """
    # The 'with' block closes the file even when an element fails to write.
    with open(fileName, mode) as fileHandler:
        for iSet in lSets:
            iSet.write(fileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
206
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
207 writeListInFile = staticmethod( writeListInFile )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
208
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
209 ## Split a Set list in several Set lists according to the identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
210 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
211 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
212 # @return a dictionary which keys are identifiers and values Set lists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
213 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getDictOfListsWithIdAsKey(lSets):
    """Group the elements of a list according to their identifier.

    @param lSets list of Set instances
    @return a dictionary whose keys are the 'id' values and whose values
            are the lists of elements carrying that identifier, in their
            input order
    """
    dId2SetList = {}
    for iSet in lSets:
        # setdefault() replaces dict.has_key(), which Python 3 removed.
        dId2SetList.setdefault(iSet.id, []).append(iSet)
    return dId2SetList
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
222
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
223 getDictOfListsWithIdAsKey = staticmethod( getDictOfListsWithIdAsKey )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
224
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
225
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
226 ## Read Set instances from a tab-delimited file and group them in lists according to the identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
227 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
228 # @param setFile name of a tab-delimited file with one Set per line
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
229 # @return a dictionary which keys are identifiers and values Set lists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
230 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getDictOfListsWithIdAsKeyFromFile( setFile ):
    """Read Sets from a file and group them according to their identifier.

    Each line is split on tabulations and handed to Set.setFromTuple().

    @param setFile name of the input file, one Set per line
    @return a dictionary whose keys are the identifiers and whose values
            are the lists of Set instances read for that identifier
    """
    dId2SetList = {}
    # The 'with' block closes the file even if a line cannot be parsed.
    with open( setFile, "r" ) as setFileHandler:
        for line in setFileHandler:
            iSet = Set()
            # Remove the line terminator only: slicing with [:-1] would
            # drop a data character when the last line has no newline.
            iSet.setFromTuple( line.rstrip("\n").split("\t") )
            # setdefault() replaces dict.has_key(), which Python 3 removed.
            dId2SetList.setdefault( iSet.id, [] ).append( iSet )
    return dId2SetList
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
245
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
246 getDictOfListsWithIdAsKeyFromFile = staticmethod( getDictOfListsWithIdAsKeyFromFile )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
247
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
248
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
249 ## Return a Map list from the given Set List
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
250 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
251 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
252 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getMapListFromSetList(lSets):
    """Convert every element of the list with its set2map() method.

    @param lSets list of Set instances
    @return the list of converted objects, in the same order
    """
    return [iSet.set2map() for iSet in lSets]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
258
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
259 getMapListFromSetList = staticmethod( getMapListFromSetList )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
260
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
261 ## Construct a Set list from a Map list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
262 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
263 # @param lMaps list of Map instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
264 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getSetListFromMapList(lMaps):
    """Build one Set per Map, numbering them from 1 in list order.

    @param lMaps list of Map instances (name, seqname, start and end are read)
    @return the list of new Set instances
    """
    return [
        Set(rank, iMap.name, iMap.seqname, iMap.start, iMap.end)
        for rank, iMap in enumerate(lMaps, 1)
    ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
272
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
273 getSetListFromMapList = staticmethod( getSetListFromMapList )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
274
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
275 ## Merge all overlapping Set instances in a list without considering the identifiers.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
276 # Start by sorting Set instances by their increasing Min coordinate.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
277 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
278 # @return: a new list of the merged Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
279 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def mergeSetsInList(lSets):
    """Merge the overlapping Sets of a list, whatever their identifiers.

    The Sets are sorted by increasing min then inverse length.  Each Set
    first absorbs the Sets placed after it that overlap it, then is either
    merged into the already-kept Sets it overlaps or kept as a new entry.
    The Set instances themselves are modified by merge().

    @param lSets list of Set instances
    @return a new list holding the merged Set instances
    """
    lMerged = []
    if len(lSets) == 0:
        return lMerged

    lOrdered = SetUtils.getSetListSortedByIncreasingMinThenInvLength(lSets)

    for rank, iSet in enumerate(lOrdered):
        # Absorb the overlapping Sets located further in the sorted list.
        for follower in lOrdered[rank + 1:]:
            if iSet.isOverlapping(follower):
                iSet.merge(follower)
        # Fold the result into every kept Set it overlaps.
        absorbed = False
        for kept in lMerged:
            if kept.isOverlapping(iSet):
                absorbed = True
                kept.merge(iSet)
                lMerged[lMerged.index(kept)] = kept
        if not absorbed:
            lMerged.append(iSet)
    return lMerged
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
303
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
304 mergeSetsInList = staticmethod( mergeSetsInList )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
305
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
306 ## Unjoin a Set list according to another
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
307 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
308 # @param lToKeep: a list of Set instances to keep
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
309 # @param lToUnjoin: a list of Set instances to unjoin
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
310 # @return: lToUnjoin split in several list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
311 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getSetListUnjoined(lToKeep, lToUnjoin):
    """Split lToUnjoin into sub-lists delimited by the Sets of lToKeep.

    Both lists are sorted by increasing min then max, and the sorted copy
    of lToUnjoin is consumed from its front as groups are cut out.

    @param lToKeep a list of Set instances to keep
    @param lToUnjoin a list of Set instances to unjoin
    @return a list of lists of Set instances; [] when lToUnjoin is empty
    """
    lSortedToKeep = SetUtils.getSetListSortedByIncreasingMinThenMax( lToKeep )
    lSortedToUnjoin = SetUtils.getSetListSortedByIncreasingMinThenMax( lToUnjoin )
    if lSortedToUnjoin == []:
        return []
    if lSortedToKeep == []:
        # Nothing to split on: everything stays in a single group.
        return [ lSortedToUnjoin ]

    i=0
    resultListSet=[]
    while i<len(lSortedToKeep):
        # Count the leading Sets to unjoin that end before the current kept Set starts.
        j1=0
        while j1<len(lSortedToUnjoin) and lSortedToKeep[i].getMin() > lSortedToUnjoin[j1].getMax():
            j1+=1
        # All remaining Sets lie before the current kept Set: stop scanning.
        if j1==len(lSortedToUnjoin):
            break
        if j1!=0:
            # Cut these Sets out as one group and remove them from the work list.
            resultListSet.append(lSortedToUnjoin[:j1])
            del lSortedToUnjoin[:j1]
            j1=0
        # No next kept Set to compare against.
        if i+1==len(lSortedToKeep):
            break
        j2=j1
        if j2<len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
            # Group the Sets that end before the next kept Set starts.
            while j2<len(lSortedToUnjoin) and lSortedToKeep[i+1].getMin() > lSortedToUnjoin[j2].getMax():
                j2+=1
            resultListSet.append(lSortedToUnjoin[j1:j2])
            del lSortedToUnjoin[j1:j2]
        i+=1

    # NOTE(review): the remainder is appended without checking that it is
    # non-empty, so the result may end with an empty list -- confirm intended.
    if resultListSet!=[] or i == 0:
        resultListSet.append(lSortedToUnjoin)
    return resultListSet
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
345
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
346 getSetListUnjoined = staticmethod(getSetListUnjoined)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
347
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
348 ## Return new list of Set instances with no duplicate
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
349 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
350 # @param lSets list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
351 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getSetListWithoutDuplicates( lSets ):
    """Return the Sets of a list without consecutive duplicates once sorted.

    A list with fewer than two elements is returned as is (same object).
    Otherwise the Sets are sorted by increasing min then max and an element
    is dropped when it compares equal to the one kept just before it.

    @param lSets list of Set instances
    @return the input list itself, or a new sorted list without duplicates
    """
    if len(lSets) < 2:
        return lSets
    lOrdered = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets)
    lDistinct = lOrdered[:1]
    for candidate in lOrdered[1:]:
        if candidate != lDistinct[-1]:
            lDistinct.append(candidate)
    return lDistinct
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
361
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
362 getSetListWithoutDuplicates = staticmethod( getSetListWithoutDuplicates )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
363
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
364 ## Return a list of Set instances sorted in increasing order according to the Min, then the Max, and finally their initial order
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
365 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
366 # @param lSets: list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
367 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSetListSortedByIncreasingMinThenMax( lSets ):
    """Return a new list of the Sets ordered by increasing min, then max.

    The sort is stable: Sets with the same min and max keep their initial
    relative order. The input list is left untouched.

    @param lSets list of Set instances
    @return new sorted list of Set instances
    """
    def minThenMax( iSet ):
        return ( iSet.getMin(), iSet.getMax() )

    lSorted = list( lSets )
    lSorted.sort( key=minThenMax )
    return lSorted
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
372
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
373 ## Return a list of Set instances sorted in increasing order according to the min, then the inverse of the length, and finally their initial order
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
374 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
375 # @param lSets: list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
376 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSetListSortedByIncreasingMinThenInvLength( lSets ):
    """Return a new list of the Sets ordered by increasing min, then decreasing length.

    Sorting on the negated length gives the same order as sorting on the
    inverse of the length for positive lengths. It stays exact (no float
    rounding) and does not raise ZeroDivisionError when a length is 0.
    The sort is stable, so ties keep their initial relative order.

    @param lSets list of Set instances
    @return new sorted list of Set instances
    """
    return sorted( lSets, key=lambda iSet: ( iSet.getMin(), -iSet.getLength() ) )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
381
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
382 ## Return a list of Set instances sorted in increasing order according to the SeqName, then the Name, then the Min, then the Max and finally their initial order
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
383 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
384 # @param lSets: list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
385 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSetListSortedBySeqThenRegionThenMinThenMax(lSets):
    """Return a new list of the Sets ordered by seqname, then name, then min, then max.

    The sort is stable: Sets equal on all four criteria keep their initial
    relative order. The input list is left untouched.

    @param lSets list of Set instances
    @return new sorted list of Set instances
    """
    def seqThenRegionThenCoords(iSet):
        return (iSet.getSeqname(), iSet.getName(), iSet.getMin(), iSet.getMax())

    lSorted = list(lSets)
    lSorted.sort(key=seqThenRegionThenCoords)
    return lSorted
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
390
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
391 ## Return a list of identifier lists of overlapping Sets from the subject list, according to the reference list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
392 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
393 # @param lRef list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
394 # @param lSubject list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
395 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getListOfIdListOfOverlappingSets(lRef,lSubject):
    """Return a list of identifier lists grouping overlapping Sets of lRef and lSubject.

    Both lists are first sorted by increasing min, then max. Each reference
    Set is compared with the run of consecutive subject Sets that overlap it
    on the same strand; the scan of a run stops at the first subject Set
    that does not. Identifiers of subject Sets appear negated (id * -1) in
    the result, presumably to tell them apart from reference identifiers
    (to be confirmed against the callers).

    @param lRef list of Set instances
    @param lSubject list of Set instances
    @return list of lists of identifiers
    """
    lSortedRef = SetUtils.getSetListSortedByIncreasingMinThenMax( lRef )
    lSortedSubject = SetUtils.getSetListSortedByIncreasingMinThenMax( lSubject )

    lOverlappingSet = []
    # identifier -> index, in lOverlappingSet, of the group holding it
    id2LOverlappingSet_pos = {}

    nbSubjects = len(lSortedSubject)
    j = 0
    for iRef in lSortedRef:
        # skip the subjects ending before the current reference starts
        while j != nbSubjects and iRef.getMin() > lSortedSubject[j].getMax() \
                and not (iRef.isOverlapping(lSortedSubject[j]) \
                         and iRef.isOnDirectStrand() == lSortedSubject[j].isOnDirectStrand()):
            j += 1
        # j is kept for the next reference; jj walks the overlapping run
        jj = j
        while jj != nbSubjects and iRef.isOverlapping(lSortedSubject[jj]) \
                and iRef.isOnDirectStrand() == lSortedSubject[jj].isOnDirectStrand():
            id1 = iRef.id
            id2 = lSortedSubject[jj].id * -1
            isRefKnown = id1 in id2LOverlappingSet_pos
            isSubjectKnown = id2 in id2LOverlappingSet_pos
            if isRefKnown and not isSubjectKnown:
                # the subject joins the group of the reference
                pos = id2LOverlappingSet_pos[id1]
                lOverlappingSet[pos].append(id2)
                id2LOverlappingSet_pos[id2] = pos
            elif isSubjectKnown and not isRefKnown:
                # the reference joins the group of the subject
                pos = id2LOverlappingSet_pos[id2]
                lOverlappingSet[pos].append(id1)
                id2LOverlappingSet_pos[id1] = pos
            elif not isRefKnown and not isSubjectKnown:
                # neither identifier was seen before: open a new group
                lOverlappingSet.append([id1, id2])
                id2LOverlappingSet_pos[id1] = len(lOverlappingSet) - 1
                id2LOverlappingSet_pos[id2] = len(lOverlappingSet) - 1
            # when both identifiers already belong to a group nothing is done
            # (existing groups are not merged)
            jj += 1

    return lOverlappingSet
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
437
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
438 ## Return a list of sets without overlapping between two lists of sets
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
439 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
440 # @param lSet1 and lSet2
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
441 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2, minLength=20):
    """Return the Sets of lSet2 once every Set of lSet1 has been subtracted from them.

    Each Set of lSet2 is passed through diff() against each Set of lSet1.
    Non-empty pieces returned by diff() that are at least minLength long are
    appended to lSet2, so lSet2 is modified in place. The returned list keeps
    only the non-empty Sets of lSet2 whose length is at least minLength.

    NOTE(review): this relies on Set.diff() shrinking the receiver in place
    and returning the other remaining piece -- confirm against Set.diff().

    @param lSet1 list of Set instances to subtract
    @param lSet2 list of Set instances to subtract from (modified in place)
    @param minLength minimal length of the pieces to keep (default: 20)
    @return new list of Set instances
    """
    for iSet1 in lSet1:
        # lSet2 grows while it is traversed: the pieces appended below are
        # visited later in this same loop and compared with iSet1 as well
        for iSet2 in lSet2:
            iRemainder = iSet2.diff(iSet1)
            if not iRemainder.isEmpty() and iRemainder.getLength() >= minLength:
                lSet2.append(iRemainder)
    return [ iSet2 for iSet2 in lSet2
             if not iSet2.isEmpty() and iSet2.getLength() >= minLength ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
455
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
456 ## Return a Set list from a Set file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
457 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
458 # @param setFile string name of a Set file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
459 # @return a list of Set instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
460 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSetListFromFile( setFile ):
    """Return the list of Set instances read from a Set file.

    One Set is built per line with Set.setFromString(). The file is closed
    even if the parsing of a line raises.

    @param setFile string name of a Set file
    @return a list of Set instances
    """
    lSets = []
    with open( setFile, "r" ) as setFileHandler:
        for line in setFileHandler:
            iSet = Set()
            iSet.setFromString( line )
            lSets.append( iSet )
    return lSets
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
475
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
476
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def convertSetFileIntoMapFile( setFile, mapFile ):
    """Convert a Set file into a Map file.

    Each line of setFile is parsed with Set.setFromString(), and the Map
    instance given by Set.getMapInstance() is written to mapFile. Both files
    are closed even if a line cannot be converted. A fresh Set is used for
    each line so that no state is carried over from the previous one.

    @param setFile string name of the input Set file
    @param mapFile string name of the output Map file (overwritten)
    """
    with open( setFile, "r" ) as setFileHandler:
        with open( mapFile, "w" ) as mapFileHandler:
            for line in setFileHandler:
                iSet = Set()
                iSet.setFromString( line )
                iSet.getMapInstance().write( mapFileHandler )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
492
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
493
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getDictOfListsWithSeqnameAsKey( lSets ):
    """Return a dictionary grouping the Sets of lSets by their seqname attribute.

    Within each group the Sets keep the order they have in lSets.

    @param lSets list of Set instances
    @return dictionary whose keys are seqnames and values are lists of Set instances
    """
    dSeqnamesToSetList = {}
    for iSet in lSets:
        # setdefault replaces dict.has_key(), which no longer exists in Python 3
        dSeqnamesToSetList.setdefault( iSet.seqname, [] ).append( iSet )
    return dSeqnamesToSetList
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
503
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
504
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def filterOnLength( lSets, minLength=0, maxLength=10000000000 ):
    """Return the Sets whose length lies between minLength and maxLength, both included.

    When minLength and maxLength are both 0, no filtering is done and lSets
    itself is returned. Otherwise a new list is built.

    @param lSets list of Set instances
    @param minLength minimal length to keep (default: 0)
    @param maxLength maximal length to keep (default: 10000000000)
    @return list of Set instances
    """
    if minLength == 0 and maxLength == 0:
        return lSets
    return [ iSet for iSet in lSets
             if minLength <= iSet.getLength() <= maxLength ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
515
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
516
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getListOfNames( setFile ):
    """Return the list of distinct Set names found in a Set file.

    Names are listed in order of first appearance. Each line is split on
    tabulations and parsed with Set.setFromTuple(). The file is closed even
    if the parsing of a line raises.

    @param setFile string name of a Set file
    @return list of names
    """
    lNames = []
    sSeenNames = set()
    with open( setFile, "r" ) as setFileHandler:
        for line in setFileHandler:
            iSet = Set()
            # only the end-of-line is removed, so that a last line without
            # newline does not lose its last character
            iSet.setFromTuple( line.rstrip("\n").split("\t") )
            if iSet.name not in sSeenNames:
                sSeenNames.add( iSet.name )
                lNames.append( iSet.name )
    return lNames
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
532
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
533
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile ):
    """Return the Sets of a Set file indexed by name, then by identifier.

    Each line is split on tabulations and parsed with Set.setFromTuple().
    The file is closed even if the parsing of a line raises.

    @param setFile string name of a Set file
    @return dictionary { name: { id: [ Set instances, in file order ] } }
    """
    dNames2DictsId = {}
    with open( setFile, "r" ) as setFileHandler:
        for line in setFileHandler:
            iSet = Set()
            # only the end-of-line is removed, so that a last line without
            # newline does not lose its last character
            iSet.setFromTuple( line.rstrip("\n").split("\t") )
            # setdefault replaces dict.has_key(), which no longer exists in Python 3
            dId2Sets = dNames2DictsId.setdefault( iSet.name, {} )
            dId2Sets.setdefault( iSet.id, [] ).append( iSet )
    return dNames2DictsId
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
553 getDictOfDictsWithNamesThenIdAsKeyFromFile = staticmethod( getDictOfDictsWithNamesThenIdAsKeyFromFile )