comparison SMART/Java/Python/ncList/NCListCursor.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
35:d94018ca4ada 36:44d5973c188c
1 #! /usr/bin/env python
2 #
3 # Copyright INRA-URGI 2009-2010
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30 #
31 import os, os.path, struct
32 from commons.core.parsing.GffParser import GffParser
33 from SMART.Java.Python.misc.Progress import Progress
34
35
class Data(object):
    """Plain record holding the values cached for one node of a list.

    The object only stores what it is given; it performs no validation
    and no computation.
    """

    def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):
        """Store the six fields describing a node, unchanged."""
        # Bounds of the node and the object attached to it.
        self.start, self.end = start, end
        self.transcript = transcript
        # Index of the node's H line and the L-index range of its children.
        self.hIndex = hIndex
        self.firstChildLIndex, self.lastChildLIndex = firstChildLIndex, lastChildLIndex
44
class NCListCursor(object):
    """Cursor designating one node of an NC-list by its L-index.

    All data are read lazily through the ``ncList`` object, which must
    provide ``getSizeFirstList()``, ``getLLineElements(lIndex)`` (returning
    gffIndex, hIndex, parentLIndex, start, end), ``getHLineElements(hIndex)``
    (returning firstChildLIndex, nbChildren) and
    ``getIntervalFromAdress(gffIndex)``.

    Conventions used throughout the class:
      * a cached field equal to ``None`` has not been loaded yet;
      * an index equal to ``-1`` means "no such node" (no parent, no child,
        or cursor out of the list).
    """

    def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):
        """Build a cursor, either as a copy of *cursor* or on *ncList* at *lIndex*.

        When *cursor* is given, *ncList*, *lIndex* and *verbosity* are ignored
        (``copy`` overwrites them with the values of *cursor*).
        """
        self._verbosity = verbosity
        self._mainListData = []
        if cursor is not None:
            self.copy(cursor)
        else:
            self._ncList = ncList
            self.setLIndex(lIndex)

    def _forgetCurrentNode(self):
        """Drop every cached field describing the current node (lazy reload)."""
        self._hIndex = None
        self._gffIndex = None
        self._start = None
        self._end = None
        self._transcript = None
        self._firstChildLIndex = None
        self._lastChildLIndex = None

    def setLIndex(self, lIndex):
        """Place the cursor on *lIndex* and invalidate every cached field."""
        self._lIndex = lIndex
        self._forgetCurrentNode()
        self._parentGffIndex = None
        self._parentLIndex = None
        self._parentHIndex = None
        self._parentStart = None
        self._parentEnd = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex = None
        # The main-list index is only tracked while the cursor is in the first list.
        self._mainListIndex = lIndex if lIndex < self._ncList.getSizeFirstList() else None

    def precompute(self):
        """Load the data of every node of the first list into memory.

        One ``Data`` record per L-index of the first list is appended to
        ``_mainListData`` (record *i* describes L-index *i*), and
        ``_mainListIndex`` is reset to 0.
        """
        self._mainListIndex = 0
        sizeFirstList = self._ncList.getSizeFirstList()
        progress = Progress(sizeFirstList, "Precomputing data", self._verbosity)
        for i in range(sizeFirstList):
            # parentLIndex is read but not stored in the record.
            gffIndex, hIndex, parentLIndex, start, end = self._ncList.getLLineElements(i)
            transcript = self._ncList.getIntervalFromAdress(gffIndex)
            firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)
            lastChildLIndex = -1 if firstChildLIndex == -1 else firstChildLIndex + nbChildren - 1
            self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))
            progress.inc()
        progress.done()

    def _updateFromMainListData(self):
        """Fill the current-node fields from the precomputed record, if any.

        Returns True when a record was found at ``_mainListIndex`` and copied,
        False otherwise (no precomputed data, cursor outside the first list,
        or index unset / out of range).
        """
        sizeFirstList = self._ncList.getSizeFirstList()
        if not self._mainListData or self._lIndex >= sizeFirstList:
            return False
        index = self._mainListIndex
        # An unset or negative index cannot designate a record; a negative
        # value must not be allowed to wrap around to the end of the list.
        if index is None or index < 0:
            return False
        if index >= sizeFirstList or index >= len(self._mainListData):
            # Past the end: report a miss instead of raising IndexError.
            self._hIndex = -1
            return False
        data = self._mainListData[index]
        self._hIndex = data.hIndex
        self._transcript = data.transcript
        self._firstChildLIndex = data.firstChildLIndex
        self._lastChildLIndex = data.lastChildLIndex
        self._start = data.start
        self._end = data.end
        return True

    def getLIndex(self):
        """Return the L-index the cursor currently designates."""
        return self._lIndex

    def _getCurrentData(self):
        """Load the L line of the current node.

        Raises Exception when the loaded end position is -1.
        """
        self._gffIndex, self._hIndex, self._parentLIndex, self._start, self._end = self._ncList.getLLineElements(self._lIndex)
        if self._end == -1:
            raise Exception("Error: the L line at index %s has no end position (-1)" % (self._lIndex,))

    def _getParentData(self):
        """Load the L line of the parent of the current node."""
        if self._parentLIndex is None:
            self._getCurrentData()
        # The grand-parent L-index is read but not kept.
        self._parentGffIndex, self._parentHIndex, greatParentLIndex, self._parentStart, self._parentEnd = self._ncList.getLLineElements(self._parentLIndex)

    def _getTranscript(self):
        """Load the object attached to the current node."""
        if self._gffIndex is None:
            self._getCurrentData()
        self._transcript = self._ncList.getIntervalFromAdress(self._gffIndex)

    def _getSiblingData(self):
        """Compute the L-index range shared by the current node and its siblings."""
        if self._parentHIndex is None:
            self._getParentData()
        if self._parentHIndex == -1:
            # No parent H line: the siblings are the whole first list.
            self._firstSiblingLIndex = 0
            self._lastSiblingLIndex = self._ncList.getSizeFirstList() - 1
        else:
            self._firstSiblingLIndex, nbSiblings = self._ncList.getHLineElements(self._parentHIndex)
            self._lastSiblingLIndex = -1 if self._firstSiblingLIndex == -1 else self._firstSiblingLIndex + nbSiblings - 1

    def _getChildrenData(self):
        """Compute the L-index range of the children of the current node."""
        if self._hIndex is None:
            self._getCurrentData()
        self._firstChildLIndex, nbChildren = self._ncList.getHLineElements(self._hIndex)
        self._lastChildLIndex = -1 if self._firstChildLIndex == -1 else self._firstChildLIndex + nbChildren - 1

    def getGffAddress(self):
        """Return the GFF index stored in the L line of the current node."""
        if self._gffIndex is None:
            self._getCurrentData()
        return self._gffIndex

    def getStart(self):
        """Return the start stored in the L line of the current node."""
        if self._start is None:
            self._getCurrentData()
        return self._start

    def getEnd(self):
        """Return the end stored in the L line of the current node."""
        if self._end is None:
            self._getCurrentData()
        return self._end

    def compare(self, cursor):
        """Return True when *cursor* designates the same L-index as this cursor."""
        return (self._lIndex == cursor._lIndex)

    def getTranscript(self):
        """Return the object attached to the current node, or None when out."""
        if self.isOut():
            return None
        if self._transcript is None:
            self._getTranscript()
        return self._transcript

    def isFirst(self):
        """Return True when the cursor is on the first node of its sibling range."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            # Shortcut for the first list: only L-index 0 is first.
            return (self._lIndex == 0)
        if self._firstSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._firstSiblingLIndex)

    def isLast(self):
        """Return True when the cursor is on the last node of its sibling range."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            # Strictly before the last slot of the first list: never last.
            return False
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._lastSiblingLIndex)

    def moveUp(self):
        """Move the cursor to the parent of the current node."""
        if self._parentLIndex is None:
            self._getCurrentData()
        self._lIndex = self._parentLIndex
        # The precomputed record may only be trusted for the children range
        # when it really describes the node the cursor has just reached.
        cacheIsForParent = self._updateFromMainListData() and self._mainListIndex == self._lIndex
        self._hIndex = self._parentHIndex
        self._gffIndex = self._parentGffIndex
        self._start = self._parentStart
        self._end = self._parentEnd
        self._transcript = None
        if not cacheIsForParent:
            # The cached range belonged to the node we just left; force a reload.
            self._firstChildLIndex = None
            self._lastChildLIndex = None
        self._parentLIndex = None
        self._parentHIndex = None
        self._parentGffIndex = None
        self._parentStart = None
        self._parentEnd = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex = None

    def moveRight(self):
        """Move the cursor to the next L-index; does nothing when already out."""
        if self.isOut():
            return
        if self._lIndex < self._ncList.getSizeFirstList() - 1 and self._mainListIndex is not None:
            self._mainListIndex += 1
            # NOTE(review): the fields loaded here are reset just below;
            # the call is kept to follow the original sequence.
            self._updateFromMainListData()
        self._lIndex += 1
        self._forgetCurrentNode()

    def moveNext(self):
        """Move to the next node, climbing while the current node is the last sibling.

        The cursor is put out (L-index -1) when the last node of the top
        level has been passed.
        """
        while not self.isOut() and self.isLast():
            if self.isTop():
                self._lIndex = -1
                return
            self.moveUp()
        self.moveRight()

    def moveMiddleSibling(self):
        """Move the cursor to the middle of its sibling range."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            # Floor division keeps the index an integer on every Python version.
            self._mainListIndex = (self._ncList.getSizeFirstList() - 1) // 2
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = (self._lastSiblingLIndex + self._firstSiblingLIndex) // 2
        self._forgetCurrentNode()

    def moveSibling(self, lIndex):
        """Move the cursor to *lIndex*, assumed to be a sibling of the current node."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = lIndex
            self._updateFromMainListData()
        self._lIndex = lIndex
        self._forgetCurrentNode()

    def moveLastSibling(self):
        """Move the cursor to the last node of its sibling range."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = self._ncList.getSizeFirstList() - 1
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = self._lastSiblingLIndex
        self._forgetCurrentNode()

    def moveDown(self):
        """Move the cursor to the first child of the current node.

        The cursor ends up out (L-index -1) when the node has no child.
        """
        if self._firstChildLIndex is None:
            self._getChildrenData()
        # The current node becomes the parent...
        self._parentLIndex = self._lIndex
        self._parentHIndex = self._hIndex
        self._parentGffIndex = self._gffIndex
        self._parentStart = self._start
        self._parentEnd = self._end
        # ...and its children range becomes the sibling range.
        self._lIndex = self._firstChildLIndex
        self._firstSiblingLIndex = self._firstChildLIndex
        self._lastSiblingLIndex = self._lastChildLIndex
        self._forgetCurrentNode()

    def isOut(self):
        """Return True when the cursor designates no node (L-index -1)."""
        return (self._lIndex == -1)

    def isTop(self):
        """Return True when the current node has no parent (parent L-index -1)."""
        if self._parentLIndex is None:
            self._getCurrentData()
        return (self._parentLIndex == -1)

    def hasChildren(self):
        """Return True when the current node has at least one child."""
        if self._hIndex is None:
            self._getCurrentData()
        if self._hIndex == -1:
            return False
        if self._firstChildLIndex is None:
            self._getChildrenData()
        return (self._firstChildLIndex != -1)

    def copy(self, cursor):
        """Make this cursor a copy of *cursor*.

        The precomputed list ``_mainListData`` is shared, not duplicated.
        """
        self._ncList = cursor._ncList
        self._lIndex = cursor._lIndex
        self._hIndex = cursor._hIndex
        self._gffIndex = cursor._gffIndex
        self._parentLIndex = cursor._parentLIndex
        self._parentHIndex = cursor._parentHIndex
        self._parentGffIndex = cursor._parentGffIndex
        self._transcript = cursor._transcript
        self._firstSiblingLIndex = cursor._firstSiblingLIndex
        self._lastSiblingLIndex = cursor._lastSiblingLIndex
        self._firstChildLIndex = cursor._firstChildLIndex
        self._lastChildLIndex = cursor._lastChildLIndex
        self._mainListData = cursor._mainListData
        self._mainListIndex = cursor._mainListIndex
        self._verbosity = cursor._verbosity
        self._parentStart = cursor._parentStart
        self._parentEnd = cursor._parentEnd
        self._start = cursor._start
        self._end = cursor._end

    def __str__(self):
        """Return a one-line dump of the cursor state, for debugging."""
        return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)