comparison TEisotools-1.1.a/commons/core/coord/Range.py @ 13:feef9a0db09d draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 09:04:42 -0400
parents
children
comparison
equal deleted inserted replaced
12:22b0494ec883 13:feef9a0db09d
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 ## Record a region on a given sequence
33 #
34 class Range( object ):
35
36 __slots__ = ("seqname", "start", "end", '__dict__')
37
38 ## Constructor
39 #
40 # @param seqname the name of the sequence
41 # @param start the start coordinate
42 # @param end the end coordinate
43 #
44 def __init__(self, seqname="", start=-1, end=-1):
45 self.seqname = seqname
46 self.start = int(start)
47 self.end = int(end)
48
49 ## Equal operator
50 #
51 # @param o a Range instance
52 #
53 def __eq__(self, o):
54 if type(o) is type(self) and self.seqname == o.seqname and self.start == o.start and self.end == o.end:
55 return True
56 return False
57
58 ## Unequal operator
59 #
60 # @param o a Range instance
61 #
62 def __ne__(self, o):
63 return not self.__eq__(o)
64
65 ## Convert the object into a string
66 #
67 # @note used in 'print myObject'
68 #
69 def __str__( self ):
70 return self.toString()
71
72 ## Convert the object into a string
73 #
74 # @note used in 'repr(myObject)' for debugging
75 #
76 def __repr__( self ):
77 return self.toString().replace("\t",";")
78
79 def setStart(self, start):
80 self.start = start
81
82 def setEnd(self, end):
83 self.end = end
84
85 def setSeqName(self, seqName):
86 self.seqname = seqName
87
88 ## Reset
89 #
90 def reset(self):
91 self.seqname = ""
92 self.start = -1
93 self.end = -1
94
95 ## Return the attributes as a formatted string
96 #
97 def toString(self):
98 string = "%s" % (self.seqname)
99 string += "\t%d" % (self.start)
100 string += "\t%d" % (self.end)
101 return string
102
103 ## Show the attributes
104 #
105 def show(self):
106 print self.toString()
107
108 ## Return seqname
109 #
110 def getSeqname(self):
111 return self.seqname
112
113 ## Return the start coordinate
114 #
115 def getStart(self):
116 return self.start
117
118 ## Return the end coordinate
119 #
120 def getEnd(self):
121 return self.end
122
123 ## Return the lowest value between start and end coordinates
124 #
125 def getMin(self):
126 return min(self.start, self.end)
127
128 ## Return the greatest value between start and end attributes
129 #
130 def getMax(self):
131 return max(self.start, self.end)
132
133 ## Return True if the instance is on the direct strand, False otherwise
134 #
135 def isOnDirectStrand(self):
136 if self.start <= self.end:
137 return True
138 else:
139 return False
140
141 ## Return True if the instance is on the reverse strand, False otherwise
142 #
143 def isOnReverseStrand(self):
144 return not self.isOnDirectStrand()
145
146 ## Return '+' if the instance is on the direct strand, '-' otherwise
147 #
148 def getStrand(self):
149 if self.isOnDirectStrand():
150 return '+'
151 else:
152 return '-'
153
154 ## Exchange start and end coordinates
155 #
156 def reverse(self):
157 tmp = self.start
158 self.start = self.end
159 self.end = tmp
160
161 ## Return the length of the instance
162 #
163 # @warning old name is 'length'
164 #
165 def getLength(self):
166 return int(abs(self.start-self.end))+1
167
168 ## Return True if the instance is empty, False otherwise
169 #
170 def isEmpty(self):
171 if self.start==self.end and (self.start==0 or self.start==-1):
172 return True
173 return False
174
175 ## Set attributes from tuple
176 #
177 # @param tuple a tuple with (name,start,end)
178 #
179 def setFromTuple(self, tuple):
180 self.seqname = tuple[0]
181 self.start = int(tuple[1])
182 self.end = int(tuple[2])
183
184 ## Set attributes from string
185 #
186 # @param string a string formatted like name<sep>start<sep>end
187 # @param sep field separator
188 #
189 def setFromString(self, string, sep="\t"):
190 if string[-1] == "\n":
191 string = string[:-1]
192 self.setFromTuple( string.split(sep) )
193
194 ## Merge the instance with another Range instance
195 #
196 # @param o a Range instance
197 #
198 def merge(self, o):
199 if self.seqname != o.seqname:
200 return
201 if self.isOnDirectStrand():
202 self.start = min(self.getMin(), o.getMin())
203 self.end = max(self.getMax(), o.getMax())
204 else:
205 self.start = max(self.getMax(), o.getMax())
206 self.end = min(self.getMin(), o.getMin())
207
208 ## Return True if the instance overlaps with another Range instance, False otherwise
209 #
210 # @param o a Range instance
211 #
212 def isOverlapping(self, o):
213 if o.seqname != self.seqname:
214 return False
215 smin = self.getMin()
216 smax = self.getMax()
217 omin = o.getMin()
218 omax = o.getMax()
219 if omin <= smin and omax >= smax:
220 return True
221 if omin >= smin and omin <= smax or omax >= smin and omax <= smax:
222 return True
223 return False
224
225
226 ## Return the length of the overlap between the instance and another Range, 0 if no overlap
227 #
228 # @param o a Range instance
229 #
230 def getOverlapLength( self, o ):
231 if self.isOverlapping( o ):
232 if self.isIncludedIn( o ):
233 return self.getLength()
234 elif o.isIncludedIn( self ):
235 return o.getLength()
236 elif o.getMin() <= self.getMax() and o.getMin() >= self.getMin():
237 return self.getMax() - o.getMin() + 1
238 elif o.getMax() <= self.getMax() and o.getMax() >= self.getMin():
239 return o.getMax() - self.getMin() + 1
240 return 0
241
242
243 ## Return True if the instance is included within another Range, False otherwise
244 #
245 # @param o a Range instance
246 #
247 # @note the min (respectively max) coordinates can be equal
248 #
249 def isIncludedIn( self, o ):
250 if o.seqname != self.seqname:
251 return False
252 if self.getMin() >= o.getMin() and self.getMax() <= o.getMax():
253 return True
254 else:
255 return False
256
257
258 ## Return the distance between the start of the instance and the start of another Range instance
259 #
260 # @param o a Range instance
261 #
262 def getDistance(self, o):
263 if self.isOnDirectStrand() == o.isOnDirectStrand():
264 if self.isOverlapping(o):
265 return 0
266 elif self.isOnDirectStrand():
267 if self.start > o.start:
268 return self.start - o.end
269 else:
270 return o.start - self.end
271 else:
272 if self.start > o.start:
273 return self.end - o.start
274 else:
275 return o.end - self.start
276 return -1
277
278 ## Remove in the instance the region overlapping with another Range instance
279 #
280 # @param o a Range instance
281 #
282 def diff(self, o):
283 new_range = Range(self.seqname)
284 if not self.isOverlapping(o) or self.seqname != o.seqname:
285 return new_range
286
287 istart = min(self.start, self.end)
288 iend = max(self.start, self.end)
289 jstart = min(o.start, o.end)
290 jend = max(o.start, o.end)
291 if istart < jstart:
292 if iend <= jend:
293 if self.isOnDirectStrand():
294 self.start = istart
295 self.end = jstart - 1
296 else:
297 self.start = jstart - 1
298 self.end = istart
299 else:
300 if self.isOnDirectStrand():
301 self.start = istart
302 self.end = jstart - 1
303 new_range.start = jend + 1
304 new_range.end = iend
305 else:
306 self.start = jstart - 1;
307 self.end = istart;
308 new_range.start = iend
309 new_range.end = jend + 1
310 else: #istart>=jstart
311 if iend <= jend:
312 self.start = 0
313 self.end = 0
314 else:
315 if self.isOnDirectStrand():
316 self.start = jend + 1
317 self.end = iend
318 else:
319 self.start = iend
320 self.end = jend + 1
321 return new_range
322
323 ## Find the bin that contains the instance and compute its index
324 #
325 # @note Required for coordinate indexing via a hierarchical bin system
326 #
327 def findIdx(self):
328 min_lvl = 3
329 max_lvl = 6
330 for bin_lvl in xrange(min_lvl, max_lvl):
331 if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):
332 return getIdx(self.start, bin_lvl)
333 return getIdx(self.start, max_lvl)
334
335 ## Get a bin for fast database access
336 #
337 # @return bin number (float)
338 #
339 def getBin(self):
340 for i in xrange(3, 8):
341 bin_lvl = pow(10, i)
342 if int(self.start/bin_lvl) == int(self.end/bin_lvl):
343 return float(bin_lvl+(int(self.start/bin_lvl)/1e10))
344 bin_lvl = pow(10, 8)
345 return float(bin_lvl+(int(self.start/bin_lvl)/1e10))
346
347
348 # Functions
349
350 # Get the bin number of a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system
351 #
352 def getBin(val, bin_lvl):
353 bin_size = pow(10, bin_lvl)
354 return long(val / bin_size)
355
356 # Get an index from a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system
357 #
358 def getIdx(val, bin_lvl):
359 min_lvl = 3
360 max_lvl = 6
361 if bin_lvl >= max_lvl:
362 return long((bin_lvl-min_lvl+1)*pow(10,max_lvl))
363 return long(((bin_lvl-min_lvl+1)*pow(10,max_lvl))+getBin(val,bin_lvl))