Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/commons/core/coord/Range.py @ 13:feef9a0db09d draft
Uploaded
| author | urgi-team |
|---|---|
| date | Wed, 20 Jul 2016 09:04:42 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 12:22b0494ec883 | 13:feef9a0db09d |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 | |
| 32 ## Record a region on a given sequence | |
| 33 # | |
class Range(object):

    # A range is a sequence name plus two coordinates. start <= end is
    # reported as the direct strand ('+'), start > end as the reverse
    # strand ('-'). '__dict__' is listed in the slots so that instances
    # still accept arbitrary extra attributes.
    __slots__ = ("seqname", "start", "end", '__dict__')

    ## Constructor
    #
    # @param seqname the name of the sequence
    # @param start the start coordinate (converted with int())
    # @param end the end coordinate (converted with int())
    #
    def __init__(self, seqname="", start=-1, end=-1):
        self.seqname = seqname
        self.start = int(start)
        self.end = int(end)

    ## Equal operator
    #
    # @param o the object to compare with
    # @return True only if o has exactly the same type as the instance and
    #         the same seqname, start and end
    #
    def __eq__(self, o):
        return (type(o) is type(self)
                and self.seqname == o.seqname
                and self.start == o.start
                and self.end == o.end)

    ## Unequal operator
    #
    # @param o the object to compare with
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string (tab-separated fields)
    #
    # @note used in 'print myObject'
    #
    def __str__(self):
        return self.toString()

    ## Convert the object into a string (semicolon-separated fields)
    #
    # @note used in 'repr(myObject)' for debugging
    #
    def __repr__(self):
        return self.toString().replace("\t", ";")

    ## Set the start coordinate (stored as given, no int() conversion)
    #
    def setStart(self, start):
        self.start = start

    ## Set the end coordinate (stored as given, no int() conversion)
    #
    def setEnd(self, end):
        self.end = end

    ## Set the sequence name
    #
    def setSeqName(self, seqName):
        self.seqname = seqName

    ## Reset the attributes to the constructor defaults ("", -1, -1)
    #
    def reset(self):
        self.seqname = ""
        self.start = -1
        self.end = -1

    ## Return the attributes as a formatted string: seqname<TAB>start<TAB>end
    #
    def toString(self):
        return "%s\t%d\t%d" % (self.seqname, self.start, self.end)

    ## Show the attributes on standard output
    #
    def show(self):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print(self.toString())

    ## Return seqname
    #
    def getSeqname(self):
        return self.seqname

    ## Return the start coordinate
    #
    def getStart(self):
        return self.start

    ## Return the end coordinate
    #
    def getEnd(self):
        return self.end

    ## Return the lowest value between start and end coordinates
    #
    def getMin(self):
        return min(self.start, self.end)

    ## Return the greatest value between start and end coordinates
    #
    def getMax(self):
        return max(self.start, self.end)

    ## Return True if the instance is on the direct strand (start <= end), False otherwise
    #
    def isOnDirectStrand(self):
        return self.start <= self.end

    ## Return True if the instance is on the reverse strand (start > end), False otherwise
    #
    def isOnReverseStrand(self):
        return not self.isOnDirectStrand()

    ## Return '+' if the instance is on the direct strand, '-' otherwise
    #
    def getStrand(self):
        if self.isOnDirectStrand():
            return '+'
        return '-'

    ## Exchange start and end coordinates
    #
    def reverse(self):
        self.start, self.end = self.end, self.start

    ## Return the length of the instance (both bounds included)
    #
    # @warning old name is 'length'
    #
    def getLength(self):
        return int(abs(self.start - self.end)) + 1

    ## Return True if the instance is empty, False otherwise
    #
    # @note empty means start == end and both are 0 or both are -1
    #
    def isEmpty(self):
        return self.start == self.end and self.start in (0, -1)

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (name,start,end); start and end are converted with int()
    #
    def setFromTuple(self, tuple):
        self.seqname = tuple[0]
        self.start = int(tuple[1])
        self.end = int(tuple[2])

    ## Set attributes from string
    #
    # @param string a string formatted like name<sep>start<sep>end
    # @param sep field separator
    #
    # @note a single trailing newline, if present, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple(string.split(sep))

    ## Merge the instance with another Range instance
    #
    # @param o a Range instance
    #
    # @note nothing is done if the sequence names differ; otherwise the
    #       instance is extended to span both ranges and keeps its own strand
    #
    def merge(self, o):
        if self.seqname != o.seqname:
            return
        lowest = min(self.getMin(), o.getMin())
        greatest = max(self.getMax(), o.getMax())
        if self.isOnDirectStrand():
            self.start = lowest
            self.end = greatest
        else:
            self.start = greatest
            self.end = lowest

    ## Return True if the instance overlaps with another Range instance, False otherwise
    #
    # @param o a Range instance
    #
    # @note ranges on different sequences never overlap; strands are ignored
    #
    def isOverlapping(self, o):
        if o.seqname != self.seqname:
            return False
        # Two closed intervals share at least one position exactly when each
        # one begins no later than the other one ends.
        return o.getMin() <= self.getMax() and self.getMin() <= o.getMax()

    ## Return the length of the overlap between the instance and another Range, 0 if no overlap
    #
    # @param o a Range instance
    #
    def getOverlapLength(self, o):
        if self.isOverlapping(o):
            if self.isIncludedIn(o):
                return self.getLength()
            elif o.isIncludedIn(self):
                return o.getLength()
            elif o.getMin() <= self.getMax() and o.getMin() >= self.getMin():
                # o begins inside the instance and runs past its end
                return self.getMax() - o.getMin() + 1
            elif o.getMax() <= self.getMax() and o.getMax() >= self.getMin():
                # o begins before the instance and ends inside it
                return o.getMax() - self.getMin() + 1
        return 0

    ## Return True if the instance is included within another Range, False otherwise
    #
    # @param o a Range instance
    #
    # @note the min (respectively max) coordinates can be equal
    #
    def isIncludedIn(self, o):
        if o.seqname != self.seqname:
            return False
        return self.getMin() >= o.getMin() and self.getMax() <= o.getMax()

    ## Return the gap between the instance and another Range instance
    #
    # @param o a Range instance
    # @return 0 if the two ranges overlap, -1 if they are on different
    #         strands, otherwise the difference between the facing bounds
    #         of the two ranges
    #
    # @note sequence names are only taken into account by the overlap test
    #
    def getDistance(self, o):
        if self.isOnDirectStrand() == o.isOnDirectStrand():
            if self.isOverlapping(o):
                return 0
            elif self.isOnDirectStrand():
                if self.start > o.start:
                    return self.start - o.end
                else:
                    return o.start - self.end
            else:
                if self.start > o.start:
                    return self.end - o.start
                else:
                    return o.end - self.start
        return -1

    ## Remove in the instance the region overlapping with another Range instance
    #
    # @param o a Range instance
    # @return a new Range on the same sequence; it holds the right-hand
    #         remainder when o splits the instance in two, and is left with
    #         the default coordinates (-1, -1) otherwise
    #
    # @note the instance is modified in place and keeps its strand; when it
    #       is entirely covered by o its coordinates are both set to 0
    #
    def diff(self, o):
        new_range = Range(self.seqname)
        if not self.isOverlapping(o) or self.seqname != o.seqname:
            return new_range

        istart = min(self.start, self.end)
        iend = max(self.start, self.end)
        jstart = min(o.start, o.end)
        jend = max(o.start, o.end)
        if istart < jstart:
            if iend <= jend:
                # o covers the right end of the instance: keep the left part
                if self.isOnDirectStrand():
                    self.start = istart
                    self.end = jstart - 1
                else:
                    self.start = jstart - 1
                    self.end = istart
            else:
                # o lies strictly inside: keep the left part in the instance
                # and hand the right part back in new_range
                if self.isOnDirectStrand():
                    self.start = istart
                    self.end = jstart - 1
                    new_range.start = jend + 1
                    new_range.end = iend
                else:
                    self.start = jstart - 1
                    self.end = istart
                    new_range.start = iend
                    new_range.end = jend + 1
        else:  # istart >= jstart
            if iend <= jend:
                # the instance is entirely covered by o
                self.start = 0
                self.end = 0
            else:
                # o covers the left end of the instance: keep the right part
                if self.isOnDirectStrand():
                    self.start = jend + 1
                    self.end = iend
                else:
                    self.start = iend
                    self.end = jend + 1
        return new_range

    ## Find the bin that contains the instance and compute its index
    #
    # @note Required for coordinate indexing via a hierarchical bin system
    # @note the calls below resolve to the module-level getBin(val, bin_lvl)
    #       and getIdx(val, bin_lvl) functions, not to the getBin() method
    #
    def findIdx(self):
        min_lvl = 3
        max_lvl = 6
        # Levels 3, 4 and 5 are tried in turn; level 6 is the fallback.
        for bin_lvl in range(min_lvl, max_lvl):
            if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):
                return getIdx(self.start, bin_lvl)
        return getIdx(self.start, max_lvl)

    ## Get a bin for fast database access
    #
    # @return bin number (float): the bin size plus the bin index divided by 1e10
    #
    # @note bin sizes 10^3 to 10^7 are tried in turn and the first one for
    #       which start and end share a bin is used; 10^8 is the fallback
    #
    def getBin(self):
        for i in range(3, 8):
            bin_lvl = pow(10, i)
            # Floor division keeps the Python 2 integer-division result on
            # Python 3 as well (e.g. -1 falls into bin -1, not bin 0).
            start_bin = int(self.start // bin_lvl)
            if start_bin == int(self.end // bin_lvl):
                return float(bin_lvl + (start_bin / 1e10))
        bin_lvl = pow(10, 8)
        return float(bin_lvl + (int(self.start // bin_lvl) / 1e10))
| 346 | |
| 347 | |
| 348 # Functions | |
| 349 | |
| 350 # Get the bin number of a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system | |
| 351 # | |
def getBin(val, bin_lvl):
    # Return the number of the bin of size 10**bin_lvl that contains val.
    #
    # Floor division followed by int() gives the same result as the former
    # Python 2 'long(val / bin_size)' for integer coordinates, and also runs
    # on Python 3, where 'long' no longer exists and '/' is true division.
    bin_size = pow(10, bin_lvl)
    return int(val // bin_size)
| 355 | |
| 356 # Get an index from a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system | |
| 357 # | |
def getIdx(val, bin_lvl):
    # Return the index of the bin that contains val at the given bin level.
    #
    # Every level owns a block of 10**max_lvl indexes starting at
    # (bin_lvl - min_lvl + 1) * 10**max_lvl. Below max_lvl the bin number of
    # val is added to that offset; from max_lvl upwards the offset alone is
    # returned and val is not used.
    #
    # int() replaces the Python 2-only long() so the function also runs on
    # Python 3.
    min_lvl = 3
    max_lvl = 6
    level_offset = (bin_lvl - min_lvl + 1) * pow(10, max_lvl)
    if bin_lvl >= max_lvl:
        return int(level_offset)
    return int(level_offset + getBin(val, bin_lvl))
