Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/structure/Interval.py @ 36:44d5973c188c
Uploaded
| author | m-zytnicki |
|---|---|
| date | Tue, 30 Apr 2013 15:02:29 -0400 |
| parents | |
| children | 169d364ddd91 |
comparison
equal
deleted
inserted
replaced
| 35:d94018ca4ada | 36:44d5973c188c |
|---|---|
| 1 # | |
| 2 # Copyright INRA-URGI 2009-2010 | |
| 3 # | |
| 4 # This software is governed by the CeCILL license under French law and | |
| 5 # abiding by the rules of distribution of free software. You can use, | |
| 6 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 7 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 8 # "http://www.cecill.info". | |
| 9 # | |
| 10 # As a counterpart to the access to the source code and rights to copy, | |
| 11 # modify and redistribute granted by the license, users are provided only | |
| 12 # with a limited warranty and the software's author, the holder of the | |
| 13 # economic rights, and the successive licensors have only limited | |
| 14 # liability. | |
| 15 # | |
| 16 # In this respect, the user's attention is drawn to the risks associated | |
| 17 # with loading, using, modifying and/or developing or reproducing the | |
| 18 # software by the user in light of its specific status of free software, | |
| 19 # that may mean that it is complicated to manipulate, and that also | |
| 20 # therefore means that it is reserved for developers and experienced | |
| 21 # professionals having in-depth computer knowledge. Users are therefore | |
| 22 # encouraged to load and test the software's suitability as regards their | |
| 23 # requirements in conditions enabling the security of their systems and/or | |
| 24 # data to be ensured and, more generally, to use and operate it in the | |
| 25 # same conditions as regards security. | |
| 26 # | |
| 27 # The fact that you are presently reading this means that you have had | |
| 28 # knowledge of the CeCILL license and that you accept its terms. | |
| 29 # | |
| 30 | |
| 31 from SMART.Java.Python.structure.Bins import * | |
| 32 from commons.core.coord.Range import Range | |
| 33 | |
| 34 class Interval(Range): | |
| 35 """ | |
| 36 Store a genomic interval | |
| 37 @ivar name: name of the interval [optional] | |
| 38 @type name: string | |
| 39 @ivar id: id of the interval [optional] | |
| 40 @type id: int | |
| 41 @ivar bin: bin in which the interval should be if stored in a database [computed] | |
| 42 @type bin: int | |
| 43 @ival tags: information about the transcript [optional] | |
| 44 @type tags: dict | |
| 45 @ivar verbosity: verbosity | |
| 46 @type verbosity: int [default: 0] | |
| 47 """ | |
| 48 | |
| 49 def __init__(self, interval = None, verbosity = 0): | |
| 50 """ | |
| 51 Constructor | |
| 52 @param interval: interval to be copied | |
| 53 @type interval: class L{Interval<Interval>} | |
| 54 @param verbosity: verbosity | |
| 55 @type verbosity: int | |
| 56 """ | |
| 57 Range.__init__(self) | |
| 58 self.name = None | |
| 59 self.id = None | |
| 60 self.bin = None | |
| 61 self.verbosity = verbosity | |
| 62 self.tags = {} | |
| 63 if interval != None: | |
| 64 self.copy(interval) | |
| 65 | |
| 66 #!!!! Warning: two methods getStart() and getEnd() give the information maximum and minimum in interval.!!!!# | |
| 67 #In case strand = "+", start < end; strand = "-", start > end | |
| 68 def getStart(self): | |
| 69 if self.start == -1: | |
| 70 return -1 | |
| 71 if self.end == -1: | |
| 72 return self.start | |
| 73 return self.getMin() | |
| 74 | |
| 75 | |
| 76 def getEnd(self): | |
| 77 if self.end == -1: | |
| 78 return -1 | |
| 79 if self.start == -1: | |
| 80 return self.end | |
| 81 return self.getMax() | |
| 82 | |
| 83 | |
| 84 def getChromosome(self): | |
| 85 return self.getSeqname() | |
| 86 | |
| 87 | |
| 88 def getDirection(self): | |
| 89 return 1 if self.getStrand() == "+" else -1 | |
| 90 | |
| 91 | |
| 92 def getName(self): | |
| 93 return self.name | |
| 94 | |
| 95 | |
| 96 def isSet(self): | |
| 97 """ | |
| 98 Check if the interval is set | |
| 99 """ | |
| 100 return self.getStart() == None and self.getEnd() == None | |
| 101 | |
| 102 | |
| 103 def copy(self, interval): | |
| 104 """ | |
| 105 Copy method | |
| 106 @param interval: interval to be copied | |
| 107 @type interval: class L{Interval<Interval>} | |
| 108 """ | |
| 109 self.setStart(interval.getStart()) | |
| 110 self.setEnd(interval.getEnd()) | |
| 111 self.setChromosome(interval.getChromosome()) | |
| 112 self.setDirection(interval.getDirection()) | |
| 113 self.name = interval.name | |
| 114 self.id = interval.id | |
| 115 self.bin = interval.bin | |
| 116 self.tags = {} | |
| 117 for tag in interval.tags: | |
| 118 self.tags[tag] = interval.tags[tag] | |
| 119 self.verbosity = interval.verbosity | |
| 120 | |
| 121 | |
| 122 def setName(self, name): | |
| 123 """ | |
| 124 Set the name | |
| 125 @param name: name of the interval | |
| 126 @type name: string | |
| 127 """ | |
| 128 if len(name) > 100: | |
| 129 name = name[:100] | |
| 130 self.name = name | |
| 131 | |
| 132 | |
| 133 def setChromosome(self, chromosome=""): | |
| 134 """ | |
| 135 Set the chromosome | |
| 136 @param chromosome: chromosome on which the interval is | |
| 137 @type chromosome: string | |
| 138 """ | |
| 139 if not chromosome: | |
| 140 self.seqname = None | |
| 141 else: | |
| 142 self.seqname = chromosome.replace(".", "_").replace("|", "_") | |
| 143 | |
| 144 | |
| 145 def setStart(self, start): | |
| 146 """ | |
| 147 Set the start point | |
| 148 Possibly reset bin | |
| 149 @param start: start point of the interval | |
| 150 @type start: int | |
| 151 """ | |
| 152 self.bin = None | |
| 153 direction = self.getDirection() | |
| 154 if self.start == -1: | |
| 155 self.start = start | |
| 156 elif self.end == -1: | |
| 157 self.end = start | |
| 158 else: | |
| 159 if direction == 1: | |
| 160 self.start = start | |
| 161 else: | |
| 162 self.end = start | |
| 163 if direction == 1: | |
| 164 self.start, self.end = min(self.start, self.end), max(self.start, self.end) | |
| 165 else: | |
| 166 self.start, self.end = max(self.start, self.end), min(self.start, self.end) | |
| 167 | |
| 168 | |
| 169 def setEnd(self, end): | |
| 170 """ | |
| 171 Set the end point | |
| 172 Possibly reset bin | |
| 173 @param end: end point of the interval of the interval | |
| 174 @type end: int | |
| 175 """ | |
| 176 self.bin = None | |
| 177 direction = self.getDirection() | |
| 178 if self.end == -1: | |
| 179 self.end = end | |
| 180 elif self.start == -1: | |
| 181 self.start = end | |
| 182 else: | |
| 183 if direction == 1: | |
| 184 self.end = end | |
| 185 else: | |
| 186 self.start = end | |
| 187 if direction == 1: | |
| 188 self.start, self.end = min(self.start, self.end), max(self.start, self.end) | |
| 189 else: | |
| 190 self.start, self.end = max(self.start, self.end), min(self.start, self.end) | |
| 191 | |
| 192 | |
| 193 def setSize(self, size): | |
| 194 """ | |
| 195 Possibly modify the end point | |
| 196 @param size: size of the transcript | |
| 197 @type size: int | |
| 198 """ | |
| 199 if self.end == None and self.start != None: | |
| 200 self.setEnd(self.start + self.getSize() - 1) | |
| 201 elif self.start == None and self.end != None: | |
| 202 self.setStart(self.end - self.getSize() + 1) | |
| 203 | |
| 204 | |
| 205 def getSize(self): | |
| 206 """ | |
| 207 Get the size | |
| 208 """ | |
| 209 return self.getEnd() - self.getStart() + 1 | |
| 210 | |
| 211 | |
| 212 def _setDirection(self, direction): | |
| 213 """ | |
| 214 Set the direction of the interval (connection to Range) | |
| 215 @param direction: direction of the transcript (+ / -) | |
| 216 @type direction: int (1 or -1) | |
| 217 """ | |
| 218 if direction * self.getDirection() < 0: | |
| 219 self.reverse() | |
| 220 | |
| 221 | |
| 222 def setDirection(self, direction): | |
| 223 """ | |
| 224 Set the direction of the interval | |
| 225 Possibly parse different formats | |
| 226 @param direction: direction of the transcript (+ / -) | |
| 227 @type direction: int or string | |
| 228 """ | |
| 229 if type(direction).__name__ == 'int': | |
| 230 self._setDirection(direction / abs(direction)) | |
| 231 elif type(direction).__name__ == 'str': | |
| 232 if direction == "+": | |
| 233 self._setDirection(1) | |
| 234 elif direction == "-": | |
| 235 self._setDirection(-1) | |
| 236 elif direction == "1" or direction == "-1": | |
| 237 self._setDirection(int(direction)) | |
| 238 elif direction.lower() == "plus": | |
| 239 self._setDirection(1) | |
| 240 elif direction.lower() == "minus": | |
| 241 self._setDirection(-1) | |
| 242 else: | |
| 243 raise Exception("Cannot understand direction %s" % (direction)) | |
| 244 else: | |
| 245 raise Exception("Cannot understand direction %s" % (direction)) | |
| 246 | |
| 247 | |
| 248 def extendStart(self, size): | |
| 249 """ | |
| 250 Extend the interval by the 5' end | |
| 251 @param size: the size to be exended | |
| 252 @type size: int | |
| 253 """ | |
| 254 if self.getDirection() == 1: | |
| 255 self.setStart(max(0, self.getStart() - size)) | |
| 256 else: | |
| 257 self.setEnd(self.getEnd() + size) | |
| 258 self.bin = None | |
| 259 | |
| 260 | |
| 261 def extendEnd(self, size): | |
| 262 """ | |
| 263 Extend the interval by the 3' end | |
| 264 @param size: the size to be exended | |
| 265 @type size: int | |
| 266 """ | |
| 267 if self.getDirection() == 1: | |
| 268 self.setEnd(self.getEnd() + size) | |
| 269 else: | |
| 270 self.setStart(max(0, self.getStart() - size)) | |
| 271 self.bin = None | |
| 272 | |
| 273 | |
| 274 def restrictStart(self, size = 1): | |
| 275 """ | |
| 276 Restrict the interval by some nucleotides, start from its start position | |
| 277 Remove the exons | |
| 278 @param size: the size to be restricted to | |
| 279 @type size: int | |
| 280 """ | |
| 281 if self.getDirection() == 1: | |
| 282 self.setEnd(min(self.getEnd(), self.getStart() + size - 1)) | |
| 283 else: | |
| 284 self.setStart(max(self.getStart(), self.getEnd() - size + 1)) | |
| 285 self.bin = None | |
| 286 | |
| 287 | |
| 288 def restrictEnd(self, size = 1): | |
| 289 """ | |
| 290 Restrict the interval by some nucleotides, end from its end position | |
| 291 Remove the exons | |
| 292 @param size: the size to be restricted to | |
| 293 @type size: int | |
| 294 """ | |
| 295 if self.getDirection() == 1: | |
| 296 self.setStart(max(self.getStart(), self.getEnd() - size + 1)) | |
| 297 else: | |
| 298 self.setEnd(min(self.getEnd(), self.getStart() + size - 1)) | |
| 299 self.bin = None | |
| 300 | |
| 301 | |
| 302 | |
| 303 def setTagValue(self, name, value): | |
| 304 """ | |
| 305 Set a tag | |
| 306 @param name: name of the tag | |
| 307 @type name: string | |
| 308 @param value: value of the tag | |
| 309 @type value: int or string | |
| 310 """ | |
| 311 self.tags[name] = value | |
| 312 | |
| 313 | |
| 314 def getTagNames(self): | |
| 315 """ | |
| 316 Get all the names of the tags | |
| 317 """ | |
| 318 return self.tags.keys() | |
| 319 | |
| 320 | |
| 321 def getTagValue(self, tag): | |
| 322 """ | |
| 323 Get the value of a tag | |
| 324 @param tag: name of a tag | |
| 325 @type tag: string | |
| 326 """ | |
| 327 if tag not in self.tags: | |
| 328 return None | |
| 329 return self.tags[tag] | |
| 330 | |
| 331 | |
| 332 def getTagValues(self, tagSep = "; ", fieldSep = " ", surrounder = ""): | |
| 333 """ | |
| 334 Get the formatted tag values | |
| 335 @param tagSep: separator between tags | |
| 336 @type tagSep: string | |
| 337 @param fieldSep: separator between tag name and tag value | |
| 338 @type fieldSep: string | |
| 339 @param surrounder: string which optionally surround values | |
| 340 @type surrounder: string | |
| 341 """ | |
| 342 tags = [] | |
| 343 for name, value in self.tags.iteritems(): | |
| 344 if value == None: | |
| 345 continue | |
| 346 if isinstance(value, basestring): | |
| 347 tags.append("%s%s%s%s%s" % (name, fieldSep, surrounder, value.replace("'", "\\'"), surrounder)) | |
| 348 elif type(value) is int: | |
| 349 tags.append("%s%s%s%i%s" % (name, fieldSep, surrounder, value, surrounder)) | |
| 350 elif type(value) is float: | |
| 351 tags.append("%s%s%s%f%s" % (name, fieldSep, surrounder, value, surrounder)) | |
| 352 else: | |
| 353 raise Exception("Do not know how to print '" + value + "'.") | |
| 354 if self.getName() != None: | |
| 355 tags.append("%s%s%s%s%s" % ("Name", fieldSep, surrounder, self.getName(), surrounder)) | |
| 356 return tagSep.join(tags) | |
| 357 | |
| 358 | |
| 359 def setTagValues(self, tags, tagSep = "; ", fieldSep = " "): | |
| 360 """ | |
| 361 Set the tag values using given string | |
| 362 @param tags: the tags, concatenated | |
| 363 @type tags: string | |
| 364 @param tagSep: separator between tags | |
| 365 @type tagSep: string | |
| 366 @param fieldSep: separator between tag name and tag value | |
| 367 @type fieldSep: string | |
| 368 """ | |
| 369 if tags == "": | |
| 370 self.tags = {} | |
| 371 return | |
| 372 for splittedTag in tags.split(tagSep): | |
| 373 if fieldSep not in splittedTag: | |
| 374 raise Exception("Weird field '%s' in tags '%s'" % (splittedTag, tags)) | |
| 375 tag, value = splittedTag.split(fieldSep, 1) | |
| 376 if tag == "Name": | |
| 377 self.setName(value) | |
| 378 continue | |
| 379 try: | |
| 380 intValue = int(value) | |
| 381 self.tags[tag] = intValue | |
| 382 except ValueError: | |
| 383 try: | |
| 384 floatValue = float(value) | |
| 385 self.tags[tag] = floatValue | |
| 386 except ValueError: | |
| 387 self.tags[tag] = value | |
| 388 | |
| 389 | |
| 390 def deleteTag(self, tag): | |
| 391 """ | |
| 392 Remove a tag | |
| 393 @param tag: the tag to be removed | |
| 394 @type tag: string | |
| 395 """ | |
| 396 if tag in self.tags: | |
| 397 del self.tags[tag] | |
| 398 | |
| 399 | |
| 400 def setNbOccurrences(self, nbOccurrences): | |
| 401 """ | |
| 402 Set the number of occurrences of the interval | |
| 403 @param nbOccurrences: number of occurrences of the interval | |
| 404 @type nbOccurrences: int | |
| 405 """ | |
| 406 self.setTagValue("nbOccurrences", nbOccurrences) | |
| 407 | |
| 408 | |
| 409 def setOccurrence(self, occurrence): | |
| 410 """ | |
| 411 Set the occurrence of this interval | |
| 412 @param occurrence: an occurrence for this transcript | |
| 413 @type occurrence: int | |
| 414 """ | |
| 415 self.setTagValue("occurrence", occurrence) | |
| 416 | |
| 417 def __eq__(self, interval): | |
| 418 """ | |
| 419 Whether two intervals are equal (start and end at same position) | |
| 420 @param interval: object to be compared to | |
| 421 @type interval: class L{Interval<Interval>} | |
| 422 """ | |
| 423 if not interval: | |
| 424 return False | |
| 425 return self.getChromosome() == interval.getChromosome() and self.getStart() == interval.getStart() and self.getEnd() == interval.getEnd() and self.getDirection() == interval.getDirection() | |
| 426 | |
| 427 | |
| 428 def overlapWith(self, interval, nbNucleotides = 1): | |
| 429 """ | |
| 430 Whether two intervals overlap | |
| 431 @param interval: object to be compared to | |
| 432 @type interval: class L{Interval<Interval>} | |
| 433 @param nbNucleotides: minimum number of nucleotides to declare and overlap | |
| 434 @type nbNucleotides: int | |
| 435 """ | |
| 436 if self.getChromosome() != interval.getChromosome(): | |
| 437 return False | |
| 438 return (min(self.getEnd(), interval.getEnd()) - max(self.getStart(), interval.getStart()) + 1 >= nbNucleotides) | |
| 439 | |
| 440 def isIncludeIn(self, interval): | |
| 441 return interval.include(self) | |
| 442 | |
| 443 | |
| 444 def include(self, interval): | |
| 445 """ | |
| 446 Whether this interval includes the other one | |
| 447 @param interval: object to be compared to | |
| 448 @type interval: class L{Interval<Interval>} | |
| 449 """ | |
| 450 if self.getChromosome() != interval.getChromosome(): | |
| 451 return False | |
| 452 return ((self.getStart() <= interval.getStart()) and (self.getEnd() >= interval.getEnd())) | |
| 453 | |
| 454 | |
| 455 def getDifference(self, interval, sameStrand = False): | |
| 456 """ | |
| 457 Get the difference between this cluster and another one | |
| 458 @param interval: object to be compared to | |
| 459 @type interval: class L{Interval<Interval>} | |
| 460 @param sameStrand: do the comparison iff the intervals are on the same strand | |
| 461 @type sameStrand: boolean | |
| 462 @return: a (possibly empty) list of intervals | |
| 463 """ | |
| 464 newInterval = Interval() | |
| 465 newInterval.copy(self) | |
| 466 if self.getChromosome() != interval.getChromosome(): | |
| 467 return [newInterval] | |
| 468 if not self.overlapWith(interval): | |
| 469 return [newInterval] | |
| 470 if sameStrand and self.getDirection() != interval.getDirection(): | |
| 471 return [newInterval] | |
| 472 intervals = [] | |
| 473 if self.getStart() < interval.getStart(): | |
| 474 newInterval = Interval() | |
| 475 newInterval.copy(self) | |
| 476 newInterval.setEnd(min(self.getEnd(), interval.getStart() - 1)) | |
| 477 intervals.append(newInterval) | |
| 478 if self.getEnd() > interval.getEnd(): | |
| 479 newInterval = Interval() | |
| 480 newInterval.copy(self) | |
| 481 newInterval.setStart(max(self.getStart(), interval.getEnd() + 1)) | |
| 482 intervals.append(newInterval) | |
| 483 return intervals | |
| 484 | |
| 485 | |
| 486 def getIntersection(self, interval): | |
| 487 """ | |
| 488 Get the intersection between this interval and another one | |
| 489 @param interval: object to be compared to | |
| 490 @type interval: class L{Interval<Interval>} | |
| 491 @return: an other interval | |
| 492 """ | |
| 493 if not self.overlapWith(interval): | |
| 494 return None | |
| 495 newInterval = Interval() | |
| 496 newInterval.setChromosome(self.getChromosome()) | |
| 497 newInterval.setDirection(self.getDirection()) | |
| 498 newInterval.setName("%s_intersect_%s" % (self.getName(), interval.getName())) | |
| 499 newInterval.setStart(max(self.getStart(), interval.getStart())) | |
| 500 newInterval.setEnd(min(self.getEnd(), interval.getEnd())) | |
| 501 return newInterval | |
| 502 | |
| 503 | |
| 504 def getDistance(self, interval): | |
| 505 """ | |
| 506 Get the distance between two intervals (a non-negative value) | |
| 507 @param interval: another interval | |
| 508 @type interval: class L{Interval<Interval>} | |
| 509 """ | |
| 510 if self.overlapWith(interval): | |
| 511 return 0 | |
| 512 if self.getChromosome() != interval.getChromosome(): | |
| 513 raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval))) | |
| 514 return min(abs(self.getStart() - interval.getEnd()), abs(self.getEnd() - interval.getStart())) | |
| 515 | |
| 516 | |
| 517 def getRelativeDistance(self, interval): | |
| 518 """ | |
| 519 Get the distance between two intervals (negative if first interval is before) | |
| 520 @param interval: another interval | |
| 521 @type interval: class L{Interval<Interval>} | |
| 522 """ | |
| 523 if self.overlapWith(interval): | |
| 524 return 0 | |
| 525 if self.getChromosome() != interval.getChromosome(): | |
| 526 raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval))) | |
| 527 if self.getEnd() < interval.getStart(): | |
| 528 distance = interval.getStart() - self.getEnd() | |
| 529 else: | |
| 530 distance = interval.getEnd() - self.getStart() | |
| 531 distance *= self.getDirection() | |
| 532 return distance | |
| 533 | |
| 534 | |
| 535 def merge(self, interval, normalization = False): | |
| 536 """ | |
| 537 Merge two intervals | |
| 538 @param interval: another interval | |
| 539 @type interval: class L{Interval<Interval>} | |
| 540 @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements | |
| 541 @type normalization: boolean | |
| 542 """ | |
| 543 if self.getChromosome() != interval.getChromosome(): | |
| 544 raise Exception("Cannot merge '%s' and '%s' for they are on different chromosomes." % (str(self), str(interval))) | |
| 545 direction = None | |
| 546 if self.getStart() == self.getEnd(): | |
| 547 direction = interval.getDirection() | |
| 548 elif interval.getStart() == interval.getEnd(): | |
| 549 direction = self.getDirection() | |
| 550 elif self.getDirection() != interval.getDirection(): | |
| 551 raise Exception("Cannot merge '%s' and '%s' for they are on different strands." % (str(self), str(interval))) | |
| 552 self.setStart(min(self.getStart(), interval.getStart())) | |
| 553 self.setEnd(max(self.getEnd(), interval.getEnd())) | |
| 554 if direction != None: | |
| 555 self.setDirection(direction) | |
| 556 nbElements = 0.0 | |
| 557 for element in (self, interval): | |
| 558 for tagName in ("nbElements", "nbOccurrences"): | |
| 559 if tagName not in element.getTagNames(): | |
| 560 element.setTagValue(tagName, 1) | |
| 561 nbElements += float(element.getTagValue("nbElements")) / float(element.getTagValue("nbOccurrences")) if normalization else float(element.getTagValue("nbElements")) | |
| 562 self.setTagValue("nbElements", nbElements) | |
| 563 self.bin = None | |
| 564 for tagName in ("identity", "nbOccurrences", "occurrence", "nbMismatches", "nbGaps", "rank", "evalue", "bestRegion"): | |
| 565 if tagName in self.getTagNames(): | |
| 566 del self.tags[tagName] | |
| 567 | |
| 568 | |
| 569 def getBin(self): | |
| 570 """ | |
| 571 Get the bin of the interval | |
| 572 Computed on the fly | |
| 573 """ | |
| 574 if self.bin == None: | |
| 575 self.bin = getBin(self.getStart(), self.getEnd()) | |
| 576 return self.bin | |
| 577 | |
| 578 | |
| 579 def getBins(self): | |
| 580 """ | |
| 581 Get all the bin this interval could fall into | |
| 582 """ | |
| 583 return getOverlappingBins(self.getStart(), self.getEnd()) | |
| 584 | |
| 585 | |
| 586 def getSqlVariables(cls): | |
| 587 """ | |
| 588 Get the properties of the object that should be saved in a database | |
| 589 """ | |
| 590 variables = ["name", "chromosome", "start", "end", "direction", "tags", "bin"] | |
| 591 return variables | |
| 592 getSqlVariables = classmethod(getSqlVariables) | |
| 593 | |
| 594 | |
| 595 def setSqlValues(self, array): | |
| 596 """ | |
| 597 Set the values of the properties of this object as given by a results line of a SQL query | |
| 598 """ | |
| 599 self.id = array[0] | |
| 600 self.name = array[1].strip("'") | |
| 601 self.setChromosome(array[2].strip("'")) | |
| 602 self.setStart(array[3]) | |
| 603 self.setEnd(array[4]) | |
| 604 self.setDirection(array[5]) | |
| 605 self.setTagValues(array[6].strip("'"), ";", "=") | |
| 606 self.bin = array[7] | |
| 607 | |
| 608 | |
| 609 def getSqlValues(self): | |
| 610 """ | |
| 611 Get the values of the properties that should be saved in a database | |
| 612 """ | |
| 613 values = dict() | |
| 614 values["name"] = self.name | |
| 615 values["chromosome"] = self.getChromosome() | |
| 616 values["start"] = self.getStart() | |
| 617 values["end"] = self.getEnd() | |
| 618 values["direction"] = self.getDirection() | |
| 619 values["tags"] = self.getTagValues(";", "=") | |
| 620 values["bin"] = self.getBin() | |
| 621 return values | |
| 622 | |
| 623 | |
| 624 def getSqlTypes(cls): | |
| 625 """ | |
| 626 Get the values of the properties that should be saved in a database | |
| 627 """ | |
| 628 types = dict() | |
| 629 types["name"] = "varchar" | |
| 630 types["chromosome"] = "varchar" | |
| 631 types["start"] = "int" | |
| 632 types["end"] = "int" | |
| 633 types["direction"] = "tinyint" | |
| 634 types["tags"] = "varchar" | |
| 635 types["bin"] = "int" | |
| 636 return types | |
| 637 getSqlTypes = classmethod(getSqlTypes) | |
| 638 | |
| 639 | |
| 640 def getSqlSizes(cls): | |
| 641 """ | |
| 642 Get the sizes of the properties that should be saved in a database | |
| 643 """ | |
| 644 sizes = dict() | |
| 645 sizes["name"] = 255 | |
| 646 sizes["chromosome"] = 255 | |
| 647 sizes["start"] = 11 | |
| 648 sizes["end"] = 11 | |
| 649 sizes["direction"] = 4 | |
| 650 sizes["tags"] = 1023 | |
| 651 sizes["bin"] = 11 | |
| 652 return sizes | |
| 653 getSqlSizes = classmethod(getSqlSizes) | |
| 654 | |
| 655 | |
| 656 def printCoordinates(self): | |
| 657 """ | |
| 658 Print start and end positions (depending on the direction of the interval) | |
| 659 """ | |
| 660 if self.getDirection() == 1: | |
| 661 return "%d-%d" % (self.getStart(), self.getEnd()) | |
| 662 else: | |
| 663 return "%d-%d" % (self.getEnd(), self.getStart()) | |
| 664 | |
| 665 | |
| 666 def extractSequence(self, parser): | |
| 667 """ | |
| 668 Get the sequence corresponding to this interval | |
| 669 @param parser: a parser to a FASTA file | |
| 670 @type parser: class L{SequenceListParser<SequenceListParser>} | |
| 671 @return : a instance of L{Sequence<Sequence>} | |
| 672 """ | |
| 673 return parser.getSubSequence(self.getChromosome(), self.getStart(), self.getEnd(), self.getDirection(), self.name) | |
| 674 | |
| 675 | |
| 676 def extractWigData(self, parser): | |
| 677 """ | |
| 678 Get the data retrieved from a wig file | |
| 679 @param parser: a parser class to a WIG file | |
| 680 @type parser: class L{WigParser<WigParser>} | |
| 681 """ | |
| 682 data = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd()) | |
| 683 if self.getDirection() == -1: | |
| 684 if parser.strands: | |
| 685 newData = {} | |
| 686 for strand in data: | |
| 687 data[strand].reverse() | |
| 688 newData[-strand] = data[strand] | |
| 689 data = newData | |
| 690 else: | |
| 691 data.reverse() | |
| 692 return data | |
| 693 | |
| 694 | |
| 695 def __str__(self): | |
| 696 """ | |
| 697 Output a simple representation of this interval | |
| 698 """ | |
| 699 direction = "+" | |
| 700 if self.getDirection() == -1: | |
| 701 direction = "-" | |
| 702 string = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), direction) | |
| 703 if self.name != "": | |
| 704 string = "(%s) %s" % (self.name, string) | |
| 705 return string | |
| 706 |
