#
# Copyright INRA-URGI 2009-2010
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#

from SMART.Java.Python.structure.Bins import *
from commons.core.coord.Range import Range

class Interval(Range):
    """
    Store a genomic interval

    The coordinates (C{seqname}, C{start}, C{end}) are attributes handled
    together with the L{Range} base class; this class adds a name, an id, a
    database bin and a dictionary of free-form tags.

    Conventions used throughout this class:
      - a raw coordinate equal to -1 means "not set yet";
      - on the "+" strand the raw C{start} is lower than the raw C{end}, on
        the "-" strand it is the opposite;
      - L{getStart} and L{getEnd} hide this and always return the lowest and
        the highest position, whatever the strand.

    @ivar name:          name of the interval [optional]
    @type name:          string
    @ivar id:            id of the interval [optional]
    @type id:            int
    @ivar bin:           bin in which the interval should be if stored in a database [computed]
    @type bin:           int
    @ivar tags:          information about the transcript [optional]
    @type tags:          dict
    @ivar verbosity:     verbosity
    @type verbosity:     int [default: 0]
    """

    def __init__(self, interval = None, verbosity = 0):
        """
        Constructor
        @param interval:  interval to be copied (nothing is copied if None)
        @type  interval:  class L{Interval<Interval>}
        @param verbosity: verbosity
        @type  verbosity: int
        """
        Range.__init__(self)
        self.name      = None
        self.id        = None
        self.bin       = None
        self.verbosity = verbosity
        self.tags      = {}
        if interval is not None:
            # copy() also overwrites the verbosity with the one of the model
            self.copy(interval)

    #!!!! Warning: the two methods getStart() and getEnd() give the minimum and the maximum of the interval. !!!!#
    # Internally, strand = "+" means start < end; strand = "-" means start > end
    def getStart(self):
        """
        Get the lowest position of the interval, whatever its strand
        @return: -1 if the raw start is not set, the raw start if only the
                 raw end is not set, the minimum of both ends otherwise
        """
        if self.start == -1:
            return -1
        if self.end == -1:
            return self.start
        return self.getMin()


    def getEnd(self):
        """
        Get the highest position of the interval, whatever its strand
        @return: -1 if the raw end is not set, the raw end if only the
                 raw start is not set, the maximum of both ends otherwise
        """
        if self.end == -1:
            return -1
        if self.start == -1:
            return self.end
        return self.getMax()


    def getChromosome(self):
        """
        Get the chromosome (the sequence name stored by the base class)
        """
        return self.getSeqname()


    def getDirection(self):
        """
        Get the direction of the interval
        @return: 1 if the strand is "+", -1 in any other case
        """
        return 1 if self.getStrand() == "+" else -1


    def getName(self):
        """
        Get the name of the interval (None if it has never been set)
        """
        return self.name


    def isSet(self):
        """
        Check if the interval is set
        @return: True iff both the start and the end positions are set
        """
        # -1 is the "unset" marker used by getStart() / getEnd(); None is
        # also accepted as "unset" to stay tolerant with the previous test.
        unset = (None, -1)
        return self.getStart() not in unset and self.getEnd() not in unset


    def copy(self, interval):
        """
        Copy method: make this interval a copy of the given one
        The tags are copied in a new dictionary (shallow copy of the values).
        @param interval: interval to be copied
        @type  interval: class L{Interval<Interval>}
        """
        # Coordinates first, direction last: setDirection() reverses the
        # interval if the coordinates just stored imply the other strand.
        self.setStart(interval.getStart())
        self.setEnd(interval.getEnd())
        self.setChromosome(interval.getChromosome())
        self.setDirection(interval.getDirection())
        self.name      = interval.name
        self.id        = interval.id
        # setStart() / setEnd() reset the bin, so it is copied afterwards
        self.bin       = interval.bin
        self.tags      = dict(interval.tags)
        self.verbosity = interval.verbosity


    def setName(self, name):
        """
        Set the name
        Names longer than 100 characters are truncated to their first 100
        characters; None is stored as is.
        @param name: name of the interval
        @type  name: string
        """
        if name is not None and len(name) > 100:
            name = name[:100]
        self.name = name


    def setChromosome(self, chromosome=""):
        """
        Set the chromosome
        An empty (or None) value resets the sequence name to None; any "|"
        in the name is replaced by "_".
        @param chromosome: chromosome on which the interval is
        @type  chromosome: string
        """
        if not chromosome:
            self.seqname = None
        else:
            self.seqname = chromosome.replace("|", "_")


    def setStart(self, start):
        """
        Set the start point (i.e. the lowest position of the interval)
        Possibly reset bin
        @param start: start point of the interval
        @type  start: int
        """
        self.bin  = None
        # The direction is read before any coordinate is modified
        direction = self.getDirection()
        if self.start == -1:
            # first coordinate ever given: store it in the free slot
            self.start = start
        elif self.end == -1:
            self.end = start
        elif direction == 1:
            self.start = start
        else:
            # on the "-" strand, the lowest position is the raw end
            self.end = start
        # Restore the strand convention: start <= end on "+", start >= end on "-"
        lowest, highest = min(self.start, self.end), max(self.start, self.end)
        if direction == 1:
            self.start, self.end = lowest, highest
        else:
            self.start, self.end = highest, lowest


    def setEnd(self, end):
        """
        Set the end point (i.e. the highest position of the interval)
        Possibly reset bin
        @param end: end point of the interval
        @type  end: int
        """
        self.bin  = None
        # The direction is read before any coordinate is modified
        direction = self.getDirection()
        if self.end == -1:
            # first coordinate ever given: store it in the free slot
            self.end = end
        elif self.start == -1:
            self.start = end
        elif direction == 1:
            self.end = end
        else:
            # on the "-" strand, the highest position is the raw start
            self.start = end
        # Restore the strand convention: start <= end on "+", start >= end on "-"
        lowest, highest = min(self.start, self.end), max(self.start, self.end)
        if direction == 1:
            self.start, self.end = lowest, highest
        else:
            self.start, self.end = highest, lowest


    def setSize(self, size):
        """
        Set the missing end point of a half-defined interval from its size
        Nothing is done if both points, or none of them, are already set.
        @param size: size of the transcript
        @type  size: int
        """
        # The previous code used self.getSize() instead of the given size and
        # only recognised None as "unset", whereas -1 is the marker used by
        # the accessors of this class; both markers are accepted here.
        unset = (None, -1)
        if self.end in unset and self.start not in unset:
            self.setEnd(self.start + size - 1)
        elif self.start in unset and self.end not in unset:
            self.setStart(self.end - size + 1)


    def getSize(self):
        """
        Get the size (number of positions, both ends included)
        """
        return self.getEnd() - self.getStart() + 1


    def _setDirection(self, direction):
        """
        Set the direction of the interval (connection to Range)
        The interval is reversed iff the sign of the given direction differs
        from the current one.
        @param direction: direction of the transcript (+ / -)
        @type  direction: int (1 or -1)
        """
        if direction * self.getDirection() < 0:
            self.reverse()


    def setDirection(self, direction):
        """
        Set the direction of the interval
        Possibly parse different formats: any non-null int (only its sign is
        used), "+", "-", "1", "-1", "plus" or "minus" (whatever the case).
        @param direction: direction of the transcript (+ / -)
        @type  direction: int or string
        @raise Exception: if the direction cannot be understood
        @raise ZeroDivisionError: if the direction is the int 0
        """
        typeName = type(direction).__name__
        if typeName == 'int':
            # floor division keeps an int result (-1 or 1), also when "/" is
            # the true division
            self._setDirection(direction // abs(direction))
            return
        if typeName == 'str':
            if direction in ("+", "1") or direction.lower() == "plus":
                self._setDirection(1)
                return
            if direction in ("-", "-1") or direction.lower() == "minus":
                self._setDirection(-1)
                return
        raise Exception("Cannot understand direction %s" % (direction))


    def extendStart(self, size):
        """
        Extend the interval by the 5' end
        On the "+" strand, the new start never goes below 0.
        @param size: the size to be extended
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setStart(max(0, self.getStart() - size))
        else:
            # on the "-" strand, the 5' end is the highest position
            self.setEnd(self.getEnd() + size)
        self.bin  = None


    def extendEnd(self, size):
        """
        Extend the interval by the 3' end
        On the "-" strand, the new start never goes below 0.
        @param size: the size to be extended
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setEnd(self.getEnd() + size)
        else:
            # on the "-" strand, the 3' end is the lowest position
            self.setStart(max(0, self.getStart() - size))
        self.bin  = None


    def restrictStart(self, size = 1):
        """
        Restrict the interval to its first nucleotides (5' side)
        The interval is left unchanged if it is not longer than the size.
        @param size: the size to be restricted to
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setEnd(min(self.getEnd(), self.getStart() + size - 1))
        else:
            self.setStart(max(self.getStart(), self.getEnd() - size + 1))
        self.bin  = None


    def restrictEnd(self, size = 1):
        """
        Restrict the interval to its last nucleotides (3' side)
        The interval is left unchanged if it is not longer than the size.
        @param size: the size to be restricted to
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setStart(max(self.getStart(), self.getEnd() - size + 1))
        else:
            self.setEnd(min(self.getEnd(), self.getStart() + size - 1))
        self.bin  = None


    def setTagValue(self, name, value):
        """
        Set a tag
        @param name:  name of the tag
        @type  name:  string
        @param value: value of the tag
        @type  value: int or string
        """
        self.tags[name] = value


    def getTagNames(self):
        """
        Get all the names of the tags
        """
        return self.tags.keys()


    def getTagValue(self, tag):
        """
        Get the value of a tag
        @param tag: name of a tag
        @type  tag: string
        @return:    the value of the tag, or None if the tag is not set
        """
        return self.tags.get(tag)


    def getTagValues(self, tagSep = "; ", fieldSep = " ", surrounder = ""):
        """
        Get the formatted tag values
        Tags whose value is None are skipped. If the interval has a name, it
        is added at the end as a "Name" tag.
        @param tagSep:     separator between tags
        @type  tagSep:     string
        @param fieldSep:   separator between tag name and tag value
        @type  fieldSep:   string
        @param surrounder: string which optionally surround values
        @type  surrounder: string
        @return:           the tags, concatenated in one string
        @raise Exception:  if a value is neither a string, an int nor a float
        """
        # "basestring" only exists in Python 2; fall back on str elsewhere
        try:
            stringTypes = basestring
        except NameError:
            stringTypes = str
        tags = []
        for name, value in self.tags.items():
            if value is None:
                continue
            if isinstance(value, stringTypes):
                # single quotes are escaped in string values
                tags.append("%s%s%s%s%s" % (name, fieldSep, surrounder, value.replace("'", "\\'"), surrounder))
            elif type(value) is int:
                tags.append("%s%s%s%i%s" % (name, fieldSep, surrounder, value, surrounder))
            elif type(value) is float:
                tags.append("%s%s%s%f%s" % (name, fieldSep, surrounder, value, surrounder))
            else:
                # the value is not a string here: it has to be formatted, not
                # concatenated (which would raise a TypeError instead)
                raise Exception("Do not know how to print '%s'." % (value,))
        if self.getName() is not None:
            tags.append("%s%s%s%s%s" % ("Name", fieldSep, surrounder, self.getName(), surrounder))
        return tagSep.join(tags)


    def setTagValues(self, tags, tagSep = "; ", fieldSep = " "):
        """
        Set the tag values using given string
        An empty string removes all the tags; otherwise the parsed tags are
        added to the existing ones. A "Name" tag sets the name of the
        interval instead. Values are stored as int if possible, else as
        float if possible, else as string.
        @param tags:      the tags, concatenated
        @type  tags:      string
        @param tagSep:    separator between tags
        @type  tagSep:    string
        @param fieldSep:  separator between tag name and tag value
        @type  fieldSep:  string
        @raise Exception: if a tag does not contain the field separator
        """
        if tags == "":
            self.tags = {}
            return
        for splittedTag in tags.split(tagSep):
            if fieldSep not in splittedTag:
                raise Exception("Weird field '%s' in tags '%s'" % (splittedTag, tags))
            # only the first separator splits: the value may contain others
            tag, value = splittedTag.split(fieldSep, 1)
            if tag == "Name":
                self.setName(value)
                continue
            for converter in (int, float):
                try:
                    self.tags[tag] = converter(value)
                    break
                except ValueError:
                    pass
            else:
                # neither an int nor a float: keep the raw string
                self.tags[tag] = value


    def deleteTag(self, tag):
        """
        Remove a tag (nothing is done if the tag is not set)
        @param tag: the tag to be removed
        @type  tag: string
        """
        self.tags.pop(tag, None)


    def setNbOccurrences(self, nbOccurrences):
        """
        Set the number of occurrences of the interval ("nbOccurrences" tag)
        @param nbOccurrences: number of occurrences of the interval
        @type  nbOccurrences: int
        """
        self.setTagValue("nbOccurrences", nbOccurrences)


    def setOccurrence(self, occurrence):
        """
        Set the occurrence of this interval ("occurrence" tag)
        @param occurrence: an occurrence for this transcript
        @type  occurrence: int
        """
        self.setTagValue("occurrence", occurrence)


    def __eq__(self, interval):
        """
        Whether two intervals are equal (same chromosome, start, end and direction)
        Names, ids and tags are not compared.
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        """
        if not interval:
            return False
        return self.getChromosome() == interval.getChromosome() and self.getStart() == interval.getStart() and self.getEnd() == interval.getEnd() and self.getDirection() == interval.getDirection()


    def overlapWith(self, interval, nbNucleotides = 1):
        """
        Whether two intervals overlap (the strands are not compared)
        @param interval:      object to be compared to
        @type  interval:      class L{Interval<Interval>}
        @param nbNucleotides: minimum number of nucleotides to declare an overlap
        @type  nbNucleotides: int
        """
        if self.getChromosome() != interval.getChromosome():
            return False
        # size of the common part (null or negative if there is none)
        overlapSize = min(self.getEnd(), interval.getEnd()) - max(self.getStart(), interval.getStart()) + 1
        return overlapSize >= nbNucleotides


    def isIncludeIn(self, interval):
        """
        Whether this interval is included in the other one
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        """
        return interval.include(self)


    def include(self, interval):
        """
        Whether this interval includes the other one (the strands are not compared)
        @param interval:      object to be compared to
        @type  interval:      class L{Interval<Interval>}
        """
        if self.getChromosome() != interval.getChromosome():
            return False
        return ((self.getStart() <= interval.getStart()) and (self.getEnd() >= interval.getEnd()))


    def getDifference(self, interval, sameStrand = False):
        """
        Get the difference between this interval and another one
        The result is made of copies of this interval: a single full copy if
        the intervals do not overlap (or are on different strands while
        sameStrand is set), else the parts of this interval located before
        and/or after the other one.
        @param interval:   object to be compared to
        @type  interval:   class L{Interval<Interval>}
        @param sameStrand: do the comparison iff the intervals are on the same strand
        @type  sameStrand: boolean
        @return:           a (possibly empty) list of intervals
        """
        newInterval = Interval()
        newInterval.copy(self)
        if self.getChromosome() != interval.getChromosome():
            return [newInterval]
        if not self.overlapWith(interval):
            return [newInterval]
        if sameStrand and self.getDirection() != interval.getDirection():
            return [newInterval]
        intervals = []
        if self.getStart() < interval.getStart():
            # part of this interval located before the other one
            newInterval = Interval()
            newInterval.copy(self)
            newInterval.setEnd(min(self.getEnd(), interval.getStart() - 1))
            intervals.append(newInterval)
        if self.getEnd() > interval.getEnd():
            # part of this interval located after the other one
            newInterval = Interval()
            newInterval.copy(self)
            newInterval.setStart(max(self.getStart(), interval.getEnd() + 1))
            intervals.append(newInterval)
        return intervals


    def getIntersection(self, interval):
        """
        Get the intersection between this interval and another one
        The new interval has the chromosome and the direction of this
        interval, and is named "<this name>_intersect_<other name>".
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        @return:         an other interval, or None if they do not overlap
        """
        if not self.overlapWith(interval):
            return None
        newInterval = Interval()
        newInterval.setChromosome(self.getChromosome())
        newInterval.setDirection(self.getDirection())
        newInterval.setName("%s_intersect_%s" % (self.getName(), interval.getName()))
        newInterval.setStart(max(self.getStart(), interval.getStart()))
        newInterval.setEnd(min(self.getEnd(), interval.getEnd()))
        return newInterval


    def getDistance(self, interval):
        """
        Get the distance between two intervals (a non-negative value)
        @param interval:  another interval
        @type  interval:  class L{Interval<Interval>}
        @return:          0 if the intervals overlap, else the gap between them
        @raise Exception: if the intervals are on different chromosomes
        """
        if self.overlapWith(interval):
            return 0
        if self.getChromosome() != interval.getChromosome():
            raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval)))
        return min(abs(self.getStart() - interval.getEnd()), abs(self.getEnd() - interval.getStart()))


    def getRelativeDistance(self, interval):
        """
        Get the signed distance between two intervals
        The value is 0 if the intervals overlap. Otherwise, on the "+" strand,
        it is positive if the other interval is located after this one and
        negative if it is located before; the sign is inverted if this
        interval is on the "-" strand.
        @param interval:  another interval
        @type  interval:  class L{Interval<Interval>}
        @raise Exception: if the intervals are on different chromosomes
        """
        if self.overlapWith(interval):
            return 0
        if self.getChromosome() != interval.getChromosome():
            raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval)))
        if self.getEnd() < interval.getStart():
            distance = interval.getStart() - self.getEnd()
        else:
            distance = interval.getEnd() - self.getStart()
        return distance * self.getDirection()


    def merge(self, interval, normalization = False):
        """
        Merge two intervals: this interval is extended to cover both
        The "nbElements" tag of this interval becomes the sum of the
        "nbElements" of both intervals; the alignment-specific tags of this
        interval are removed.
        Side effect: the tags "nbElements" and "nbOccurrences" are set to 1 in
        both intervals (the given one included) when they are missing.
        @param interval:      another interval
        @type  interval:      class L{Interval<Interval>}
        @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements
        @type  normalization: boolean
        @raise Exception:     if the intervals are on different chromosomes, or on different strands
        """
        if self.getChromosome() != interval.getChromosome():
            raise Exception("Cannot merge '%s' and '%s' for they are on different chromosomes." % (str(self), str(interval)))
        # An interval made of one position takes the direction of the other
        # interval; otherwise both directions should be the same.
        direction = None
        if self.getStart() == self.getEnd():
            direction = interval.getDirection()
        elif interval.getStart() == interval.getEnd():
            direction = self.getDirection()
        elif self.getDirection() != interval.getDirection():
            raise Exception("Cannot merge '%s' and '%s' for they are on different strands." % (str(self), str(interval)))
        self.setStart(min(self.getStart(), interval.getStart()))
        self.setEnd(max(self.getEnd(), interval.getEnd()))
        if direction is not None:
            self.setDirection(direction)
        nbElements = 0.0
        for element in (self, interval):
            for tagName in ("nbElements", "nbOccurrences"):
                if tagName not in element.getTagNames():
                    element.setTagValue(tagName, 1)
            weight = float(element.getTagValue("nbElements"))
            if normalization:
                weight /= float(element.getTagValue("nbOccurrences"))
            nbElements += weight
        self.setTagValue("nbElements", nbElements)
        self.bin = None
        # These tags describe one single mapping: they are meaningless once merged
        for tagName in ("identity", "nbOccurrences", "occurrence", "nbMismatches", "nbGaps", "rank", "evalue", "bestRegion"):
            self.tags.pop(tagName, None)


    def getBin(self):
        """
        Get the bin of the interval
        Computed on the fly, and kept until the coordinates change
        """
        if self.bin is None:
            # module-level getBin(), brought by the Bins import
            self.bin = getBin(self.getStart(), self.getEnd())
        return self.bin


    def getBins(self):
        """
        Get all the bin this interval could fall into
        """
        return getOverlappingBins(self.getStart(), self.getEnd())


    @classmethod
    def getSqlVariables(cls):
        """
        Get the properties of the object that should be saved in a database
        @return: the list of the names of the properties
        """
        return ["name", "chromosome", "start", "end", "direction", "tags", "bin"]


    def setSqlValues(self, array):
        """
        Set the values of the properties of this object as given by a results line of a SQL query
        @param array: the fields id, name, chromosome, start, end, direction,
                      tags (formatted as "tag=value;tag=value") and bin, in
                      this order
        @type  array: list
        """
        self.id         = array[0]
        self.name       = array[1].strip("'")
        self.setChromosome(array[2].strip("'"))
        self.setStart(array[3])
        self.setEnd(array[4])
        self.setDirection(array[5])
        self.setTagValues(array[6].strip("'"), ";", "=")
        # set last, since setStart() and setEnd() reset the bin
        self.bin        = array[7]


    def getSqlValues(self):
        """
        Get the values of the properties that should be saved in a database
        @return: a dictionary property name -> value
        """
        return {
            "name":       self.name,
            "chromosome": self.getChromosome(),
            "start":      self.getStart(),
            "end":        self.getEnd(),
            "direction":  self.getDirection(),
            "tags":       self.getTagValues(";", "="),
            "bin":        self.getBin(),
        }


    @classmethod
    def getSqlTypes(cls):
        """
        Get the SQL types of the properties that should be saved in a database
        @return: a dictionary property name -> SQL type
        """
        return {
            "name":       "varchar",
            "chromosome": "varchar",
            "start":      "int",
            "end":        "int",
            "direction":  "tinyint",
            "tags":       "varchar",
            "bin":        "int",
        }


    @classmethod
    def getSqlSizes(cls):
        """
        Get the sizes of the properties that should be saved in a database
        @return: a dictionary property name -> size
        """
        return {
            "name":       255,
            "chromosome": 255,
            "start":      11,
            "end":        11,
            "direction":  4,
            "tags":       1023,
            "bin":        11,
        }


    def printCoordinates(self):
        """
        Format start and end positions (depending on the direction of the interval)
        @return: "start-end" on the "+" strand, "end-start" otherwise
        """
        if self.getDirection() == 1:
            return "%d-%d" % (self.getStart(), self.getEnd())
        return "%d-%d" % (self.getEnd(), self.getStart())


    def extractSequence(self, parser):
        """
        Get the sequence corresponding to this interval
        @param parser: a parser to a FASTA file
        @type  parser: class L{SequenceListParser<SequenceListParser>}
        @return:       a instance of L{Sequence<Sequence>}
        """
        return parser.getSubSequence(self.getChromosome(), self.getStart(), self.getEnd(), self.getDirection(), self.name)


    def extractWigData(self, parser):
        """
        Get the data retrieved from a wig file
        If the interval is on the "-" strand, the data are reversed (in
        place); if the parser is stranded, the strand keys are also negated.
        @param parser: a parser class to a WIG file
        @type  parser: class L{WigParser<WigParser>}
        """
        data = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd())
        if self.getDirection() != -1:
            return data
        if not parser.strands:
            data.reverse()
            return data
        newData = {}
        for strand in data:
            data[strand].reverse()
            newData[-strand] = data[strand]
        return newData


    def __str__(self):
        """
        Output a simple representation of this interval
        @return: "(name) chromosome:start-end (strand)"
        """
        direction = "-" if self.getDirection() == -1 else "+"
        string = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), direction)
        # NOTE(review): the default name is None, not "", so unnamed intervals
        # are prefixed by "(None)"; kept as is since the output may be parsed.
        if self.name != "":
            string = "(%s) %s" % (self.name, string)
        return string
