| 
36
 | 
     1 #
 | 
| 
 | 
     2 # Copyright INRA-URGI 2009-2010
 | 
| 
 | 
     3 # 
 | 
| 
 | 
     4 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     5 # abiding by the rules of distribution of free software. You can use,
 | 
| 
 | 
     6 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     7 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     8 # "http://www.cecill.info".
 | 
| 
 | 
     9 # 
 | 
| 
 | 
    10 # As a counterpart to the access to the source code and rights to copy,
 | 
| 
 | 
    11 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    12 # with a limited warranty and the software's author, the holder of the
 | 
| 
 | 
    13 # economic rights, and the successive licensors have only limited
 | 
| 
 | 
    14 # liability.
 | 
| 
 | 
    15 # 
 | 
| 
 | 
    16 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    17 # with loading, using, modifying and/or developing or reproducing the
 | 
| 
 | 
    18 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    19 # that may mean that it is complicated to manipulate, and that also
 | 
| 
 | 
    20 # therefore means that it is reserved for developers and experienced
 | 
| 
 | 
    21 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    22 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    23 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    24 # data to be ensured and, more generally, to use and operate it in the
 | 
| 
 | 
    25 # same conditions as regards security.
 | 
| 
 | 
    26 # 
 | 
| 
 | 
    27 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    28 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    29 #
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 from SMART.Java.Python.structure.Bins import *
 | 
| 
 | 
    32 from commons.core.coord.Range import Range
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 class Interval(Range):
 | 
| 
 | 
    35     """
 | 
| 
 | 
    36     Store a genomic interval
 | 
| 
 | 
    37     @ivar name:          name of the interval [optional]
 | 
| 
 | 
    38     @type name:          string
 | 
| 
 | 
    39     @ivar id:            id of the interval [optional]
 | 
| 
 | 
    40     @type id:            int
 | 
| 
 | 
    41     @ivar bin:           bin in which the interval should be if stored in a database [computed]
 | 
| 
 | 
    42     @type bin:           int 
 | 
| 
 | 
    @ivar tags:          information about the transcript [optional]
 | 
| 
 | 
    44     @type tags:          dict
 | 
| 
 | 
    45     @ivar verbosity:     verbosity
 | 
| 
 | 
    46     @type verbosity:     int [default: 0]
 | 
| 
 | 
    47     """
 | 
| 
 | 
    48 
 | 
| 
 | 
    def __init__(self, interval = None, verbosity = 0):
        """
        Build an interval, either empty or cloned from another one
        @param interval:  interval whose content is copied into this one [optional]
        @type  interval:  class L{Interval<Interval>}
        @param verbosity: verbosity
        @type  verbosity: int
        """
        Range.__init__(self)
        self.verbosity = verbosity
        self.tags      = {}
        self.name = self.id = self.bin = None
        # When a template is given, copy() overwrites every field set above
        if interval is not None:
            self.copy(interval)
 | 
| 
 | 
    65 
 | 
| 
 | 
    # Warning: getStart() and getEnd() return the minimum and the maximum coordinates of the interval, whatever the strand.
    # Internally, start < end when strand = "+", and start > end when strand = "-".
 | 
| 
 | 
    def getStart(self):
        """
        Get the start of the interval
        @return: -1 when the start is not set, the stored start when only the end is not set, the value of getMin() otherwise
        """
        if self.start != -1:
            return self.start if self.end == -1 else self.getMin()
        return -1
 | 
| 
 | 
    74 
 | 
| 
 | 
    75     
 | 
| 
 | 
    def getEnd(self):
        """
        Get the end of the interval
        @return: -1 when the end is not set, the stored end when only the start is not set, the value of getMax() otherwise
        """
        if self.end != -1:
            return self.end if self.start == -1 else self.getMax()
        return -1
 | 
| 
 | 
    82 
 | 
| 
 | 
    83 
 | 
| 
 | 
    def getChromosome(self):
        """
        Get the chromosome, i.e. the value returned by getSeqname()
        """
        seqname = self.getSeqname()
        return seqname
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 
 | 
| 
 | 
    def getDirection(self):
        """
        Get the direction as a signed integer
        @return: 1 if getStrand() returns "+", -1 otherwise
        """
        if self.getStrand() == "+":
            return 1
        return -1
 | 
| 
 | 
    90 
 | 
| 
 | 
    91 
 | 
| 
 | 
    def getName(self):
        """
        Get the name of the interval
        @return: the value stored in the name attribute
        """
        return self.name
 | 
| 
 | 
    94 
 | 
| 
 | 
    95 
 | 
| 
 | 
    def isSet(self):
        """
        Check if the interval is set
        @return: True if both getStart() and getEnd() return an actual coordinate, False otherwise
        """
        # -1 is the "not set" marker tested by getStart()/getEnd(); None is also
        # treated as "not set" since the previous implementation compared with it.
        unset = (None, -1)
        return self.getStart() not in unset and self.getEnd() not in unset
 | 
| 
 | 
   101 
 | 
| 
 | 
   102 
 | 
| 
 | 
   103     def copy(self, interval):
 | 
| 
 | 
   104         """
 | 
| 
 | 
   105         Copy method
 | 
| 
 | 
   106         @param interval: interval to be copied
 | 
| 
 | 
   107         @type    interval: class L{Interval<Interval>}
 | 
| 
 | 
   108         """
 | 
| 
 | 
   109         self.setStart(interval.getStart())
 | 
| 
 | 
   110         self.setEnd(interval.getEnd())
 | 
| 
 | 
   111         self.setChromosome(interval.getChromosome())
 | 
| 
 | 
   112         self.setDirection(interval.getDirection())
 | 
| 
 | 
   113         self.name      = interval.name     
 | 
| 
 | 
   114         self.id        = interval.id
 | 
| 
 | 
   115         self.bin       = interval.bin
 | 
| 
 | 
   116         self.tags      = {}
 | 
| 
 | 
   117         for tag in interval.tags:
 | 
| 
 | 
   118             self.tags[tag] = interval.tags[tag]
 | 
| 
 | 
   119         self.verbosity     = interval.verbosity
 | 
| 
 | 
   120 
 | 
| 
 | 
   121 
 | 
| 
 | 
   122     def setName(self, name):
 | 
| 
 | 
   123         """
 | 
| 
 | 
   124         Set the name
 | 
| 
 | 
   125         @param name: name of the interval
 | 
| 
 | 
   126         @type    name: string
 | 
| 
 | 
   127         """
 | 
| 
 | 
   128         if len(name) > 100:
 | 
| 
 | 
   129             name = name[:100]
 | 
| 
 | 
   130         self.name = name
 | 
| 
 | 
   131 
 | 
| 
 | 
   132 
 | 
| 
 | 
   133     def setChromosome(self, chromosome=""):
 | 
| 
 | 
   134         """
 | 
| 
 | 
   135         Set the chromosome
 | 
| 
 | 
   136         @param chromosome: chromosome on which the interval is
 | 
| 
 | 
   137         @type    chromosome: string
 | 
| 
 | 
   138         """
 | 
| 
 | 
   139         if not chromosome:
 | 
| 
 | 
   140             self.seqname = None
 | 
| 
 | 
   141         else:
 | 
| 
46
 | 
   142             self.seqname = chromosome.replace("|", "_")
 | 
| 
36
 | 
   143 
 | 
| 
 | 
   144 
 | 
| 
 | 
   145     def setStart(self, start):
 | 
| 
 | 
   146         """
 | 
| 
 | 
   147         Set the start point
 | 
| 
 | 
   148         Possibly reset bin
 | 
| 
 | 
   149         @param start: start point of the interval
 | 
| 
 | 
   150         @type    start: int
 | 
| 
 | 
   151         """
 | 
| 
 | 
   152         self.bin  = None
 | 
| 
 | 
   153         direction = self.getDirection()
 | 
| 
 | 
   154         if self.start == -1:
 | 
| 
 | 
   155             self.start = start
 | 
| 
 | 
   156         elif self.end == -1:
 | 
| 
 | 
   157             self.end = start
 | 
| 
 | 
   158         else:
 | 
| 
 | 
   159             if direction == 1:
 | 
| 
 | 
   160                 self.start = start
 | 
| 
 | 
   161             else:
 | 
| 
 | 
   162                 self.end = start
 | 
| 
 | 
   163         if direction == 1:
 | 
| 
 | 
   164             self.start, self.end = min(self.start, self.end), max(self.start, self.end)
 | 
| 
 | 
   165         else:
 | 
| 
 | 
   166             self.start, self.end = max(self.start, self.end), min(self.start, self.end)
 | 
| 
 | 
   167 
 | 
| 
 | 
   168 
 | 
| 
 | 
   169     def setEnd(self, end):
 | 
| 
 | 
   170         """
 | 
| 
 | 
   171         Set the end point
 | 
| 
 | 
   172         Possibly reset bin
 | 
| 
 | 
   173         @param end: end point of the interval of the interval
 | 
| 
 | 
   174         @type    end: int
 | 
| 
 | 
   175         """
 | 
| 
 | 
   176         self.bin  = None
 | 
| 
 | 
   177         direction = self.getDirection()
 | 
| 
 | 
   178         if self.end == -1:
 | 
| 
 | 
   179             self.end = end
 | 
| 
 | 
   180         elif self.start == -1:
 | 
| 
 | 
   181             self.start = end
 | 
| 
 | 
   182         else:
 | 
| 
 | 
   183             if direction == 1:
 | 
| 
 | 
   184                 self.end = end
 | 
| 
 | 
   185             else:
 | 
| 
 | 
   186                 self.start = end
 | 
| 
 | 
   187         if direction == 1:
 | 
| 
 | 
   188             self.start, self.end = min(self.start, self.end), max(self.start, self.end)
 | 
| 
 | 
   189         else:
 | 
| 
 | 
   190             self.start, self.end = max(self.start, self.end), min(self.start, self.end)
 | 
| 
 | 
   191 
 | 
| 
 | 
   192 
 | 
| 
 | 
   193     def setSize(self, size):
 | 
| 
 | 
   194         """
 | 
| 
 | 
   195         Possibly modify the end point
 | 
| 
 | 
   196         @param size: size of the transcript
 | 
| 
 | 
   197         @type    size: int
 | 
| 
 | 
   198         """
 | 
| 
 | 
   199         if self.end == None and self.start != None:
 | 
| 
 | 
   200             self.setEnd(self.start + self.getSize() - 1)
 | 
| 
 | 
   201         elif self.start == None and self.end != None:
 | 
| 
 | 
   202             self.setStart(self.end - self.getSize() + 1)
 | 
| 
 | 
   203 
 | 
| 
 | 
   204 
 | 
| 
 | 
   205     def getSize(self):
 | 
| 
 | 
   206         """
 | 
| 
 | 
   207         Get the size
 | 
| 
 | 
   208         """
 | 
| 
 | 
   209         return self.getEnd() - self.getStart() + 1
 | 
| 
 | 
   210 
 | 
| 
 | 
   211 
 | 
| 
 | 
   212     def _setDirection(self, direction):
 | 
| 
 | 
   213         """
 | 
| 
 | 
   214         Set the direction of the interval (connection to Range)
 | 
| 
 | 
   215         @param direction: direction of the transcript (+ / -)
 | 
| 
 | 
   216         @type  direction: int (1 or -1)
 | 
| 
 | 
   217         """
 | 
| 
 | 
   218         if direction * self.getDirection() < 0:
 | 
| 
 | 
   219             self.reverse()
 | 
| 
 | 
   220 
 | 
| 
 | 
   221 
 | 
| 
 | 
   222     def setDirection(self, direction):
 | 
| 
 | 
   223         """
 | 
| 
 | 
   224         Set the direction of the interval
 | 
| 
 | 
   225         Possibly parse different formats
 | 
| 
 | 
   226         @param direction: direction of the transcript (+ / -)
 | 
| 
 | 
   227         @type    direction: int or string
 | 
| 
 | 
   228         """
 | 
| 
 | 
   229         if type(direction).__name__ == 'int':
 | 
| 
 | 
   230             self._setDirection(direction / abs(direction))
 | 
| 
 | 
   231         elif type(direction).__name__ == 'str':
 | 
| 
 | 
   232             if direction == "+":
 | 
| 
 | 
   233                 self._setDirection(1)
 | 
| 
 | 
   234             elif direction == "-":
 | 
| 
 | 
   235                 self._setDirection(-1)
 | 
| 
 | 
   236             elif direction == "1" or direction == "-1":
 | 
| 
 | 
   237                 self._setDirection(int(direction))
 | 
| 
 | 
   238             elif direction.lower() == "plus":
 | 
| 
 | 
   239                 self._setDirection(1)
 | 
| 
 | 
   240             elif direction.lower() == "minus":
 | 
| 
 | 
   241                 self._setDirection(-1)
 | 
| 
 | 
   242             else:
 | 
| 
 | 
   243                 raise Exception("Cannot understand direction %s" % (direction))
 | 
| 
 | 
   244         else:
 | 
| 
 | 
   245             raise Exception("Cannot understand direction %s" % (direction))
 | 
| 
 | 
   246 
 | 
| 
 | 
   247 
 | 
| 
 | 
   248     def extendStart(self, size):
 | 
| 
 | 
   249         """
 | 
| 
 | 
   250         Extend the interval by the 5' end
 | 
| 
 | 
   251         @param size: the size to be exended
 | 
| 
 | 
   252         @type    size: int
 | 
| 
 | 
   253         """
 | 
| 
 | 
   254         if self.getDirection() == 1:
 | 
| 
 | 
   255             self.setStart(max(0, self.getStart() - size))
 | 
| 
 | 
   256         else:
 | 
| 
 | 
   257             self.setEnd(self.getEnd() + size)
 | 
| 
 | 
   258         self.bin  = None
 | 
| 
 | 
   259         
 | 
| 
 | 
   260         
 | 
| 
 | 
   261     def extendEnd(self, size):
 | 
| 
 | 
   262         """
 | 
| 
 | 
   263         Extend the interval by the 3' end
 | 
| 
 | 
   264         @param size: the size to be exended
 | 
| 
 | 
   265         @type    size: int
 | 
| 
 | 
   266         """
 | 
| 
 | 
   267         if self.getDirection() == 1:
 | 
| 
 | 
   268             self.setEnd(self.getEnd() + size)
 | 
| 
 | 
   269         else:
 | 
| 
 | 
   270             self.setStart(max(0, self.getStart() - size))
 | 
| 
 | 
   271         self.bin  = None
 | 
| 
 | 
   272         
 | 
| 
 | 
   273         
 | 
| 
 | 
   274     def restrictStart(self, size = 1):
 | 
| 
 | 
   275         """
 | 
| 
 | 
   276         Restrict the interval by some nucleotides, start from its start position
 | 
| 
 | 
   277         Remove the exons
 | 
| 
 | 
   278         @param size: the size to be restricted to
 | 
| 
 | 
   279         @type    size: int
 | 
| 
 | 
   280         """
 | 
| 
 | 
   281         if self.getDirection() == 1:
 | 
| 
 | 
   282             self.setEnd(min(self.getEnd(), self.getStart() + size - 1))
 | 
| 
 | 
   283         else:
 | 
| 
 | 
   284             self.setStart(max(self.getStart(), self.getEnd() - size + 1))
 | 
| 
 | 
   285         self.bin  = None
 | 
| 
 | 
   286         
 | 
| 
 | 
   287         
 | 
| 
 | 
   288     def restrictEnd(self, size = 1):
 | 
| 
 | 
   289         """
 | 
| 
 | 
   290         Restrict the interval by some nucleotides, end from its end position
 | 
| 
 | 
   291         Remove the exons
 | 
| 
 | 
   292         @param size: the size to be restricted to
 | 
| 
 | 
   293         @type    size: int
 | 
| 
 | 
   294         """
 | 
| 
 | 
   295         if self.getDirection() == 1:
 | 
| 
 | 
   296             self.setStart(max(self.getStart(), self.getEnd() - size + 1))
 | 
| 
 | 
   297         else:
 | 
| 
 | 
   298             self.setEnd(min(self.getEnd(), self.getStart() + size - 1))
 | 
| 
 | 
   299         self.bin  = None
 | 
| 
 | 
   300 
 | 
| 
 | 
   301             
 | 
| 
 | 
   302     
 | 
| 
 | 
   303     def setTagValue(self, name, value):
 | 
| 
 | 
   304         """
 | 
| 
 | 
   305         Set a tag
 | 
| 
 | 
   306         @param name:    name of the tag
 | 
| 
 | 
   307         @type    name:    string
 | 
| 
 | 
   308         @param value: value of the tag
 | 
| 
 | 
   309         @type    value: int or string
 | 
| 
 | 
   310         """
 | 
| 
 | 
   311         self.tags[name] = value
 | 
| 
 | 
   312     
 | 
| 
 | 
   313     
 | 
| 
 | 
   314     def getTagNames(self):
 | 
| 
 | 
   315         """
 | 
| 
 | 
   316         Get all the names of the tags
 | 
| 
 | 
   317         """
 | 
| 
 | 
   318         return self.tags.keys()
 | 
| 
 | 
   319 
 | 
| 
 | 
   320 
 | 
| 
 | 
   321     def getTagValue(self, tag):
 | 
| 
 | 
   322         """
 | 
| 
 | 
   323         Get the value of a tag
 | 
| 
 | 
   324         @param tag: name of a tag
 | 
| 
 | 
   325         @type    tag: string
 | 
| 
 | 
   326         """
 | 
| 
 | 
   327         if tag not in self.tags:
 | 
| 
 | 
   328             return None
 | 
| 
 | 
   329         return self.tags[tag]
 | 
| 
 | 
   330 
 | 
| 
 | 
   331 
 | 
| 
 | 
   332     def getTagValues(self, tagSep = "; ", fieldSep = " ", surrounder = ""):
 | 
| 
 | 
   333         """
 | 
| 
 | 
   334         Get the formatted tag values
 | 
| 
 | 
   335         @param tagSep:     separator between tags
 | 
| 
 | 
   336         @type  tagSep:     string
 | 
| 
 | 
   337         @param fieldSep:   separator between tag name and tag value
 | 
| 
 | 
   338         @type  fieldSep:   string
 | 
| 
 | 
   339         @param surrounder: string which optionally surround values
 | 
| 
 | 
   340         @type  surrounder: string
 | 
| 
 | 
   341         """
 | 
| 
 | 
   342         tags = []
 | 
| 
 | 
   343         for name, value in self.tags.iteritems():
 | 
| 
 | 
   344             if value == None:
 | 
| 
 | 
   345                 continue
 | 
| 
 | 
   346             if isinstance(value, basestring):
 | 
| 
 | 
   347                 tags.append("%s%s%s%s%s" % (name, fieldSep, surrounder, value.replace("'", "\\'"), surrounder))
 | 
| 
 | 
   348             elif type(value) is int:
 | 
| 
 | 
   349                 tags.append("%s%s%s%i%s" % (name, fieldSep, surrounder, value, surrounder))
 | 
| 
 | 
   350             elif type(value) is float:
 | 
| 
 | 
   351                 tags.append("%s%s%s%f%s" % (name, fieldSep, surrounder, value, surrounder))
 | 
| 
 | 
   352             else:
 | 
| 
 | 
   353                 raise Exception("Do not know how to print '" + value + "'.")
 | 
| 
 | 
   354         if self.getName() != None:
 | 
| 
 | 
   355             tags.append("%s%s%s%s%s" % ("Name", fieldSep, surrounder, self.getName(), surrounder))
 | 
| 
 | 
   356         return tagSep.join(tags)
 | 
| 
 | 
   357 
 | 
| 
 | 
   358     
 | 
| 
 | 
   359     def setTagValues(self, tags, tagSep = "; ", fieldSep = " "):
 | 
| 
 | 
   360         """
 | 
| 
 | 
   361         Set the tag values using given string
 | 
| 
 | 
   362         @param tags:     the tags, concatenated
 | 
| 
 | 
   363         @type  tags:     string
 | 
| 
 | 
   364         @param tagSep:   separator between tags
 | 
| 
 | 
   365         @type  tagSep:   string
 | 
| 
 | 
   366         @param fieldSep: separator between tag name and tag value
 | 
| 
 | 
   367         @type  fieldSep: string
 | 
| 
 | 
   368         """
 | 
| 
 | 
   369         if tags == "":
 | 
| 
 | 
   370             self.tags = {}
 | 
| 
 | 
   371             return
 | 
| 
 | 
   372         for splittedTag in tags.split(tagSep):
 | 
| 
 | 
   373             if fieldSep not in splittedTag:
 | 
| 
 | 
   374                 raise Exception("Weird field '%s' in tags '%s'" % (splittedTag, tags))
 | 
| 
 | 
   375             tag, value = splittedTag.split(fieldSep, 1)
 | 
| 
 | 
   376             if tag == "Name":
 | 
| 
 | 
   377                 self.setName(value)
 | 
| 
 | 
   378                 continue
 | 
| 
 | 
   379             try:
 | 
| 
 | 
   380                 intValue       = int(value)
 | 
| 
 | 
   381                 self.tags[tag] = intValue
 | 
| 
 | 
   382             except ValueError:
 | 
| 
 | 
   383                 try:
 | 
| 
 | 
   384                     floatValue     = float(value)
 | 
| 
 | 
   385                     self.tags[tag] = floatValue
 | 
| 
 | 
   386                 except ValueError:
 | 
| 
 | 
   387                     self.tags[tag] = value
 | 
| 
 | 
   388 
 | 
| 
 | 
   389 
 | 
| 
 | 
   390     def deleteTag(self, tag):
 | 
| 
 | 
   391         """
 | 
| 
 | 
   392         Remove a tag
 | 
| 
 | 
   393         @param tag: the tag to be removed
 | 
| 
 | 
   394         @type    tag: string
 | 
| 
 | 
   395         """
 | 
| 
 | 
   396         if tag in self.tags:
 | 
| 
 | 
   397             del self.tags[tag]
 | 
| 
 | 
   398 
 | 
| 
 | 
   399     
 | 
| 
 | 
   400     def setNbOccurrences(self, nbOccurrences):
 | 
| 
 | 
   401         """
 | 
| 
 | 
   402         Set the number of occurrences of the interval
 | 
| 
 | 
   403         @param nbOccurrences: number of occurrences of the interval
 | 
| 
 | 
   404         @type    nbOccurrences: int
 | 
| 
 | 
   405         """
 | 
| 
 | 
   406         self.setTagValue("nbOccurrences", nbOccurrences)
 | 
| 
 | 
   407     
 | 
| 
 | 
   408         
 | 
| 
 | 
   409     def setOccurrence(self, occurrence):
 | 
| 
 | 
   410         """
 | 
| 
 | 
   411         Set the occurrence of this interval
 | 
| 
 | 
   412         @param occurrence: an occurrence for this transcript
 | 
| 
 | 
   413         @type    occurrence: int
 | 
| 
 | 
   414         """
 | 
| 
 | 
   415         self.setTagValue("occurrence", occurrence)
 | 
| 
 | 
   416     
 | 
| 
 | 
   417     def __eq__(self, interval):
 | 
| 
 | 
   418         """
 | 
| 
 | 
   419         Whether two intervals are equal (start and end at same position)
 | 
| 
 | 
   420         @param interval: object to be compared to
 | 
| 
 | 
   421         @type    interval: class L{Interval<Interval>}
 | 
| 
 | 
   422         """
 | 
| 
 | 
   423         if not interval:
 | 
| 
 | 
   424             return False
 | 
| 
 | 
   425         return self.getChromosome() == interval.getChromosome() and self.getStart() == interval.getStart() and self.getEnd() == interval.getEnd() and self.getDirection() == interval.getDirection()
 | 
| 
 | 
   426 
 | 
| 
 | 
   427 
 | 
| 
 | 
   428     def overlapWith(self, interval, nbNucleotides = 1):
 | 
| 
 | 
   429         """
 | 
| 
 | 
   430         Whether two intervals overlap
 | 
| 
 | 
   431         @param interval:        object to be compared to
 | 
| 
 | 
   432         @type    interval:        class L{Interval<Interval>}
 | 
| 
 | 
   433         @param nbNucleotides: minimum number of nucleotides to declare and overlap
 | 
| 
 | 
   434         @type    nbNucleotides: int
 | 
| 
 | 
   435         """    
 | 
| 
 | 
   436         if self.getChromosome() != interval.getChromosome():
 | 
| 
 | 
   437             return False
 | 
| 
 | 
   438         return (min(self.getEnd(), interval.getEnd()) - max(self.getStart(), interval.getStart()) + 1 >= nbNucleotides)
 | 
| 
 | 
   439 
 | 
| 
 | 
   440     def isIncludeIn(self, interval):
 | 
| 
 | 
   441         return interval.include(self)
 | 
| 
 | 
   442 
 | 
| 
 | 
   443 
 | 
| 
 | 
   444     def include(self, interval):
 | 
| 
 | 
   445         """
 | 
| 
 | 
   446         Whether this interval includes the other one
 | 
| 
 | 
   447         @param interval:      object to be compared to
 | 
| 
 | 
   448         @type  interval:      class L{Interval<Interval>}
 | 
| 
 | 
   449         """    
 | 
| 
 | 
   450         if self.getChromosome() != interval.getChromosome():
 | 
| 
 | 
   451             return False
 | 
| 
 | 
   452         return ((self.getStart() <= interval.getStart()) and (self.getEnd() >= interval.getEnd()))
 | 
| 
 | 
   453     
 | 
| 
 | 
   454     
 | 
| 
 | 
   455     def getDifference(self, interval, sameStrand = False):
 | 
| 
 | 
   456         """
 | 
| 
 | 
   457         Get the difference between this cluster and another one
 | 
| 
 | 
   458         @param interval:   object to be compared to
 | 
| 
 | 
   459         @type  interval:   class L{Interval<Interval>}
 | 
| 
 | 
   460         @param sameStrand: do the comparison iff the intervals are on the same strand
 | 
| 
 | 
   461         @type  sameStrand: boolean
 | 
| 
 | 
   462         @return:           a (possibly empty) list of intervals
 | 
| 
 | 
   463         """    
 | 
| 
 | 
   464         newInterval = Interval()
 | 
| 
 | 
   465         newInterval.copy(self)
 | 
| 
 | 
   466         if self.getChromosome() != interval.getChromosome():
 | 
| 
 | 
   467             return [newInterval]
 | 
| 
 | 
   468         if not self.overlapWith(interval):
 | 
| 
 | 
   469             return [newInterval]
 | 
| 
 | 
   470         if sameStrand and self.getDirection() != interval.getDirection():
 | 
| 
 | 
   471             return [newInterval]
 | 
| 
 | 
   472         intervals = []
 | 
| 
 | 
   473         if self.getStart() < interval.getStart():
 | 
| 
 | 
   474             newInterval = Interval()
 | 
| 
 | 
   475             newInterval.copy(self)
 | 
| 
 | 
   476             newInterval.setEnd(min(self.getEnd(), interval.getStart() - 1))
 | 
| 
 | 
   477             intervals.append(newInterval)
 | 
| 
 | 
   478         if self.getEnd() > interval.getEnd():
 | 
| 
 | 
   479             newInterval = Interval()
 | 
| 
 | 
   480             newInterval.copy(self)
 | 
| 
 | 
   481             newInterval.setStart(max(self.getStart(), interval.getEnd() + 1))
 | 
| 
 | 
   482             intervals.append(newInterval)
 | 
| 
 | 
   483         return intervals
 | 
| 
 | 
   484     
 | 
| 
 | 
   485     
 | 
| 
 | 
   486     def getIntersection(self, interval):
 | 
| 
 | 
   487         """
 | 
| 
 | 
   488         Get the intersection between this interval and another one
 | 
| 
 | 
   489         @param interval: object to be compared to
 | 
| 
 | 
   490         @type  interval: class L{Interval<Interval>}
 | 
| 
 | 
   491         @return:         an other interval
 | 
| 
 | 
   492         """    
 | 
| 
 | 
   493         if not self.overlapWith(interval):
 | 
| 
 | 
   494             return None
 | 
| 
 | 
   495         newInterval = Interval()
 | 
| 
 | 
   496         newInterval.setChromosome(self.getChromosome())
 | 
| 
 | 
   497         newInterval.setDirection(self.getDirection())
 | 
| 
 | 
   498         newInterval.setName("%s_intersect_%s" % (self.getName(), interval.getName()))
 | 
| 
 | 
   499         newInterval.setStart(max(self.getStart(), interval.getStart()))
 | 
| 
 | 
   500         newInterval.setEnd(min(self.getEnd(), interval.getEnd()))
 | 
| 
 | 
   501         return newInterval
 | 
| 
 | 
   502     
 | 
| 
 | 
   503     
 | 
| 
 | 
   504     def getDistance(self, interval):
 | 
| 
 | 
   505         """
 | 
| 
 | 
   506         Get the distance between two intervals (a non-negative value)
 | 
| 
 | 
   507         @param interval: another interval
 | 
| 
 | 
   508         @type    interval: class L{Interval<Interval>}
 | 
| 
 | 
   509         """    
 | 
| 
 | 
   510         if self.overlapWith(interval):
 | 
| 
 | 
   511             return 0
 | 
| 
 | 
   512         if self.getChromosome() != interval.getChromosome():
 | 
| 
 | 
   513             raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval)))
 | 
| 
 | 
   514         return min(abs(self.getStart() - interval.getEnd()), abs(self.getEnd() - interval.getStart()))
 | 
| 
 | 
   515 
 | 
| 
 | 
   516 
 | 
| 
 | 
   517     def getRelativeDistance(self, interval):
 | 
| 
 | 
   518         """
 | 
| 
 | 
   519         Get the distance between two intervals (negative if first interval is before)
 | 
| 
 | 
   520         @param interval: another interval
 | 
| 
 | 
   521         @type    interval: class L{Interval<Interval>}
 | 
| 
 | 
   522         """    
 | 
| 
 | 
   523         if self.overlapWith(interval):
 | 
| 
 | 
   524             return 0
 | 
| 
 | 
   525         if self.getChromosome() != interval.getChromosome():
 | 
| 
 | 
   526             raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval)))
 | 
| 
 | 
   527         if self.getEnd() < interval.getStart():
 | 
| 
 | 
   528             distance = interval.getStart() - self.getEnd()
 | 
| 
 | 
   529         else:
 | 
| 
 | 
   530             distance = interval.getEnd() - self.getStart()
 | 
| 
 | 
   531         distance *= self.getDirection()
 | 
| 
 | 
   532         return distance
 | 
| 
 | 
   533 
 | 
| 
 | 
   534 
 | 
| 
 | 
   535     def merge(self, interval, normalization = False):
 | 
| 
 | 
   536         """
 | 
| 
 | 
   537         Merge two intervals
 | 
| 
 | 
   538         @param interval:        another interval
 | 
| 
 | 
   539         @type    interval:        class L{Interval<Interval>}
 | 
| 
 | 
   540         @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements
 | 
| 
 | 
   541         @type    normalization: boolean
 | 
| 
 | 
   542         """
 | 
| 
 | 
   543         if self.getChromosome() != interval.getChromosome():
 | 
| 
 | 
   544             raise Exception("Cannot merge '%s' and '%s' for they are on different chromosomes." % (str(self), str(interval)))
 | 
| 
 | 
   545         direction = None
 | 
| 
 | 
   546         if self.getStart() == self.getEnd():
 | 
| 
 | 
   547             direction = interval.getDirection()
 | 
| 
 | 
   548         elif interval.getStart() == interval.getEnd():
 | 
| 
 | 
   549             direction = self.getDirection()
 | 
| 
 | 
   550         elif self.getDirection() != interval.getDirection():
 | 
| 
 | 
   551             raise Exception("Cannot merge '%s' and '%s' for they are on different strands." % (str(self), str(interval)))
 | 
| 
 | 
   552         self.setStart(min(self.getStart(), interval.getStart()))
 | 
| 
 | 
   553         self.setEnd(max(self.getEnd(), interval.getEnd()))
 | 
| 
 | 
   554         if direction != None:
 | 
| 
 | 
   555             self.setDirection(direction)
 | 
| 
 | 
   556         nbElements = 0.0
 | 
| 
 | 
   557         for element in (self, interval):
 | 
| 
 | 
   558             for tagName in ("nbElements", "nbOccurrences"):
 | 
| 
 | 
   559                 if tagName not in element.getTagNames():
 | 
| 
 | 
   560                     element.setTagValue(tagName, 1)
 | 
| 
 | 
   561             nbElements += float(element.getTagValue("nbElements")) / float(element.getTagValue("nbOccurrences")) if normalization else float(element.getTagValue("nbElements"))
 | 
| 
 | 
   562         self.setTagValue("nbElements", nbElements)
 | 
| 
 | 
   563         self.bin = None
 | 
| 
 | 
   564         for tagName in ("identity", "nbOccurrences", "occurrence", "nbMismatches", "nbGaps", "rank", "evalue", "bestRegion"):
 | 
| 
 | 
   565             if tagName in self.getTagNames():
 | 
| 
 | 
   566                 del self.tags[tagName]
 | 
| 
 | 
   567 
 | 
| 
 | 
   568 
 | 
| 
 | 
   569     def getBin(self):
 | 
| 
 | 
   570         """
 | 
| 
 | 
   571         Get the bin of the interval
 | 
| 
 | 
   572         Computed on the fly
 | 
| 
 | 
   573         """
 | 
| 
 | 
   574         if self.bin == None:
 | 
| 
 | 
   575             self.bin = getBin(self.getStart(), self.getEnd())
 | 
| 
 | 
   576         return self.bin
 | 
| 
 | 
   577 
 | 
| 
 | 
   578 
 | 
| 
 | 
   579     def getBins(self):
 | 
| 
 | 
   580         """
 | 
| 
 | 
   581         Get all the bin this interval could fall into
 | 
| 
 | 
   582         """
 | 
| 
 | 
   583         return getOverlappingBins(self.getStart(), self.getEnd())
 | 
| 
 | 
   584 
 | 
| 
 | 
   585 
 | 
| 
 | 
   586     def getSqlVariables(cls):
 | 
| 
 | 
   587         """
 | 
| 
 | 
   588         Get the properties of the object that should be saved in a database
 | 
| 
 | 
   589         """
 | 
| 
 | 
   590         variables = ["name", "chromosome", "start", "end", "direction", "tags", "bin"]
 | 
| 
 | 
   591         return variables
 | 
| 
 | 
   592     getSqlVariables = classmethod(getSqlVariables)
 | 
| 
 | 
   593 
 | 
| 
 | 
   594 
 | 
| 
 | 
   def setSqlValues(self, array):
       """
       Fill the properties of this object from one results line of a SQL query
       @param array: the values of the line, read by index: id, name, chromosome, start, end, direction, tags, bin
       """
       quote = "'"
       self.id = array[0]
       # name, chromosome and tags are stripped of surrounding single quotes
       self.name = array[1].strip(quote)
       chromosome = array[2].strip(quote)
       self.setChromosome(chromosome)
       self.setStart(array[3])
       self.setEnd(array[4])
       self.setDirection(array[5])
       # tags are serialized as "key=value" pairs separated by ";"
       tags = array[6].strip(quote)
       self.setTagValues(tags, ";", "=")
       self.bin = array[7]
 | 
| 
 | 
   607 
 | 
| 
 | 
   608 
 | 
| 
 | 
   def getSqlValues(self):
       """
       Collect the values of the properties that should be saved in a database
       @return: a dictionary mapping each property name to its current value
       """
       # entries are evaluated top to bottom, tags being serialized as "key=value" pairs separated by ";"
       return {
           "name":       self.name,
           "chromosome": self.getChromosome(),
           "start":      self.getStart(),
           "end":        self.getEnd(),
           "direction":  self.getDirection(),
           "tags":       self.getTagValues(";", "="),
           "bin":        self.getBin(),
       }
 | 
| 
 | 
   622 
 | 
| 
 | 
   623 
 | 
| 
 | 
   @classmethod
   def getSqlTypes(cls):
       """
       Get the SQL types of the properties that should be saved in a database
       @return: a new dictionary mapping each property name to the name of its SQL type
       """
       return {
           "name":       "varchar",
           "chromosome": "varchar",
           "start":      "int",
           "end":        "int",
           "direction":  "tinyint",
           "tags":       "varchar",
           "bin":        "int",
       }
 | 
| 
 | 
   638     
 | 
| 
 | 
   639 
 | 
| 
 | 
   @classmethod
   def getSqlSizes(cls):
       """
       Get the sizes of the properties that should be saved in a database
       @return: a new dictionary mapping each property name to its size
       """
       return {
           "name":       255,
           "chromosome": 255,
           "start":      11,
           "end":        11,
           "direction":  4,
           "tags":       1023,
           "bin":        11,
       }
 | 
| 
 | 
   654     
 | 
| 
 | 
   655 
 | 
| 
 | 
   def printCoordinates(self):
       """
       Format the start and end positions (depending on the direction of the interval)
       The string is returned, not written to any output.
       @return: "start-end" if the direction is 1, "end-start" otherwise
       """
       forward = self.getDirection() == 1
       first, last = (self.getStart(), self.getEnd()) if forward else (self.getEnd(), self.getStart())
       return "%d-%d" % (first, last)
 | 
| 
 | 
   664 
 | 
| 
 | 
   665     
 | 
| 
 | 
   def extractSequence(self, parser):
       """
       Fetch the sequence corresponding to this interval
       @param parser: a parser to a FASTA file
       @type  parser: class L{SequenceListParser<SequenceListParser>}
       @return:       what C{parser.getSubSequence} gives for this interval (an instance of L{Sequence<Sequence>})
       """
       chromosome = self.getChromosome()
       start      = self.getStart()
       end        = self.getEnd()
       direction  = self.getDirection()
       return parser.getSubSequence(chromosome, start, end, direction, self.name)
 | 
| 
 | 
   674     
 | 
| 
 | 
   675     
 | 
| 
 | 
   def extractWigData(self, parser):
       """
       Get the data retrieved from a wig file, oriented like this interval
       @param parser: a parser class to a WIG file
       @type  parser: class L{WigParser<WigParser>}
       @return:       the data given by C{parser.getRange} for this interval; if the direction is -1, the values are reversed and, when C{parser.strands} is set, the strand keys are negated
       """
       values = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd())
       reverse = self.getDirection() == -1
       if not reverse:
           return values
       if not parser.strands:
           # one list of values: flip it in place
           values.reverse()
           return values
       # one list of values per strand: flip each list in place and store it under the opposite strand
       flipped = {}
       for strand in values:
           track = values[strand]
           track.reverse()
           flipped[-strand] = track
       return flipped
 | 
| 
 | 
   693 
 | 
| 
 | 
   694 
 | 
| 
 | 
   def __str__(self):
       """
       Output a simple representation of this interval
       @return: "chromosome:start-end (strand)", preceded by "(name) " if the name is not empty
       """
       # the strand is shown as "-" only if the direction is -1
       strand = "-" if self.getDirection() == -1 else "+"
       location = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), strand)
       return "(%s) %s" % (self.name, location) if self.name != "" else location
 | 
| 
 | 
   706 
 |