| 
13
 | 
     1 # Copyright INRA (Institut National de la Recherche Agronomique)
 | 
| 
 | 
     2 # http://www.inra.fr
 | 
| 
 | 
     3 # http://urgi.versailles.inra.fr
 | 
| 
 | 
     4 #
 | 
| 
 | 
     5 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     6 # abiding by the rules of distribution of free software.  You can  use, 
 | 
| 
 | 
     7 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     8 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     9 # "http://www.cecill.info". 
 | 
| 
 | 
    10 #
 | 
| 
 | 
    11 # As a counterpart to the access to the source code and  rights to copy,
 | 
| 
 | 
    12 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    13 # with a limited warranty  and the software's author,  the holder of the
 | 
| 
 | 
    14 # economic rights,  and the successive licensors  have only  limited
 | 
| 
 | 
    15 # liability. 
 | 
| 
 | 
    16 #
 | 
| 
 | 
    17 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    18 # with loading,  using,  modifying and/or developing or reproducing the
 | 
| 
 | 
    19 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    20 # that may mean  that it is complicated to manipulate,  and  that  also
 | 
| 
 | 
    21 # therefore means  that it is reserved for developers  and  experienced
 | 
| 
 | 
    22 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    23 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    24 # requirements in conditions enabling the security of their systems and/or 
 | 
| 
 | 
    25 # data to be ensured and,  more generally, to use and operate it in the 
 | 
| 
 | 
    26 # same conditions as regards security. 
 | 
| 
 | 
    27 #
 | 
| 
 | 
    28 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    29 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 
 | 
| 
 | 
    32 ## Record a region on a given sequence
 | 
| 
 | 
    33 #
 | 
| 
 | 
    34 class Range( object ):
 | 
| 
 | 
    35     
 | 
| 
 | 
    36     __slots__ = ("seqname", "start", "end", '__dict__')
 | 
| 
 | 
    37 
 | 
| 
 | 
    38     ## Constructor
 | 
| 
 | 
    39     #
 | 
| 
 | 
    40     # @param seqname the name of the sequence
 | 
| 
 | 
    41     # @param start the start coordinate
 | 
| 
 | 
    42     # @param end the end coordinate
 | 
| 
 | 
    43     #
 | 
| 
 | 
    44     def __init__(self, seqname="", start=-1, end=-1):
 | 
| 
 | 
    45         self.seqname = seqname
 | 
| 
 | 
    46         self.start = int(start)
 | 
| 
 | 
    47         self.end = int(end)
 | 
| 
 | 
    48         
 | 
| 
 | 
    49     ## Equal operator
 | 
| 
 | 
    50     #
 | 
| 
 | 
    51     # @param o a Range instance
 | 
| 
 | 
    52     #
 | 
| 
 | 
    53     def __eq__(self, o):
 | 
| 
 | 
    54         if type(o) is type(self) and self.seqname == o.seqname and self.start == o.start and self.end == o.end:
 | 
| 
 | 
    55             return True
 | 
| 
 | 
    56         return False
 | 
| 
 | 
    57         
 | 
| 
 | 
    58     ## Unequal operator
 | 
| 
 | 
    59     #
 | 
| 
 | 
    60     # @param o a Range instance
 | 
| 
 | 
    61     #
 | 
| 
 | 
    62     def __ne__(self, o):
 | 
| 
 | 
    63         return not self.__eq__(o)
 | 
| 
 | 
    64     
 | 
| 
 | 
    65     ## Convert the object into a string
 | 
| 
 | 
    66     #
 | 
| 
 | 
    67     # @note used in 'print myObject'
 | 
| 
 | 
    68     #
 | 
| 
 | 
    69     def __str__( self ):
 | 
| 
 | 
    70         return self.toString()
 | 
| 
 | 
    71     
 | 
| 
 | 
    72     ## Convert the object into a string
 | 
| 
 | 
    73     #
 | 
| 
 | 
    74     # @note used in 'repr(myObject)' for debugging
 | 
| 
 | 
    75     #
 | 
| 
 | 
    76     def __repr__( self ):
 | 
| 
 | 
    77         return self.toString().replace("\t",";")
 | 
| 
 | 
    78     
 | 
| 
 | 
    79     def setStart(self, start):
 | 
| 
 | 
    80         self.start = start
 | 
| 
 | 
    81         
 | 
| 
 | 
    82     def setEnd(self, end):
 | 
| 
 | 
    83         self.end = end
 | 
| 
 | 
    84         
 | 
| 
 | 
    85     def setSeqName(self, seqName):
 | 
| 
 | 
    86         self.seqname = seqName
 | 
| 
 | 
    87     
 | 
| 
 | 
    88     ## Reset
 | 
| 
 | 
    89     #
 | 
| 
 | 
    90     def reset(self):
 | 
| 
 | 
    91         self.seqname = ""
 | 
| 
 | 
    92         self.start = -1
 | 
| 
 | 
    93         self.end = -1
 | 
| 
 | 
    94         
 | 
| 
 | 
    95     ## Return the attributes as a formatted string
 | 
| 
 | 
    96     #   
 | 
| 
 | 
    97     def toString(self):
 | 
| 
 | 
    98         string = "%s" % (self.seqname)
 | 
| 
 | 
    99         string += "\t%d" % (self.start)
 | 
| 
 | 
   100         string += "\t%d" % (self.end)
 | 
| 
 | 
   101         return string
 | 
| 
 | 
   102     
 | 
| 
 | 
   103     ## Show the attributes
 | 
| 
 | 
   104     #
 | 
| 
 | 
   105     def show(self):
 | 
| 
 | 
   106         print self.toString()
 | 
| 
 | 
   107     
 | 
| 
 | 
   108     ## Return seqname
 | 
| 
 | 
   109     #
 | 
| 
 | 
   110     def getSeqname(self):
 | 
| 
 | 
   111         return self.seqname
 | 
| 
 | 
   112     
 | 
| 
 | 
   113     ## Return the start coordinate
 | 
| 
 | 
   114     #
 | 
| 
 | 
   115     def getStart(self):
 | 
| 
 | 
   116         return self.start
 | 
| 
 | 
   117     
 | 
| 
 | 
   118     ## Return the end coordinate
 | 
| 
 | 
   119     #
 | 
| 
 | 
   120     def getEnd(self):
 | 
| 
 | 
   121         return self.end
 | 
| 
 | 
   122     
 | 
| 
 | 
   123     ## Return the lowest value between start and end coordinates
 | 
| 
 | 
   124     #
 | 
| 
 | 
   125     def getMin(self):
 | 
| 
 | 
   126         return min(self.start, self.end)
 | 
| 
 | 
   127     
 | 
| 
 | 
   128     ## Return the greatest value between start and end attributes
 | 
| 
 | 
   129     # 
 | 
| 
 | 
   130     def getMax(self):
 | 
| 
 | 
   131         return max(self.start, self.end)
 | 
| 
 | 
   132     
 | 
| 
 | 
   133     ## Return True if the instance is on the direct strand, False otherwise
 | 
| 
 | 
   134     # 
 | 
| 
 | 
   135     def isOnDirectStrand(self):
 | 
| 
 | 
   136         if self.start <= self.end:
 | 
| 
 | 
   137             return True
 | 
| 
 | 
   138         else:
 | 
| 
 | 
   139             return False
 | 
| 
 | 
   140         
 | 
| 
 | 
   141     ## Return True if the instance is on the reverse strand, False otherwise
 | 
| 
 | 
   142     # 
 | 
| 
 | 
   143     def isOnReverseStrand(self):
 | 
| 
 | 
   144         return not self.isOnDirectStrand()
 | 
| 
 | 
   145     
 | 
| 
 | 
   146     ## Return '+' if the instance is on the direct strand, '-' otherwise
 | 
| 
 | 
   147     # 
 | 
| 
 | 
   148     def getStrand(self):
 | 
| 
 | 
   149         if self.isOnDirectStrand():
 | 
| 
 | 
   150             return '+'
 | 
| 
 | 
   151         else:
 | 
| 
 | 
   152             return '-'
 | 
| 
 | 
   153         
 | 
| 
 | 
   154     ## Exchange start and end coordinates
 | 
| 
 | 
   155     #
 | 
| 
 | 
   156     def reverse(self):
 | 
| 
 | 
   157         tmp = self.start
 | 
| 
 | 
   158         self.start = self.end
 | 
| 
 | 
   159         self.end = tmp
 | 
| 
 | 
   160         
 | 
| 
 | 
   161     ## Return the length of the instance
 | 
| 
 | 
   162     #
 | 
| 
 | 
   163     # @warning old name is 'length'
 | 
| 
 | 
   164     #
 | 
| 
 | 
   165     def getLength(self):
 | 
| 
 | 
   166         return int(abs(self.start-self.end))+1
 | 
| 
 | 
   167     
 | 
| 
 | 
   168     ## Return True if the instance is empty, False otherwise
 | 
| 
 | 
   169     #
 | 
| 
 | 
   170     def isEmpty(self):
 | 
| 
 | 
   171         if self.start==self.end and (self.start==0 or self.start==-1):
 | 
| 
 | 
   172             return True
 | 
| 
 | 
   173         return False
 | 
| 
 | 
   174     
 | 
| 
 | 
   175     ## Set attributes from tuple
 | 
| 
 | 
   176     #
 | 
| 
 | 
   177     # @param tuple a tuple with (name,start,end)
 | 
| 
 | 
   178     #
 | 
| 
 | 
   179     def setFromTuple(self, tuple):
 | 
| 
 | 
   180         self.seqname = tuple[0]
 | 
| 
 | 
   181         self.start = int(tuple[1])
 | 
| 
 | 
   182         self.end = int(tuple[2])
 | 
| 
 | 
   183         
 | 
| 
 | 
   184     ## Set attributes from string
 | 
| 
 | 
   185     #
 | 
| 
 | 
   186     # @param string a string formatted like name<sep>start<sep>end
 | 
| 
 | 
   187     # @param sep field separator
 | 
| 
 | 
   188     #
 | 
| 
 | 
   189     def setFromString(self, string, sep="\t"):
 | 
| 
 | 
   190         if string[-1] == "\n":
 | 
| 
 | 
   191             string = string[:-1]
 | 
| 
 | 
   192         self.setFromTuple( string.split(sep) )
 | 
| 
 | 
   193         
 | 
| 
 | 
   194     ## Merge the instance with another Range instance
 | 
| 
 | 
   195     #
 | 
| 
 | 
   196     # @param o a Range instance
 | 
| 
 | 
   197     #
 | 
| 
 | 
   198     def merge(self, o):
 | 
| 
 | 
   199         if self.seqname != o.seqname:
 | 
| 
 | 
   200             return
 | 
| 
 | 
   201         if self.isOnDirectStrand():
 | 
| 
 | 
   202             self.start = min(self.getMin(), o.getMin())
 | 
| 
 | 
   203             self.end = max(self.getMax(), o.getMax())
 | 
| 
 | 
   204         else:
 | 
| 
 | 
   205             self.start = max(self.getMax(), o.getMax())
 | 
| 
 | 
   206             self.end = min(self.getMin(), o.getMin())
 | 
| 
 | 
   207             
 | 
| 
 | 
   208     ## Return True if the instance overlaps with another Range instance, False otherwise
 | 
| 
 | 
   209     #
 | 
| 
 | 
   210     # @param o a Range instance
 | 
| 
 | 
   211     #
 | 
| 
 | 
   212     def isOverlapping(self, o):
 | 
| 
 | 
   213         if o.seqname != self.seqname:
 | 
| 
 | 
   214             return False
 | 
| 
 | 
   215         smin = self.getMin()
 | 
| 
 | 
   216         smax = self.getMax()
 | 
| 
 | 
   217         omin = o.getMin()
 | 
| 
 | 
   218         omax = o.getMax()
 | 
| 
 | 
   219         if omin <= smin and omax >= smax:
 | 
| 
 | 
   220             return True
 | 
| 
 | 
   221         if omin >= smin and omin <= smax or omax >= smin and omax <= smax:
 | 
| 
 | 
   222             return True
 | 
| 
 | 
   223         return False
 | 
| 
 | 
   224     
 | 
| 
 | 
   225     
 | 
| 
 | 
   226     ## Return the length of the overlap between the instance and another Range, 0 if no overlap
 | 
| 
 | 
   227     #
 | 
| 
 | 
   228     # @param o a Range instance
 | 
| 
 | 
   229     #
 | 
| 
 | 
   230     def getOverlapLength( self, o ):
 | 
| 
 | 
   231         if self.isOverlapping( o ):
 | 
| 
 | 
   232             if self.isIncludedIn( o ):
 | 
| 
 | 
   233                 return self.getLength()
 | 
| 
 | 
   234             elif o.isIncludedIn( self ):
 | 
| 
 | 
   235                 return o.getLength()
 | 
| 
 | 
   236             elif o.getMin() <= self.getMax() and o.getMin() >= self.getMin():
 | 
| 
 | 
   237                 return self.getMax() - o.getMin() + 1
 | 
| 
 | 
   238             elif o.getMax() <= self.getMax() and o.getMax() >= self.getMin():
 | 
| 
 | 
   239                 return o.getMax() - self.getMin() + 1
 | 
| 
 | 
   240         return 0
 | 
| 
 | 
   241     
 | 
| 
 | 
   242     
 | 
| 
 | 
   243     ## Return True if the instance is included within another Range, False otherwise
 | 
| 
 | 
   244     #
 | 
| 
 | 
   245     # @param o a Range instance
 | 
| 
 | 
   246     #
 | 
| 
 | 
   247     # @note the min (respectively max) coordinates can be equal
 | 
| 
 | 
   248     #
 | 
| 
 | 
   249     def isIncludedIn( self, o ):
 | 
| 
 | 
   250         if o.seqname != self.seqname:
 | 
| 
 | 
   251             return False
 | 
| 
 | 
   252         if self.getMin() >= o.getMin() and self.getMax() <= o.getMax():
 | 
| 
 | 
   253             return True
 | 
| 
 | 
   254         else:
 | 
| 
 | 
   255             return False
 | 
| 
 | 
   256 
 | 
| 
 | 
   257         
 | 
| 
 | 
   258     ## Return the distance between the start of the instance and the start of another Range instance
 | 
| 
 | 
   259     #
 | 
| 
 | 
   260     # @param o a Range instance
 | 
| 
 | 
   261     #
 | 
| 
 | 
   262     def getDistance(self, o):
 | 
| 
 | 
   263         if self.isOnDirectStrand() == o.isOnDirectStrand():
 | 
| 
 | 
   264             if self.isOverlapping(o):
 | 
| 
 | 
   265                 return 0
 | 
| 
 | 
   266             elif self.isOnDirectStrand():
 | 
| 
 | 
   267                 if self.start > o.start:
 | 
| 
 | 
   268                     return self.start - o.end
 | 
| 
 | 
   269                 else:
 | 
| 
 | 
   270                     return o.start - self.end
 | 
| 
 | 
   271             else:
 | 
| 
 | 
   272                 if self.start > o.start:
 | 
| 
 | 
   273                     return self.end - o.start
 | 
| 
 | 
   274                 else:
 | 
| 
 | 
   275                     return o.end - self.start
 | 
| 
 | 
   276         return -1
 | 
| 
 | 
   277     
 | 
| 
 | 
   278     ## Remove in the instance the region overlapping with another Range instance
 | 
| 
 | 
   279     #
 | 
| 
 | 
   280     # @param o a Range instance
 | 
| 
 | 
   281     # 
 | 
| 
 | 
   282     def diff(self, o):
 | 
| 
 | 
   283         new_range = Range(self.seqname)
 | 
| 
 | 
   284         if not self.isOverlapping(o) or self.seqname != o.seqname:
 | 
| 
 | 
   285             return new_range
 | 
| 
 | 
   286 
 | 
| 
 | 
   287         istart = min(self.start, self.end)
 | 
| 
 | 
   288         iend = max(self.start, self.end)
 | 
| 
 | 
   289         jstart = min(o.start, o.end)
 | 
| 
 | 
   290         jend = max(o.start, o.end)
 | 
| 
 | 
   291         if istart < jstart:
 | 
| 
 | 
   292             if iend <= jend:
 | 
| 
 | 
   293                 if self.isOnDirectStrand():
 | 
| 
 | 
   294                     self.start = istart
 | 
| 
 | 
   295                     self.end = jstart - 1
 | 
| 
 | 
   296                 else:
 | 
| 
 | 
   297                     self.start = jstart - 1
 | 
| 
 | 
   298                     self.end = istart
 | 
| 
 | 
   299             else:
 | 
| 
 | 
   300                 if self.isOnDirectStrand():
 | 
| 
 | 
   301                     self.start = istart
 | 
| 
 | 
   302                     self.end = jstart - 1
 | 
| 
 | 
   303                     new_range.start = jend + 1
 | 
| 
 | 
   304                     new_range.end = iend
 | 
| 
 | 
   305                 else:
 | 
| 
 | 
   306                     self.start = jstart - 1;
 | 
| 
 | 
   307                     self.end = istart;
 | 
| 
 | 
   308                     new_range.start = iend
 | 
| 
 | 
   309                     new_range.end = jend + 1
 | 
| 
 | 
   310         else: #istart>=jstart
 | 
| 
 | 
   311             if iend <= jend:
 | 
| 
 | 
   312                 self.start = 0
 | 
| 
 | 
   313                 self.end = 0
 | 
| 
 | 
   314             else:
 | 
| 
 | 
   315                 if self.isOnDirectStrand():
 | 
| 
 | 
   316                     self.start = jend + 1
 | 
| 
 | 
   317                     self.end = iend
 | 
| 
 | 
   318                 else:
 | 
| 
 | 
   319                     self.start = iend
 | 
| 
 | 
   320                     self.end = jend + 1
 | 
| 
 | 
   321         return new_range
 | 
| 
 | 
   322         
 | 
| 
 | 
   323     ## Find the bin that contains the instance and compute its index
 | 
| 
 | 
   324     #
 | 
| 
 | 
   325     # @note Required for coordinate indexing via a hierarchical bin system
 | 
| 
 | 
   326     #
 | 
| 
 | 
   327     def findIdx(self):
 | 
| 
 | 
   328         min_lvl = 3
 | 
| 
 | 
   329         max_lvl = 6
 | 
| 
 | 
   330         for bin_lvl in xrange(min_lvl, max_lvl):
 | 
| 
 | 
   331             if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):
 | 
| 
 | 
   332                 return getIdx(self.start, bin_lvl)
 | 
| 
 | 
   333         return getIdx(self.start, max_lvl) 
 | 
| 
 | 
   334     
 | 
| 
 | 
   335     ## Get a bin for fast database access
 | 
| 
 | 
   336     #
 | 
| 
 | 
   337     # @return bin number (float)
 | 
| 
 | 
   338     #
 | 
| 
 | 
   339     def getBin(self):
 | 
| 
 | 
   340         for i in xrange(3, 8):
 | 
| 
 | 
   341             bin_lvl = pow(10, i)
 | 
| 
 | 
   342             if int(self.start/bin_lvl) == int(self.end/bin_lvl):
 | 
| 
 | 
   343                 return float(bin_lvl+(int(self.start/bin_lvl)/1e10))
 | 
| 
 | 
   344         bin_lvl = pow(10, 8)
 | 
| 
 | 
   345         return float(bin_lvl+(int(self.start/bin_lvl)/1e10))
 | 
| 
 | 
   346     
 | 
| 
 | 
   347     
 | 
| 
 | 
   348 # Functions
 | 
| 
 | 
   349 
 | 
| 
 | 
   350 # Get the bin number of a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system
 | 
| 
 | 
   351 #    
 | 
| 
 | 
   352 def getBin(val, bin_lvl):
 | 
| 
 | 
   353     bin_size = pow(10, bin_lvl)
 | 
| 
 | 
   354     return long(val / bin_size)
 | 
| 
 | 
   355     
 | 
| 
 | 
   356 # Get an index from a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system
 | 
| 
 | 
   357 #
 | 
| 
 | 
   358 def getIdx(val, bin_lvl):
 | 
| 
 | 
   359     min_lvl = 3
 | 
| 
 | 
   360     max_lvl = 6
 | 
| 
 | 
   361     if bin_lvl >= max_lvl:
 | 
| 
 | 
   362         return long((bin_lvl-min_lvl+1)*pow(10,max_lvl))
 | 
| 
 | 
   363     return long(((bin_lvl-min_lvl+1)*pow(10,max_lvl))+getBin(val,bin_lvl))
 |