Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/commons/core/coord/CountOverlapping.py @ 16:836ce3d9d47a draft default tip
Uploaded
author | urgi-team |
---|---|
date | Thu, 21 Jul 2016 07:42:47 -0400 |
parents | 255c852351c5 |
children |
comparison
equal
deleted
inserted
replaced
15:255c852351c5 | 16:836ce3d9d47a |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 import bisect | |
32 from commons.core.checker.RepetException import RepetException | |
33 from commons.core.LoggerFactory import LoggerFactory | |
34 | |
LOG_DEPTH = "commons.coord"


## Count how many features from a fixed list overlap a query interval.
#
class CountOverlapping(object):

    ## Constructor
    #
    # @param lFeatures list of objects implementing getStart, getEnd and getSeqname methods
    # @param areFeaturesOnDirectStrandsOnly boolean; if False, the features must also implement
    #        isOnReverseStrand and reverse methods, and every feature reporting a reverse strand
    #        is reversed IN PLACE (the caller's objects are modified)
    # @param verbosity int verbosity level handed to the logger factory
    #
    # Throws a RepetException if all the features in lFeatures don't share the same getSeqname() result
    #
    # This implementation may not be very efficient but it works
    #
    def __init__(self, lFeatures, areFeaturesOnDirectStrandsOnly = False, verbosity = 2):
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

        self._areFeaturesOnDirectStrandsOnly = areFeaturesOnDirectStrandsOnly
        self._lFeaturesToCheck = lFeatures
        self._prepareData()

    ## Validate the feature list and put every feature on the direct strand
    #
    # Records the number of features, raises (through _logAndRaise) when more than one
    # distinct sequence name is present, and reverses the reverse-strand features unless
    # the caller declared them all to be on the direct strand already.
    #
    def _prepareData(self):
        self._nbFeatures = len(self._lFeaturesToCheck)

        sNames = set(iFeature.getSeqname() for iFeature in self._lFeaturesToCheck)
        # An empty list (no name at all) is accepted; count() reports 0 for it.
        if len(sNames) > 1:
            self._logAndRaise("ERROR: different sequence names in input features list")

        if not self._areFeaturesOnDirectStrandsOnly:
            for iFeature in self._lFeaturesToCheck:
                if iFeature.isOnReverseStrand():
                    iFeature.reverse()
            self._areFeaturesOnDirectStrandsOnly = True

    ## Log an error message, then raise it as a RepetException
    #
    # @param errorMsg string message to log and to carry in the exception
    #
    def _logAndRaise(self, errorMsg):
        self._log.error(errorMsg)
        raise RepetException(errorMsg)

    ## Count number of features overlapping with a given interval
    #
    # @param queryInterval feature to check overlaps number with (must implement getStart, getEnd, getSeqname, isOnReverseStrand and reverse methods)
    # @return int number of input features overlapping with queryInterval;
    #         0 (with a warning) when the feature list is empty or when queryInterval
    #         is not on the same sequence as the features
    #
    # Side effect: queryInterval is reversed in place when it is on the reverse strand.
    #
    def count(self, queryInterval):
        if queryInterval.isOnReverseStrand():
            queryInterval.reverse()

        if self._nbFeatures == 0:
            self._log.warning("WARNING: empty feature list. Will return 0 overlap.")
            return 0

        featuresName = self._lFeaturesToCheck[0].getSeqname()
        queryName = queryInterval.getSeqname()
        if featuresName != queryName:
            self._log.warning("WARNING: different sequence names between feature '%s' and queryInterval '%s'. Will return 0 overlap." % (featuresName, queryName))
            # Honour the warning: coordinates on different sequences are not comparable.
            return 0

        lOrderedStart = sorted(iFeature.getStart() for iFeature in self._lFeaturesToCheck)
        lOrderedEnd = sorted(iFeature.getEnd() for iFeature in self._lFeaturesToCheck)

        # Features whose start is <= the query end: everything else lies entirely after the query.
        nbStartingBeforeQueryEnd = bisect.bisect_right(lOrderedStart, queryInterval.getEnd())
        # Features whose end is <= the query start: these are treated as lying entirely before the query.
        # NOTE(review): a feature ending exactly on the query start is therefore NOT counted, whereas a
        # feature starting exactly on the query end IS counted - confirm this asymmetry is intended.
        nbEndingBeforeQueryStart = bisect.bisect_right(lOrderedEnd, queryInterval.getStart())

        # Same value as nbFeatures - (before + after), with after = nbFeatures - nbStartingBeforeQueryEnd.
        return nbStartingBeforeQueryEnd - nbEndingBeforeQueryStart