Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/commons/core/coord/Align.py @ 16:836ce3d9d47a draft default tip
Uploaded
author | urgi-team |
---|---|
date | Thu, 21 Jul 2016 07:42:47 -0400 |
parents | 255c852351c5 |
children |
comparison
equal
deleted
inserted
replaced
15:255c852351c5 | 16:836ce3d9d47a |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 import time | |
32 from commons.core.coord.Map import Map | |
33 from commons.core.coord.Range import Range | |
34 | |
## Handle a match between two sequences, query and subject
# (pair of coordinates with E-value, score and identity)
#
# The coordinates are held by two Range instances, 'range_query' and
# 'range_subject'; most methods simply delegate to them.
#
class Align( object ):

    __slots__ = ("range_query", "range_subject", "e_value", "score", "identity", '__dict__')

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new Range() is built when None)
    # @param range_s: a Range instance for the subject (a new Range() is built when None)
    # @param e_value: E-value of the match (converted to float)
    # @param score: score of the match (converted to float)
    # @param identity: identity percentage of the match (converted to float)
    #
    # @note the default Range objects are created per instance. Using 'Range()'
    #       directly as a default argument would build them only once, when the
    #       function is defined, and every Align created without explicit ranges
    #       would then share (and mutate, via reset() or the setters) the same
    #       two objects.
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        self.range_query = Range() if range_q is None else range_q
        self.range_subject = Range() if range_s is None else range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is considered empty as soon as one of its two ranges is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o the object to compare with
    # @return False if 'o' is not exactly of the same type, otherwise True
    #         only if both ranges, the E-value, the score and the identity are equal
    #
    def __eq__(self, o):
        if type(o) is not type(self):
            return False
        return self.range_query == o.range_query \
            and self.range_subject == o.range_subject \
            and self.e_value == o.e_value \
            and self.score == o.score \
            and self.identity == o.identity

    ## Unequal operator
    #
    # @param o the object to compare with (usually an Align instance)
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'; same output as toString()
    #
    def __str__( self ):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or when the line has
    #          fewer than 5 tab-separated fields
    #
    # @note the instance is reset before the line is read
    # @note NOTE(review): setFromTuple() reads nine fields, so a line with
    #       5 to 8 fields passes the check below and raises IndexError
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        if len(tokens) < 5:
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note when queryStart is not lower than queryEnd, the start and end of
    #       both the query and the subject are swapped, so that the query is
    #       loaded on the direct strand
    #
    def setFromTuple( self, tuple ):
        # Fresh Range objects are assigned so that the ranges previously held by
        # this instance (possibly shared with the caller) are not modified.
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) )
        else:
            self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) )
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place; E-value, score and identity are set to 0
    #
    def reset( self ):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @return the query range, the subject range, the E-value ('%g'), the score
    #         ('%i', hence truncated to an integer) and the identity ('%f'),
    #         separated by tabulations
    #
    def toString(self):
        return "%s\t%s\t%g\t%i\t%f" % ( self.range_query.toString(),
                                        self.range_subject.toString(),
                                        self.e_value,
                                        self.score,
                                        self.identity )

    ## Return the attributes as a GFF-formatted string
    #
    # @param source: content of the 2nd column
    # @param type: content of the 3rd column
    # @param phase: content of the 8th column
    # @param ID: value of the 'ID' attribute; when empty, an identifier is
    #            derived from the current time
    # @param Parent: value of the 'Parent' attribute; omitted when empty
    # @return nine tab-separated columns; the 6th one is the E-value
    #
    # @note side effect: if the subject is not on the direct strand, the
    #       instance itself is reversed (query and subject) before formatting
    #
    def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
        if not self.isSubjectOnDirectStrand():
            self.reverse()

        if ID != "":
            attributes = "ID=%s" % ( ID )
        else:
            # The generated identifier is a string of digits, so it has to be
            # formatted with '%s' ('%i' raises TypeError on a string).
            attributes = "ID=%s" % ( str(time.time())[-8:-1].replace(".","") )
        if Parent != "":
            attributes += ";Parent=%s" % ( Parent )
        attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )

        columns = [ "%s" % ( self.getQueryName() ),
                    "%s" % ( source ),
                    "%s" % ( type ),
                    "%s" % ( self.getQueryMin() ),
                    "%s" % ( self.getQueryMax() ),
                    "%g" % ( self.e_value ),
                    "%s" % ( self.getQueryStrand() ),
                    "%s" % ( phase ),
                    "%s" % ( attributes ) ]
        return "\t".join( columns )

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    # @note prints the output of toString() on the standard output
    #
    def show(self):
        # Parenthesised form: same output with the Python 2 print statement
        # and with the Python 3 print function.
        print(self.toString())

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled (opened in append mode)
    #
    def save(self, file):
        # 'with' guarantees the handler is closed even if write() fails
        with open( file, "a" ) as fileHandler:
            self.write( fileHandler )

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName( self ):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart( self ):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd( self ):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin( self ):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax( self ):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand( self ):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName( self ):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart( self ):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd( self ):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin( self ):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax( self ):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand( self ):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the Range held by the instance is returned, not a copy
    #
    def getQueryAsRange( self ):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the Range held by the instance is returned, not a copy
    #
    def getSubjectAsRange( self ):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName( self, name ):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart( self, start ):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd( self, end ):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName( self, name ):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart( self, start ):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd( self, end ):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    #
    # @note nothing is done when the query names or the subject names differ;
    #       otherwise both ranges are merged and the instance keeps the highest
    #       score, the lowest E-value and the highest identity
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not ( sameQuery and sameSubject ):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score,o.score)
        self.e_value = min(self.e_value,o.e_value)
        self.identity = max(self.identity,o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and located on the query
    #       sequence; the query start and end are swapped when the subject is
    #       not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand( self ):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand( self ):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    #
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() is safe on an empty string, unlike string[-1]
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple( string.split(sep) )

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map( name="repet",
                         seqname=self.range_query.seqname,
                         start=self.range_query.start,
                         end=self.range_query.end )
        iMapSubject = Map( name="repet",
                           seqname=self.range_subject.seqname,
                           start=self.range_subject.start,
                           end=self.range_subject.end )
        return iMapQuery, iMapSubject

    ## Write the subject, mapped on the query, as a Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery( self, fileHandler ):
        iMap = self.getSubjectAsMapOfQuery()
        iMap.write( fileHandler )

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note after the switch, the instance is reversed if the new query is not
    #       on the direct strand
    #
    def switchQuerySubject( self ):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    def isQueryOverlapping( self, iAlign ):
        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    def isSubjectOverlapping( self, iAlign ):
        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping( self, iAlign ):
        return bool( self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ) )

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore( self ):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0