13
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31 import time
|
|
32 from commons.core.coord.Map import Map
|
|
33 from commons.core.coord.Range import Range
|
|
34
|
|
## Handle a match between two sequences, query and subject
# (pair of coordinates with E-value, score and identity)
#
# @note the coordinates are stored as two Range instances (query and subject)
#
class Align(object):

    # listing '__dict__' keeps instances open to arbitrary extra attributes
    __slots__ = ("range_query", "range_subject", "e_value", "score", "identity", "__dict__")

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new empty Range if omitted)
    # @param range_s: a Range instance for the subject (a new empty Range if omitted)
    # @param e_value: E-value of the match
    # @param score: score of the match
    # @param identity: identity percentage of the match
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        # A Range() written as default value in the signature is evaluated only once,
        # hence shared (and modified in place) by every instance built without
        # arguments: build a fresh one per instance instead.
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as its query or its subject is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o the object to compare with (never equal if not of the very same type)
    #
    def __eq__(self, o):
        if type(o) is not type(self):
            return False
        return self.range_query == o.range_query \
            and self.range_subject == o.range_subject \
            and self.e_value == o.e_value \
            and self.score == o.score \
            and self.identity == o.identity

    ## Unequal operator
    #
    # @param o an Align instance
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__(self):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or if the line has less than 9 fields
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # setFromTuple() reads fields 0 to 8: refuse shorter lines rather than
        # failing with an IndexError half-way through the update
        if len(tokens) < 9:
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    #
    def setFromTuple(self, tuple):
        # new Range instances are created so that the ones previously held
        # (possibly shared with other objects) are never modified in place
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple((tuple[0], tuple[1], tuple[2]))
            self.range_subject.setFromTuple((tuple[3], tuple[4], tuple[5]))
        else:
            # query given in reverse order: swap start and end on both sequences
            self.range_query.setFromTuple((tuple[0], tuple[2], tuple[1]))
            self.range_subject.setFromTuple((tuple[3], tuple[5], tuple[4]))
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place; E-value, score and identity are set to 0
    #
    def reset(self):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @note tab-separated: query range, subject range, E-value, score (printed as an integer), identity
    #
    def toString(self):
        return "%s\t%s\t%g\t%i\t%f" % (self.range_query.toString(),
                                        self.range_subject.toString(),
                                        self.e_value,
                                        self.score,
                                        self.identity)

    ## Return the attributes as a GFF-formatted string
    #
    # @param source content of the 'source' column
    # @param type content of the 'type' column
    # @param phase content of the 'phase' column
    # @param ID value of the 'ID' attribute (if empty, built from the current time)
    # @param Parent value of the 'Parent' attribute (omitted if empty)
    # @note the instance is reversed in place if its subject is not on the direct strand
    # @note the E-value is written in the 'score' column
    #
    def toStringAsGff(self, source="REPET", type="match", phase=".", ID="", Parent=""):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        if ID == "":
            # the time-based identifier is a string: it has to be formatted
            # with '%s' ('%i' raises a TypeError on a string)
            ID = str(time.time())[-8:-1].replace(".", "")
        attributes = ["ID=%s" % (ID,)]
        if Parent != "":
            attributes.append("Parent=%s" % (Parent,))
        attributes.append("Target=%s %i %i" % (self.getSubjectName(),
                                               self.getSubjectStart(),
                                               self.getSubjectEnd()))
        columns = ["%s" % (self.getQueryName(),),
                   "%s" % (source,),
                   "%s" % (type,),
                   "%s" % (self.getQueryMin(),),
                   "%s" % (self.getQueryMax(),),
                   "%g" % (self.e_value,),
                   "%s" % (self.getQueryStrand(),),
                   "%s" % (phase,),
                   ";".join(attributes)]
        return "\t".join(columns)

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    def show(self):
        # single-argument call form: same output with Python 2 and Python 3
        print(self.toString())

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled (opened in append mode)
    #
    def save(self, file):
        # the 'with' statement closes the file even if the write fails
        with open(file, "a") as fileHandler:
            self.write(fileHandler)

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName(self):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart(self):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd(self):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin(self):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax(self):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand(self):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName(self):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart(self):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd(self):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin(self):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax(self):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand(self):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the Range itself is returned, not a copy
    #
    def getQueryAsRange(self):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the Range itself is returned, not a copy
    #
    def getSubjectAsRange(self):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName(self, name):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart(self, start):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd(self, end):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName(self, name):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart(self, start):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd(self, end):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    # @note nothing is done if the query names or the subject names differ
    # @note the highest score, the lowest E-value and the highest identity are kept
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not (sameQuery and sameSubject):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score, o.score)
        self.e_value = min(self.e_value, o.e_value)
        self.identity = max(self.identity, o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and carries the query coordinates,
    #       start and end being swapped if the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand(self):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand(self):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() also copes with an empty string, unlike string[-1]
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple(string.split(sep))

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map(name="repet",
                        seqname=self.range_query.seqname,
                        start=self.range_query.start,
                        end=self.range_query.end)
        iMapSubject = Map(name="repet",
                          seqname=self.range_subject.seqname,
                          start=self.range_subject.start,
                          end=self.range_subject.end)
        return iMapQuery, iMapSubject

    ## Write query coordinates as Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery(self, fileHandler):
        self.getSubjectAsMapOfQuery().write(fileHandler)

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note after the switch, both ranges are reversed if the new query is not on the direct strand
    #
    def switchQuerySubject(self):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    def isQueryOverlapping(self, iAlign):
        return self.getQueryAsRange().isOverlapping(iAlign.getQueryAsRange())

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    def isSubjectOverlapping(self, iAlign):
        return self.getSubjectAsRange().isOverlapping(iAlign.getSubjectAsRange())

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping(self, iAlign):
        return bool(self.isQueryOverlapping(iAlign) and self.isSubjectOverlapping(iAlign))

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore(self):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0
|