comparison TEisotools-1.1.a/commons/core/coord/Align.py @ 16:836ce3d9d47a draft default tip

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:42:47 -0400
parents 255c852351c5
children
comparison
equal deleted inserted replaced
15:255c852351c5 16:836ce3d9d47a
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31 import time
32 from commons.core.coord.Map import Map
33 from commons.core.coord.Range import Range
34
35 ## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity)
36 #
class Align( object ):

    # '__dict__' is listed too, so instances still accept attributes other
    # than the five declared here.
    __slots__ = ("range_query", "range_subject", "e_value", "score", "identity", '__dict__')

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new empty Range if None)
    # @param range_s: a Range instance for the subject (a new empty Range if None)
    # @param e_value: E-value of the match
    # @param score: score of the match
    # @param identity: identity percentage of the match
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        # A Range() written in the signature is built only once and would be
        # shared by every instance relying on the default; since reset() and
        # read() modify the ranges in place, each instance needs its own.
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as its query or its subject is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o the object to compare with
    # @return False if 'o' is not of the exact same type, otherwise True only
    #         if both ranges, the E-value, the score and the identity are equal
    #
    def __eq__(self, o):
        if type(o) is not type(self):
            return False
        return self.range_query == o.range_query \
               and self.range_subject == o.range_subject \
               and self.e_value == o.e_value \
               and self.score == o.score \
               and self.identity == o.identity

    ## Unequal operator
    #
    # @param o an Align instance
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__( self ):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or if the line has
    #          fewer than the 9 tab-separated fields of an Align record
    # @note the instance is reset before the line is read
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # setFromTuple() reads the fields 0 to 8, hence 9 fields are required
        if len(tokens) < 9:
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    #
    def setFromTuple( self, tuple ):
        #TODO: we need to create Range instances because of __eq__() and isEmpty() tests, but WHY ???
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) )
        else:
            # query start is not lower than its end: swap start and end of
            # both the query and the subject
            self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) )
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place; E-value, score and identity are set to 0
    #
    def reset( self ):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @return query range, subject range, E-value, score and identity
    #         separated by tabulations (the score is written as an integer)
    #
    def toString(self):
        string = "%s" % ( self.range_query.toString() )
        string += "\t%s" % ( self.range_subject.toString() )
        string += "\t%g\t%i\t%f" % ( self.e_value, self.score, self.identity )
        return string


    ## Return the attributes as a GFF-formatted string
    #
    # @param source value of the 2nd column
    # @param type value of the 3rd column
    # @param phase value of the 8th column
    # @param ID value of the 'ID' attribute (if empty, digits taken from the current timestamp are used)
    # @param Parent value of the 'Parent' attribute (omitted if empty)
    # @note if the subject is not on the direct strand, the instance is
    #       reversed in place before being formatted
    # @note the E-value is written in the 6th column
    #
    def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        if ID != "":
            attributes = "ID=%s" % ( ID )
        else:
            # the fallback identifier is a string, hence '%s' ('%i' raises a TypeError)
            attributes = "ID=%s" % ( str(time.time())[-8:-1].replace(".","") )
        if Parent != "":
            attributes += ";Parent=%s" % ( Parent )
        attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )
        columns = [ "%s" % ( self.getQueryName() ),
                    "%s" % ( source ),
                    "%s" % ( type ),
                    "%s" % ( self.getQueryMin() ),
                    "%s" % ( self.getQueryMax() ),
                    "%g" % ( self.e_value ),
                    "%s" % ( self.getQueryStrand() ),
                    "%s" % ( phase ),
                    "%s" % ( attributes ) ]
        return "\t".join( columns )


    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    # @note print the string returned by toString() on the standard output
    #
    def show(self):
        # parenthesized form: same output with the print statement and the print function
        print( self.toString() )

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled
    # @note the file is opened in append mode
    #
    def save(self, file):
        # 'with' closes the file even if write() raises an exception
        with open( file, "a" ) as fileHandler:
            self.write( fileHandler )

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName( self ):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart( self ):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd( self ):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin( self ):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax( self ):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand( self ):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName( self ):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart( self ):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd( self ):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin( self ):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax( self ):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand( self ):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the Range itself is returned, not a copy
    #
    def getQueryAsRange( self ):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the Range itself is returned, not a copy
    #
    def getSubjectAsRange( self ):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName( self, name ):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart( self, start ):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd( self, end ):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName( self, name ):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart( self, start ):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd( self, end ):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    # @note nothing is done if the query names or the subject names differ
    # @note the highest score, the lowest E-value and the highest identity are kept
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not ( sameQuery and sameSubject ):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score,o.score)
        self.e_value = min(self.e_value,o.e_value)
        self.identity = max(self.identity,o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map has the name of the subject and the sequence name of the
    #       query; it has the coordinates of the query, start and end being
    #       swapped if the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand( self ):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand( self ):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() is also safe on an empty string, unlike string[-1]
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple( string.split(sep) )

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map( name="repet",
                         seqname=self.range_query.seqname,
                         start=self.range_query.start,
                         end=self.range_query.end )
        iMapSubject = Map( name="repet",
                           seqname=self.range_subject.seqname,
                           start=self.range_subject.start,
                           end=self.range_subject.end )
        return iMapQuery, iMapSubject

    ## Write query coordinates as Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    # @note the Map written is the one returned by getSubjectAsMapOfQuery()
    #
    def writeSubjectAsMapOfQuery( self, fileHandler ):
        m = self.getSubjectAsMapOfQuery()
        m.write( fileHandler )

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note after the switch, the instance is reversed if its new query is
    #       not on the direct strand
    #
    def switchQuerySubject( self ):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    def isQueryOverlapping( self, iAlign ):
        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    def isSubjectOverlapping( self, iAlign ):
        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping( self, iAlign ):
        return bool( self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ) )

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore( self ):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0