comparison commons/core/coord/Align.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31 import time
32
33 from commons.core.coord.Range import Range
34 from commons.core.coord.Map import Map
35
36
## Handle a match between two sequences, query and subject
# (pair of coordinates with E-value, score and identity)
#
# The query and subject coordinates are stored as two Range instances
# (attributes 'range_query' and 'range_subject').
#
class Align(object):

    # Number of tab-separated fields indexed by setFromTuple():
    # qName, qStart, qEnd, sName, sStart, sEnd, E-value, score, identity
    _NB_FIELDS = 9

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new empty Range if None)
    # @param range_s: a Range instance for the subject (a new empty Range if None)
    # @param e_value: E-value of the match
    # @param score: score of the match
    # @param identity: identity percentage of the match
    #
    # @note the ranges default to None rather than to Range(): a default
    #       Range() would be built only once and then shared (and mutated)
    #       by every instance created without explicit ranges
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as one of its two ranges is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o an Align instance
    # @return True if both ranges, the E-value, the score and the identity
    #         are equal; False otherwise (also when 'o' is not an Align)
    #
    def __eq__(self, o):
        if not isinstance(o, Align):
            return False
        return bool(
            self.range_query == o.range_query
            and self.range_subject == o.range_subject
            and self.e_value == o.e_value
            and self.score == o.score
            and self.identity == o.identity
        )

    ## Unequal operator
    #
    # @param o an Align instance
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__(self):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or if the line
    #          does not hold enough tab-separated fields
    #
    # @note the instance is reset before the line is read
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # setFromTuple() indexes up to field 8, so at least 9 fields are
        # required; the historical bound (number of attributes) is kept as
        # well so that a larger requirement is never relaxed.
        if len(tokens) < max(len(self.__dict__), self._NB_FIELDS):
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    #
    def setFromTuple(self, tuple):
        # New Range instances are created (instead of updating the current
        # ones in place) so that Range objects received by the constructor
        # are not modified by this method.
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple((tuple[0], tuple[1], tuple[2]))
            self.range_subject.setFromTuple((tuple[3], tuple[4], tuple[5]))
        else:
            # query given in reverse order: swap start/end on both sequences
            self.range_query.setFromTuple((tuple[0], tuple[2], tuple[1]))
            self.range_subject.setFromTuple((tuple[3], tuple[5], tuple[4]))
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place; E-value, score and identity are set to 0
    #
    def reset(self):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @return tab-separated string: query range, subject range, E-value,
    #         score (written as an integer) and identity
    #
    def toString(self):
        return "%s\t%s\t%g\t%i\t%f" % (
            self.range_query.toString(),
            self.range_subject.toString(),
            self.e_value,
            self.score,
            self.identity,
        )

    ## Return the attributes as a GFF-formatted string
    #
    # @param source: value of the 'source' column
    # @param type: value of the 'type' column
    # @param phase: value of the 'phase' column
    # @param ID: value of the 'ID' attribute (if empty, derived from the current time)
    # @param Parent: value of the 'Parent' attribute (omitted if empty)
    #
    # @note the E-value is written in the 6th column
    # @note side effect: if the subject is not on the direct strand,
    #       the instance is reversed (query and subject) beforehand
    #
    def toStringAsGff(self, source="REPET", type="match", phase=".", ID="", Parent=""):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        if ID != "":
            attributes = "ID=%s" % (ID)
        else:
            # the identifier is a string of digits, hence '%s' ('%i' raises
            # a TypeError when given a string)
            attributes = "ID=%s" % (str(time.time())[-8:-1].replace(".", ""))
        if Parent != "":
            attributes += ";Parent=%s" % (Parent)
        attributes += ";Target=%s %i %i" % (
            self.getSubjectName(),
            self.getSubjectStart(),
            self.getSubjectEnd(),
        )
        columns = [
            "%s" % (self.getQueryName()),
            "%s" % (source),
            "%s" % (type),
            "%s" % (self.getQueryMin()),
            "%s" % (self.getQueryMax()),
            "%g" % (self.e_value),
            "%s" % (self.getQueryStrand()),
            "%s" % (phase),
            attributes,
        ]
        return "\t".join(columns)

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    def show(self):
        # single-argument print(...) behaves identically on Python 2 and 3
        print(self.toString())

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled (opened in append mode)
    #
    def save(self, file):
        # 'with' closes the file even if write() raises
        with open(file, "a") as fileHandler:
            self.write(fileHandler)

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName(self):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart(self):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd(self):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin(self):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax(self):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand(self):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName(self):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart(self):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd(self):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin(self):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax(self):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand(self):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the internal Range is returned, not a copy
    #
    def getQueryAsRange(self):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the internal Range is returned, not a copy
    #
    def getSubjectAsRange(self):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName(self, name):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart(self, start):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd(self, end):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName(self, name):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart(self, start):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd(self, end):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    #
    # @note nothing is done if the query names or the subject names differ;
    #       otherwise both ranges are merged, and the highest score, the
    #       lowest E-value and the highest identity are kept
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not (sameQuery and sameSubject):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score, o.score)
        self.e_value = min(self.e_value, o.e_value)
        self.identity = max(self.identity, o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and carries the query
    #       coordinates, swapped if the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand(self):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand(self):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    #
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() is safe on an empty string, unlike string[-1]
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple(string.split(sep))

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map(name="repet",
                        seqname=self.range_query.seqname,
                        start=self.range_query.start,
                        end=self.range_query.end)
        iMapSubject = Map(name="repet",
                          seqname=self.range_subject.seqname,
                          start=self.range_subject.start,
                          end=self.range_subject.end)
        return iMapQuery, iMapSubject

    ## Write the subject, mapped on the query coordinates, as a Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery(self, fileHandler):
        iMap = self.getSubjectAsMapOfQuery()
        iMap.write(fileHandler)

    ## Return a bin for fast database access
    #
    # @note the bin is computed from the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note after the switch, the instance is reversed if needed so that
    #       the (new) query is on the direct strand
    #
    def switchQuerySubject(self):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    def isQueryOverlapping(self, iAlign):
        return self.getQueryAsRange().isOverlapping(iAlign.getQueryAsRange())

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    def isSubjectOverlapping(self, iAlign):
        return self.getSubjectAsRange().isOverlapping(iAlign.getSubjectAsRange())

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping(self, iAlign):
        return bool(self.isQueryOverlapping(iAlign) and self.isSubjectOverlapping(iAlign))

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore(self):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0