Mercurial > repos > urgi-team > teiso
diff TEisotools-1.1.a/commons/core/coord/Align.py @ 16:836ce3d9d47a draft default tip
Uploaded
author | urgi-team |
---|---|
date | Thu, 21 Jul 2016 07:42:47 -0400 |
parents | 255c852351c5 |
children |
line wrap: on
line diff
--- a/TEisotools-1.1.a/commons/core/coord/Align.py Thu Jul 21 07:36:44 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,429 +0,0 @@ -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. - -import time -from commons.core.coord.Map import Map -from commons.core.coord.Range import Range - -## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) -# -class Align( object ): - - __slots__ = ("range_query", "range_subject", "e_value", "score", "identity", '__dict__') - - ## Constructor - # - # @param range_q: a Range instance for the query - # @param range_s: a Range instance for the subject - # @param e_value: E-value of the match - # @param identity: identity percentage of the match - # @param score: score of the match - # - def __init__(self, range_q=Range(), range_s=Range(), e_value=0, score=0, identity=0): - self.range_query = range_q - self.range_subject = range_s - self.e_value = float(e_value) - self.score = float(score) - self.identity = float(identity) - - ## Return True if the instance is empty, False otherwise - # - def isEmpty(self): - return self.range_query.isEmpty() or self.range_subject.isEmpty() - - ## Equal operator - # - def __eq__(self, o): - if type(o) is not type(self): - return False - else: - return self.range_query==o.range_query and self.range_subject==o.range_subject and \ - self.e_value==o.e_value and self.score==o.score and self.identity==o.identity - - ## Unequal operator - # - # @param o a Range instance - # - def __ne__(self, o): - return not self.__eq__(o) - - ## Convert the object into a string - # - # @note used in 'print myObject' - # - def __str__( self ): - return self.toString() - - ## Read attributes from an Align file - # - # @param fileHandler: file handler of the file being read - # @return: 1 on success, 0 at the end of the file - # - def read(self, fileHandler): - self.reset() - line = fileHandler.readline() - if line == "": - return 0 - tokens = line.split("\t") - if len(tokens) < 5: - return 0 - self.setFromTuple(tokens) - return 1 - - ## Set attributes from tuple - # - # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity) - # @note data are loaded such that the query is always on the direct strand - # - def setFromTuple( self, tuple ): - #TODO: we need to create Range instances because of __eq__() and isEmpty() tests, but WHY ??? - self.range_query = Range() - self.range_subject = Range() - if int(tuple[1]) < int(tuple[2]): - self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) ) - self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) ) - else: - self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) ) - self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) ) - self.e_value = float(tuple[6]) - self.score = float(tuple[7]) - self.identity = float(tuple[8]) - - ## Reset - # - def reset( self ): - self.range_query.reset() - self.range_subject.reset() - self.e_value = 0 - self.score = 0 - self.identity = 0 - - ## Return the attributes as a formatted string - # - def toString(self): - string = "%s" % ( self.range_query.toString() ) - string += "\t%s" % ( self.range_subject.toString() ) - string += "\t%g\t%i\t%f" % ( self.e_value, self.score, self.identity ) - return string - - - ## Return the attributes as a GFF-formatted string - # - def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ): - if not self.isSubjectOnDirectStrand(): - self.reverse() - string = "%s" % ( self.getQueryName() ) - string += "\t%s" % ( source ) - string += "\t%s" % ( type ) - string += "\t%s" % ( self.getQueryMin() ) - string += "\t%s" % ( self.getQueryMax() ) - string += "\t%g" % ( self.e_value ) - string += "\t%s" % ( self.getQueryStrand() ) - string += "\t%s" % ( phase ) - attributes = "" - if ID != "": - attributes += "ID=%s" % ( ID ) - else: - attributes += "ID=%i" % ( str(time.time())[-8:-1].replace(".","") ) - if Parent != "": - attributes += ";Parent=%s" % ( Parent ) - attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() ) - string += "\t%s" % ( attributes ) - return string - - - ## Reverse query and subject - # - def reverse(self): - self.range_query.reverse() - self.range_subject.reverse() - - ## Show the attributes - # - def show(self): - print self.toString() - - ## Write attributes into an Align file - # - # @param fileHandler: file handler of the file being filled - # - def write(self, fileHandler): - fileHandler.write("%s\n" % (self.toString())) - - ## Save attributes into an Align file - # - # @param file: name of the file being filled - # - def save(self, file): - fileHandler = open( file, "a" ) - self.write( fileHandler ) - fileHandler.close() - - ## Return the score - # - def getScore(self): - return self.score - - ## Return the identity - # - def getIdentity(self): - return self.identity - - def getEvalue(self): - return self.e_value - - ## Return the length on the query - # - def getLengthOnQuery(self): - return self.range_query.getLength() - - ## Return the name of the query - # - def getQueryName( self ): - return self.range_query.seqname - - ## Return the start of the query - # - def getQueryStart( self ): - return self.range_query.start - - ## Return the end of the query - # - def getQueryEnd( self ): - return self.range_query.end - - ## Return the min of the query - # - def getQueryMin( self ): - return self.range_query.getMin() - - ## Return the max of the query - # - def getQueryMax( self ): - return self.range_query.getMax() - - ## Return the strand of the query - # - def getQueryStrand( self ): - return self.range_query.getStrand() - - ## Return the length on the subject - # - def getLengthOnSubject(self): - return self.range_subject.getLength() - - ## Return the name of the subject - # - def getSubjectName( self ): - return self.range_subject.seqname - - ## Return the start of the subject - # - def getSubjectStart( self ): - return self.range_subject.start - - ## Return the end of the subject - # - def getSubjectEnd( self ): - return self.range_subject.end - - ## Return the min of the subject - # - def getSubjectMin( self ): - return self.range_subject.getMin() - - ## Return the max of the subject - # - def getSubjectMax( self ): - return self.range_subject.getMax() - - ## Return the strand of the subject - # - def getSubjectStrand( self ): - return self.range_subject.getStrand() - - ## Return the query as a Range instance - # - def getQueryAsRange( self ): - return self.range_query - - ## Return the subject as a Range instance - # - def getSubjectAsRange( self ): - return self.range_subject - - ## Set the name of the query - # - def setQueryName( self, name ): - self.range_query.seqname = name - - ## Set the start of the query - # - def setQueryStart( self, start ): - self.range_query.start = start - - ## Set the end of the query - # - def setQueryEnd( self, end ): - self.range_query.end = end - - ## Set the name of the subject - # - def setSubjectName( self, name ): - self.range_subject.seqname = name - - ## Set the start of the subject - # - def setSubjectStart( self, start ): - self.range_subject.start = start - - ## Set the end of the subject - # - def setSubjectEnd( self, end ): - self.range_subject.end = end - - ## Merge the instance with another Align instance - # - # @param o an Align instance - # - def merge(self, o): - if self.range_query.seqname != o.range_query.seqname \ - or self.range_subject.seqname != o.range_subject.seqname: - return - self.range_query.merge(o.range_query) - self.range_subject.merge(o.range_subject) - self.score = max(self.score,o.score) - self.e_value = min(self.e_value,o.e_value) - self.identity = max(self.identity,o.identity) - - ## Return a Map instance with the subject mapped on the query - # - def getSubjectAsMapOfQuery(self): - iMap = Map() - iMap.name = self.range_subject.seqname - iMap.seqname = self.range_query.seqname - if self.range_subject.isOnDirectStrand(): - iMap.start = self.range_query.start - iMap.end = self.range_query.end - else: - iMap.start = self.range_query.end - iMap.end = self.range_query.start - return iMap - - ## Return True if query is on direct strand - # - def isQueryOnDirectStrand( self ): - return self.range_query.isOnDirectStrand() - - ## Return True if subject is on direct strand - # - def isSubjectOnDirectStrand( self ): - return self.range_subject.isOnDirectStrand() - - ## Return True if query and subject are on the same strand, False otherwise - # - def areQrySbjOnSameStrand(self): - return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand() - - ## Return False if query and subject are on the same strand, True otherwise - # - def areQrySbjOnOppositeStrands(self): - return not self.areQrySbjOnSameStrand() - - ## Set attributes from string - # - # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity - # @param sep field separator - # - def setFromString(self, string, sep="\t"): - if string[-1] == "\n": - string = string[:-1] - self.setFromTuple( string.split(sep) ) - - ## Return a first Map instance for the query and a second for the subject - # - def getMapsOfQueryAndSubject(self): - iMapQuery = Map( name="repet", - seqname=self.range_query.seqname, - start=self.range_query.start, - end=self.range_query.end ) - iMapSubject = Map( name="repet", - seqname=self.range_subject.seqname, - start=self.range_subject.start, - end=self.range_subject.end ) - return iMapQuery, iMapSubject - - ## Write query coordinates as Map in a file - # - # @param fileHandler: file handler of the file being filled - # - def writeSubjectAsMapOfQuery( self, fileHandler ): - m = self.getSubjectAsMapOfQuery() - m.write( fileHandler ) - - ## Return a bin for fast database access - # - def getBin(self): - return self.range_query.getBin() - - ## Switch query and subject - # - def switchQuerySubject( self ): - tmpRange = self.range_query - self.range_query = self.range_subject - self.range_subject = tmpRange - if not self.isQueryOnDirectStrand(): - self.reverse() - - ## Return True if the query overlaps with the query of another Align instance, False otherwise - # - def isQueryOverlapping( self, iAlign ): - return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() ) - - ## Return True if the subject overlaps with the subject of another Align instance, False otherwise - # - def isSubjectOverlapping( self, iAlign ): - return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() ) - - ## Return True if the Align instance overlaps with another Align instance, False otherwise - # - def isOverlapping( self, iAlign ): - if self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ): - return True - else: - return False - - ## Update the score - # - # @note the new score is the length on the query times the percentage of identity - # - def updateScore( self ): - newScore = self.getLengthOnQuery() * self.getIdentity() / 100.0 - self.score = newScore