Mercurial > repos > urgi-team > teiso
diff TEisotools-1.1.a/commons/core/utils/Classif.py @ 16:836ce3d9d47a draft default tip
Uploaded
author | urgi-team |
---|---|
date | Thu, 21 Jul 2016 07:42:47 -0400 |
parents | 255c852351c5 |
children |
line wrap: on
line diff
# Reconstructed from the changeset diff that removed
# TEisotools-1.1.a/commons/core/utils/Classif.py.
import re
import os
from collections import OrderedDict

# Maps a class or order name to the three-letter code returned by
# Classif._decisionRuleForWickerCode().
DWICKERCODE = {
    "ClassI": "RXX",
    "ClassII": "DXX",
    "LTR": "RLX",
    "DIRS": "RYX",
    "PLE": "RPX",
    "LINE": "RIX",
    "SINE": "RSX",
    "TIR": "DTX",
    "Crypton": "DYX",
    "Helitron": "DHX",
    "Maverick": "DMX",

    "TIR-MITE": "DTX",
    "LTR-LARD": "RLX",
    "LTR-TRIM": "RLX",
}


class Classif(object):
    """Represent one line of a classif file.

    An instance stores the consensus name, its code, the confusness and
    completeness flags, the class/order/super-family labels and an
    ``evidence`` dictionary from which the ``coding=``, ``struct=`` and
    ``other=`` columns are rendered.
    """

    def __init__(self, consensusName = "", code = "NA", outConfuseness = "", outCompleteness = "", projectName = "", isShorten = False, consensusLength = "NA", consensusStrand = "NA", consensusClass = "NA", consensusOrder = "NA", consensusSuperFam = "NA", consensusCI = "NA"):
        self._consensusName = consensusName
        self._confusness = outConfuseness
        self._completeness = outCompleteness
        self._projectName = projectName
        self._isShorten = isShorten
        self._consensusLength = consensusLength
        self._consensusStrand = consensusStrand
        self._consensusClass = consensusClass
        self._consensusOrder = consensusOrder
        self._consensusSuperFam = consensusSuperFam
        self._consensusCI = consensusCI
        self._consensusCoding = ""
        self._consensusStruct = ""
        self._consensusOther = ""
        self._isNoChim = ""
        self._hasCodingPart = False
        self._hasStructPart = False
        self._hasOtherPart = False
        self._code = code
        self._evidence = {}

    def __eq__(self, o):
        """Two instances are equal when name, code, confusness and completeness match."""
        if type(o) is not type(self):
            return False
        return (self._consensusName == o._consensusName
                and self._code == o._code
                and self._confusness == o._confusness
                and self._completeness == o._completeness)

    def __ne__(self, o):
        return not self.__eq__(o)

    # ------------------------------------------------------------------
    # Getters
    # ------------------------------------------------------------------

    def getConsensusName(self):
        return self._consensusName

    def getCode(self):
        return self._code

    def getconfusness(self):
        return self._confusness

    def getcompleteness(self):
        return self._completeness

    def getprojectName(self):
        return self._projectName

    def getConsensusLength(self):
        return self._consensusLength

    def getConsensusStrand(self):
        return self._consensusStrand

    def getConsensusClass(self):
        return self._consensusClass

    def getConsensusOrder(self):
        return self._consensusOrder

    def getConsensusSuperFamily(self):
        return self._consensusSuperFam

    def getConsensusCI(self):
        """Return the CI value converted to ``str``."""
        return str(self._consensusCI)

    def getInfoEvidence(self):
        return self._evidence

    def _joinPerOrder(self, writer, skipOtherOnly):
        """Render the evidence with ``writer``, once per order when confused.

        When the confusness is ``'ok'`` the evidence dictionary is rendered
        directly.  Otherwise the evidence is expected to be keyed by the
        names found in the ``|``-separated consensus order; each entry is
        rendered and the results are joined with ``|``.  With
        ``skipOtherOnly`` set, entries after the first whose only key is
        ``'other'`` are left out.
        """
        if self._confusness == 'ok':
            return writer(self._evidence)
        lOrder = self.getConsensusOrder().split("|")
        lParts = [writer(self._evidence[lOrder[0]])]
        for order in lOrder[1:]:
            # list(d) works on Python 2 and 3; a Python 3 keys view never
            # compares equal to a list.
            if skipOtherOnly and list(self._evidence[order]) == ['other']:
                continue
            lParts.append(writer(self._evidence[order]))
        return "|".join(lParts)

    def getConsensusCoding(self):
        """Return the ``coding=...`` column built from the stored evidence."""
        return "coding=" + self._joinPerOrder(self.writeCodingFeaturesLine, True)

    def getConsensusStructure(self):
        """Return the ``struct=...`` column built from the stored evidence."""
        return "struct=" + self._joinPerOrder(self.writeStructFeaturesLine, True)

    def getConsensusOther(self):
        """Return the ``other=...`` column built from the stored evidence."""
        return "other=" + self._joinPerOrder(self.writeOtherFeaturesLine, False)

    # ------------------------------------------------------------------
    # Setters
    # ------------------------------------------------------------------

    def setConsensusName(self, consensusName):
        self._consensusName = consensusName

    def setInfoEvidence(self, evidence):
        self._evidence = evidence

    def setCode(self):
        """Recompute the code from the current class and order."""
        self._code = self._decisionRuleForWickerCode(self.getConsensusClass(), self.getConsensusOrder())

    def setConfusness(self, Confusness):
        self._confusness = Confusness

    def setCompleteness(self, completeness):
        self._completeness = completeness

    def setProjectName(self, projectName):
        self._projectName = projectName

    def setConsensusLength(self, cLength):
        self._consensusLength = cLength

    def setConsensusStrand(self, cStrand):
        self._consensusStrand = cStrand

    def setConsensusClass(self, cClass):
        self._consensusClass = cClass

    def setConsensusOrder(self, cOrder):
        self._consensusOrder = cOrder

    def setConsensusSuperFamily(self, cSuperFamily):
        # Store under the attribute that __init__ and
        # getConsensusSuperFamily() use; the previous attribute name
        # (_consensusSuperFamily) was never read back.
        self._consensusSuperFam = cSuperFamily

    def setConsensusCI(self, CI):
        self._consensusCI = CI

    def setConsensusCoding(self, coding):
        self._consensusCoding = coding

    def setConsensusStructure(self, structure):
        self._consensusStruct = structure

    def setConsensusOther(self, other):
        self._consensusOther = other

    def setCodStrOthFromMessage(self, dico):
        """Fill the stored coding/struct/other strings from one evidence dict."""
        self._consensusCoding = "coding=" + self.writeCodingFeaturesLine(dico)
        self._consensusStruct = "struct=" + self.writeStructFeaturesLine(dico)
        self._consensusOther = "other=" + self.writeOtherFeaturesLine(dico)

    def setCodStrOthFromMessage2(self, dico, cOrder):
        """Fill the stored coding/struct/other strings, per order if needed.

        If the first name of the ``|``-separated ``cOrder`` (with ``rDNA``
        rewritten to ``RDNA``) is not a key of ``dico``, ``dico`` is
        rendered as a single evidence dictionary.  Otherwise every value of
        ``dico`` is rendered in the dictionary's own key order and the
        pieces are joined with ``|``; entries after the first whose only
        key is ``'other'`` contribute to the ``other=`` string only.
        """
        if 'rDNA' in cOrder:
            cOrder = cOrder.replace('rDNA', 'RDNA')
        lOrder = cOrder.split("|")
        # A real list is required for indexing/slicing on Python 3.
        lDicoKeys = list(dico.keys())
        if lOrder[0] not in lDicoKeys:
            self.setCodStrOthFromMessage(dico)
            return

        self.setCodStrOthFromMessage(dico[lDicoKeys[0]])
        for order in lDicoKeys[1:]:
            dSub = dico[order]
            if list(dSub) != ['other']:
                self._consensusCoding = self._consensusCoding + "|" + self.writeCodingFeaturesLine(dSub)
                self._consensusStruct = self._consensusStruct + "|" + self.writeStructFeaturesLine(dSub)
            self._consensusOther = self._consensusOther + "|" + self.writeOtherFeaturesLine(dSub)

    # ------------------------------------------------------------------
    # Consensus naming
    # ------------------------------------------------------------------

    def createNewConsensusName(self):
        """Return ``<code>[-<completeness>][-<confusness>]_<name>``.

        When shortening is enabled and the consensus name looks like
        ``<project>_<x>_<y>_<z...>`` (and does not contain
        ``<project>_RS_``), the shortened name is used instead of the
        original one.
        """
        pastecClassif = "%s" % self._code
        if self._completeness != "":
            pastecClassif += "-%s" % self._completeness
        if self._confusness != "":
            pastecClassif += "-%s" % self._confusness

        name = self._consensusName
        if self._isShorten:
            # Escape the project name so regex metacharacters in it are
            # matched literally.
            pattern = "%s_[a-zA-Z0-9]+_[a-zA-Z0-9]+_[a-zA-Z0-9_]+" % re.escape(self._projectName)
            if re.match(pattern, name) and ("%s_RS_" % self._projectName) not in name:
                name = self.shortenConsensusName()
        return "%s_%s" % (pastecClassif, name)

    def shortenConsensusName(self):
        """Return a compact form of the consensus name.

        The text following the project name is split on ``_``; the result
        is ``<project>-<P>-<C><id>-<rest>`` where ``P`` and ``C`` are the
        first letters of the 2nd and 3rd tokens, ``id`` is the 4th token,
        and ``rest`` is built from the remaining tokens (the first two
        glued together, then ``_`` and the third when there are more than
        two; all glued together otherwise).
        """
        desc = self._consensusName.split(self._projectName)[1]
        lTokens = desc.split("_")
        palignMeth = lTokens[1]
        clustMeth = lTokens[2]
        clustID = lTokens[3]
        lmalignMeth = lTokens[4:]
        if len(lmalignMeth) > 2:
            malignMeth = "%s%s_%s" % (lmalignMeth[0], lmalignMeth[1], lmalignMeth[2])
        else:
            malignMeth = "".join(lmalignMeth)
        return "%s-%s-%s%s-%s" % (self._projectName, palignMeth[0], clustMeth[0], clustID, malignMeth)

    def renameHeaderInConsensusFastaFile(self, fileName = ""):
        """Rewrite ``fileName`` in place with shortened header lines.

        Every line containing ``>`` is replaced by ``>`` followed by the
        shortened consensus name; other lines are copied unchanged.  The
        result is written to a temporary sibling file which then replaces
        ``fileName``.  The instance's own consensus name is restored
        afterwards.
        """
        tmpName = fileName + ".tmp"
        savedName = self._consensusName
        try:
            with open(fileName, "r") as oldFile:
                with open(tmpName, "w") as newFile:
                    for line in oldFile:
                        if ">" in line:
                            # Shorten without the line terminator, then put
                            # it back, so it cannot be lost when trailing
                            # tokens of the name are discarded.
                            header = line.rstrip("\r\n")
                            eol = line[len(header):]
                            self.setConsensusName(header)
                            newFile.write(">%s%s" % (self.shortenConsensusName(), eol))
                        else:
                            newFile.write(line)
        except Exception:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(tmpName):
                os.remove(tmpName)
            raise
        finally:
            self._consensusName = savedName
        # os.replace is unavailable on Python 2; os.rename is the fallback.
        getattr(os, "replace", os.rename)(tmpName, fileName)

    # ------------------------------------------------------------------
    # Evidence formatting
    # ------------------------------------------------------------------

    @staticmethod
    def _wrapFeatures(lFeatures):
        """Return ``(a; b; ...)`` for a non-empty list, ``(NA)`` otherwise."""
        if lFeatures:
            return '(%s)' % "; ".join(lFeatures)
        return "(NA)"

    def writeOtherFeaturesLine(self, dEvidence):
        """Render the coding and structural features stored under ``'other'``."""
        if 'other' not in dEvidence:
            return "(NA)"
        dOtherResults = dEvidence['other']
        lResults = self.formatCodingFeatures(dOtherResults, [])
        lResults = self.formatStructFeatures(dOtherResults, lResults)
        if lResults:
            self._hasOtherPart = True
        return self._wrapFeatures(lResults)

    def writeCodingFeaturesLine(self, dEvidence):
        """Render the coding features of ``dEvidence`` as ``(...)``."""
        return self._wrapFeatures(self.formatCodingFeatures(dEvidence, []))

    def writeStructFeaturesLine(self, dEvidence):
        """Render the structural features of ``dEvidence`` as ``(...)``."""
        return self._wrapFeatures(self.formatStructFeatures(dEvidence, []))

    def formatCodingFeatures(self, dEvidence, lResults):
        """Append the coding features found in ``dEvidence`` to ``lResults``.

        ``lResults`` is modified in place and returned.  ``_hasCodingPart``
        is set when the list is non-empty on exit.
        """
        if 'Repbase_tbx' in dEvidence and dEvidence['Repbase_tbx'] != []:
            lResults.append("TE_BLRtx: %s" % ", ".join(map(str, dEvidence['Repbase_tbx'])))

        if 'Repbase_bx' in dEvidence and dEvidence['Repbase_bx'] != []:
            lResults.append("TE_BLRx: %s" % ", ".join(map(str, dEvidence['Repbase_bx'])))

        if dEvidence.get('te_hmmer') is not None:
            lResults.append('profiles: %s' % self.formatProfilesResults(dEvidence['te_hmmer']))

        if 'Other_profiles' in dEvidence:
            lResults.append('Other_profiles: %s' % self.formatProfilesResults(dEvidence['Other_profiles']))

        if dEvidence.get("rDNA") is not None:
            lResults.append("rDNA_BLRn: %s" % dEvidence["rDNA"])

        if dEvidence.get("HG") is not None:
            lResults.append("HG_BLRn: %s" % dEvidence["HG"])

        if lResults:
            self._hasCodingPart = True
        return lResults

    def formatProfilesResults(self, dProfilesResults):
        """Return ``name: xx.xx%`` entries joined by ``, ``.

        Each value must provide ``getCoverageOnSubject()``.
        """
        lResults = []
        for key in dProfilesResults:
            cov = "%.2f%%" % dProfilesResults[key].getCoverageOnSubject()
            lResults.append('%s: %s' % (key, cov))
        return ", ".join(lResults)

    def formatStructFeatures(self, dEvidence, lResults):
        """Append the structural features found in ``dEvidence`` to ``lResults``.

        ``lResults`` is modified in place and returned.  ``_hasStructPart``
        is set when the list is non-empty on exit.
        """
        if dEvidence.get('length') is not None:
            lResults.append('TElength: %s' % dEvidence['length'])

        if dEvidence.get('TR') is not None:
            lResults.append('TermRepeats: %s' % ", ".join(map(str, dEvidence['TR'])))

        if dEvidence.get('ORF') is not None:
            lResults.append('ORF: %s' % ", ".join(dEvidence['ORF']))

        if dEvidence.get('SSR') is not None:
            lResults.append('SSR: %s' % ", ".join(dEvidence['SSR']))

        if dEvidence.get('SSRCoverage') is not None:
            lResults.append('SSRCoverage=%s' % dEvidence['SSRCoverage'])

        # Only the presence of the key matters here, not its value.
        if 'polyAtail' in dEvidence:
            lResults.append('polyAtail')

        if dEvidence.get('helitronExtremities') is not None:
            lResults.append('helitronExtremities: %s' % ", ".join(map(str, dEvidence['helitronExtremities'])))

        if lResults:
            self._hasStructPart = True
        return lResults

    # ------------------------------------------------------------------
    # Code decision and renaming
    # ------------------------------------------------------------------

    def _decisionRuleForWickerCode(self, teClass, order):
        """Return the code for a class/order pair.

        Lookup goes through ``DWICKERCODE``, first by ``order`` and then by
        ``teClass``.  Failing that, a ``|``-separated order yields ``XXX``
        when the class is ``Unclassified`` or lists differing classes, and
        the code of the class when all listed classes are identical (``NA``
        if that class has no entry).  Everything else yields ``NA``.
        """
        if order in DWICKERCODE:
            return DWICKERCODE[order]
        if teClass in DWICKERCODE:
            return DWICKERCODE[teClass]
        if "|" not in order:
            return 'NA'
        if teClass == "Unclassified":
            return "XXX"
        if "|" in teClass:
            lClass = teClass.split("|")
            if any(iC != lClass[0] for iC in lClass[1:]):
                return "XXX"
            return DWICKERCODE.get(lClass[0], 'NA')
        return 'NA'

    def renameLARDTRIMAndMITE(self):
        """Prefix MITE/LARD/TRIM in the order and in the evidence keys.

        ``MITE`` becomes ``TIR-MITE``, ``LARD`` becomes ``LTR-LARD`` and
        ``TRIM`` becomes ``LTR-TRIM``.  Names that already carry the prefix
        are left alone, so calling the method twice is harmless.
        """
        order = self.getConsensusOrder()
        order = re.sub(r"(?<!TIR-)MITE", "TIR-MITE", order)
        order = re.sub(r"(?<!LTR-)LARD", "LTR-LARD", order)
        order = re.sub(r"(?<!LTR-)TRIM", "LTR-TRIM", order)
        self.setConsensusOrder(order)

        dEvidence = self.getInfoEvidence()
        for oldKey, newKey in (("LARD", "LTR-LARD"), ("TRIM", "LTR-TRIM"), ("MITE", "TIR-MITE")):
            if oldKey in dEvidence:
                dEvidence[newKey] = dEvidence.pop(oldKey)
        self.setInfoEvidence(dEvidence)