diff TEisotools-1.1.a/commons/core/utils/Classif.py @ 16:836ce3d9d47a draft default tip

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:42:47 -0400
parents 255c852351c5
children
line wrap: on
line diff
--- a/TEisotools-1.1.a/commons/core/utils/Classif.py	Thu Jul 21 07:36:44 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,385 +0,0 @@
-import re
-import os
-from collections import OrderedDict
-
-DWICKERCODE = {
-               "ClassI":"RXX",
-               "ClassII":"DXX",
-               "LTR":"RLX",
-               "DIRS":"RYX",
-               "PLE":"RPX",
-               "LINE":"RIX",
-               "SINE":"RSX",
-               "TIR":"DTX",
-               "Crypton":"DYX",
-               "Helitron":"DHX",
-               "Maverick":"DMX",
-
-               "TIR-MITE":"DTX",
-               "LTR-LARD":"RLX",
-               "LTR-TRIM":"RLX"
-               }
-    
-class Classif(object):
-    """ The class Classif is a object what determine a line in classif file.
-    """
-
-    def __init__(self, consensusName = "", code = "NA", outConfuseness = "", outCompleteness = "", projectName = "", isShorten = False, consensusLength = "NA", consensusStrand = "NA", consensusClass = "NA", consensusOrder = "NA", consensusSuperFam = "NA", consensusCI = "NA"):
-        self._consensusName = consensusName
-        self._confusness = outConfuseness
-        self._completeness = outCompleteness
-        self._projectName = projectName
-        self._isShorten = isShorten
-        self._consensusLength = consensusLength
-        self._consensusStrand = consensusStrand
-        self._consensusClass = consensusClass
-        self._consensusOrder = consensusOrder
-        self._consensusSuperFam = consensusSuperFam
-        self._consensusCI = consensusCI
-        self._consensusCoding = ""
-        self._consensusStruct = ""
-        self._consensusOther = ""
-        self._isNoChim = ""
-        self._hasCodingPart = False
-        self._hasStructPart = False
-        self._hasOtherPart = False
-        self._code = code 
-        self._evidence = {}       
-
-    def __eq__(self, o):
-        if type(o) is type(self):
-            return self._consensusName == o._consensusName and self._code == o._code \
-                and self._confusness == o._confusness and self._completeness == o._completeness
-        return False
-
-    def __ne__(self, o):
-        return not self.__eq__(o)
-    
-    def getConsensusName(self):
-        return self._consensusName
-
-    def getCode(self):
-        return self._code
-
-    def getconfusness(self):
-        return self._confusness
-
-    def getcompleteness(self):
-        return self._completeness
-
-    def getprojectName(self):
-        return self._projectName
-    
-    def getConsensusLength(self):
-        return self._consensusLength
-    
-    def getConsensusStrand(self):
-        return self._consensusStrand
-    
-    def getConsensusClass(self):
-        return self._consensusClass
-    
-    def getConsensusOrder(self):
-        return self._consensusOrder
-    
-    def getConsensusSuperFamily(self):
-        return self._consensusSuperFam
-    
-    def getConsensusCI(self):
-        return str(self._consensusCI)
-    
-    def getInfoEvidence(self):
-        return self._evidence
-    
-    def getConsensusCoding(self):
-        if self._confusness == 'ok':            
-            coding = self.writeCodingFeaturesLine(self._evidence)
-        else:
-            lOrder = self.getConsensusOrder().split("|")
-            coding = self.writeCodingFeaturesLine(self._evidence[lOrder[0]])
-            for order in lOrder[1:]:
-                if self._evidence[order].keys() != ['other']:
-                    coding = coding + "|" + self.writeCodingFeaturesLine(self._evidence[order])
-        return "coding=" + coding
-    
-    def getConsensusStructure(self):
-        if self._confusness == 'ok':            
-            Structure = self.writeStructFeaturesLine(self._evidence)
-        else:
-            lOrder = self.getConsensusOrder().split("|")
-            Structure = self.writeStructFeaturesLine(self._evidence[lOrder[0]])
-            for order in lOrder[1:]:
-                if self._evidence[order].keys() != ['other']:
-                    Structure = Structure + "|" + self.writeStructFeaturesLine(self._evidence[order])
-        return "struct=" + Structure
-    
-    def getConsensusOther(self):
-        if self._confusness == 'ok':            
-            Other = self.writeOtherFeaturesLine(self._evidence)
-        else:
-            lOrder = self.getConsensusOrder().split("|")
-            Other = self.writeOtherFeaturesLine(self._evidence[lOrder[0]])
-            for order in lOrder[1:]:
-                    Other = Other + "|" + self.writeOtherFeaturesLine(self._evidence[order])
-        return "other=" + Other
-
-    def setConsensusName(self, consensusName):
-        self._consensusName = consensusName
-
-    def setInfoEvidence(self, evidence):
-        self._evidence = evidence
-
-    def setCode(self):
-        self._code = self._decisionRuleForWickerCode(self.getConsensusClass(), self.getConsensusOrder())
-
-    def setConfusness(self, Confusness):
-        self._confusness = Confusness
-
-    def setCompleteness(self, completeness):
-        self._completeness = completeness
-
-    def setProjectName(self, projectName):
-        self._projectName = projectName
-
-    def setConsensusLength(self, cLength):
-        self._consensusLength = cLength
-
-    def setConsensusStrand(self, cStrand):
-        self._consensusStrand = cStrand
-
-    def setConsensusClass(self, cClass):
-        self._consensusClass = cClass
-
-    def setConsensusOrder(self, cOrder):
-        self._consensusOrder = cOrder
-
-    def setConsensusSuperFamily(self, cSuperFamily):
-        self._consensusSuperFamily = cSuperFamily
-
-    def setConsensusCI(self, CI):
-        self._consensusCI = CI
-
-    def setConsensusCoding(self, coding):
-        self._consensusCoding = coding
-
-    def setConsensusStructure(self, structure):
-        self._consensusStruct = structure
-
-    def setConsensusOther(self, other):
-        self._consensusOther = other
-
-    def setCodStrOthFromMessage(self, dico):
-        self._consensusCoding = "coding="+self.writeCodingFeaturesLine(dico)
-        self._consensusStruct = "struct="+self.writeStructFeaturesLine(dico)
-        self._consensusOther = "other="+self.writeOtherFeaturesLine(dico)
-
-    def setCodStrOthFromMessage2(self, dico, cOrder):
-        if 'rDNA' in cOrder:
-            cOrder = cOrder.replace('rDNA', 'RDNA')
-        lOrder = cOrder.split("|")
-        lDicoKeys = dico.keys()
-        if lOrder[0] not in lDicoKeys:
-            self._consensusCoding = "coding="+self.writeCodingFeaturesLine(dico)
-            self._consensusStruct = "struct="+self.writeStructFeaturesLine(dico)
-            self._consensusOther = "other="+self.writeOtherFeaturesLine(dico)
-        else:
-            self._consensusCoding = "coding="+self.writeCodingFeaturesLine(dico[lDicoKeys[0]])
-            self._consensusStruct = "struct="+self.writeStructFeaturesLine(dico[lDicoKeys[0]])
-            self._consensusOther = "other="+self.writeOtherFeaturesLine(dico[lDicoKeys[0]])
-            if len(lDicoKeys) != 1:
-                for order in lDicoKeys[1:]:
-                    if dico[order].keys() == ['other']:
-                        self._consensusOther = self._consensusOther+"|"+self.writeOtherFeaturesLine(dico[order])
-                    else:
-                        self._consensusCoding = self._consensusCoding+"|"+self.writeCodingFeaturesLine(dico[order])
-                        self._consensusStruct = self._consensusStruct+"|"+self.writeStructFeaturesLine(dico[order])
-                        self._consensusOther = self._consensusOther+"|"+self.writeOtherFeaturesLine(dico[order])
-
-    def createNewConsensusName(self):
-        pastecClassif = "%s" % self._code
-        if self._completeness != "":
-            pastecClassif += "-%s" % self._completeness
-        if self._confusness != "":
-            pastecClassif += "-%s" % self._confusness
-        if self._isShorten:
-            pattern = "%s_[a-zA-Z0-9]+_[a-zA-Z0-9]+_[a-zA-Z0-9_]+" % self._projectName
-            if re.match(pattern, self._consensusName) and not "%s_RS_" % self._projectName in self._consensusName:
-                header = self.shortenConsensusName()
-                header = "%s_%s" % (pastecClassif, header)
-            else:
-                header = "%s_%s" % (pastecClassif, self._consensusName)
-        else:
-            header = "%s_%s" % (pastecClassif, self._consensusName)
-
-        return header
-
-    def shortenConsensusName(self):
-        desc = self._consensusName.split(self._projectName)[1]
-        palignMeth = desc.split("_")[1]
-        clustMeth = desc.split("_")[2]
-        clustID = desc.split("_")[3]
-        lmalignMeth = desc.split("_")[4:]
-        if len(lmalignMeth) > 2:
-            malignMeth = "%s%s_%s" % (lmalignMeth[0], lmalignMeth[1], lmalignMeth[2])
-        else:
-            malignMeth = "".join(lmalignMeth)
-        consensusShorten = "%s-%s-%s%s-%s" % (self._projectName, palignMeth[0], clustMeth[0], clustID, malignMeth)
-
-        return consensusShorten
-
-    def renameHeaderInConsensusFastaFile(self, fileName = ""):
-        newFileName = fileName.split(".")[0]+"New.fa"
-        
-        oldFile = open(fileName, "r")
-        newFile = open(newFileName, "w")
-        
-        inputLine = oldFile.readline()
-        while inputLine != "" :
-            if ">" in inputLine:
-                self.setConsensusName(inputLine)
-                outputLine = ">%s" % self.shortenConsensusName()           
-                newFile.write(outputLine)
-            else:
-                newFile.write(inputLine)
-            
-            inputLine = oldFile.readline()
-        
-        oldFile.close()
-        newFile.close()
-        
-        os.system("mv %s.fa %sOld.fa" % (fileName.split(".")[0], fileName.split(".")[0]))
-        os.system("mv %sNew.fa %s.fa" % (fileName.split(".")[0], fileName.split(".")[0]))
-        os.system("rm -f %sOld.fa" % fileName.split(".")[0])
-
-    def writeOtherFeaturesLine(self, dEvidence):
-        other = "(NA)"
-        if dEvidence.has_key('other'):
-                lResults = []
-                dOtherResults = dEvidence['other']
-                lResultsWithCoding = self.formatCodingFeatures(dOtherResults, lResults)
-                lResultsFilled = self.formatStructFeatures(dOtherResults, lResultsWithCoding)
-                if len(lResultsFilled) != 0:
-                    subOther = "; ".join(lResultsFilled)
-                    other = '(%s)' % subOther
-                    self._hasOtherPart = True
-        return other
-
-    def writeCodingFeaturesLine(self, dEvidence):
-        lResults = []
-        lResultsFilled = self.formatCodingFeatures(dEvidence, lResults)
-        if len(lResultsFilled) != 0:
-            subCoding = "; ".join(lResultsFilled)
-            coding = '(%s)' % subCoding
-        else:
-            coding = "(NA)"
-        return coding
-
-    def writeStructFeaturesLine(self, dEvidence):
-        lResults = []
-        lResultsFilled = self.formatStructFeatures(dEvidence, lResults)
-        if len(lResultsFilled) != 0:
-            subStruct = "; ".join(lResultsFilled)
-            struct = '(%s)' % subStruct
-        else:
-            struct = "(NA)"
-        return struct
-
-    def formatCodingFeatures(self, dEvidence, lResults):
-        if dEvidence.has_key('Repbase_tbx') and dEvidence['Repbase_tbx'] != []:
-            lResults.append("TE_BLRtx: %s" % ", ".join(map(str, dEvidence['Repbase_tbx'])))
-        
-        if dEvidence.has_key('Repbase_bx') and dEvidence['Repbase_bx'] != []:
-            lResults.append("TE_BLRx: %s" % ", ".join(map(str, dEvidence['Repbase_bx'])))
-            
-        if (dEvidence.has_key('te_hmmer')) and (dEvidence['te_hmmer'] != None):
-            lResults.append('profiles: %s' % self.formatProfilesResults(dEvidence['te_hmmer']))
-            
-        if dEvidence.has_key('Other_profiles'):
-            lResults.append('Other_profiles: %s' % self.formatProfilesResults(dEvidence['Other_profiles']))
-        
-        if dEvidence.has_key("rDNA") and (dEvidence["rDNA"] != None):
-            lResults.append("rDNA_BLRn: %s" % dEvidence["rDNA"])
-        
-        if dEvidence.has_key("HG") and (dEvidence["HG"] != None):
-            lResults.append("HG_BLRn: %s" % dEvidence["HG"])
-        
-        if len(lResults) != 0:
-            self._hasCodingPart = True
-        return lResults
-
-    def formatProfilesResults(self, dProfilesResults):
-        if len(dProfilesResults.keys()) == 0:
-            return ""
-        lResults = []
-        for key in dProfilesResults.keys():
-            iPDM = dProfilesResults[key]
-            cov = "%.2f%%" % iPDM.getCoverageOnSubject()
-            profilesResult = '%s: %s' % (key, cov)
-            lResults.append(profilesResult)
-        return ", ".join(lResults)
-    
-    def formatStructFeatures(self, dEvidence, lResults):
-        if dEvidence.has_key('length') and (dEvidence['length']!= None):
-            lResults.append('TElength: %s' % dEvidence['length'])
-
-        if dEvidence.has_key('TR') and (dEvidence['TR'] != None):
-            lResults.append('TermRepeats: %s' % ", ".join(map(str, dEvidence['TR'])))    
-            
-        if dEvidence.has_key('ORF') and (dEvidence['ORF'] != None):
-            lResults.append('ORF: %s' % ", ".join(dEvidence['ORF']))        
-
-        if dEvidence.has_key('SSR') and (dEvidence['SSR'] != None):
-            lResults.append('SSR: %s' % ", ".join(dEvidence['SSR']))
-        
-        if dEvidence.has_key('SSRCoverage') and (dEvidence['SSRCoverage'] != None) :
-            lResults.append('SSRCoverage=%s' % dEvidence['SSRCoverage'])
-                                        
-        if dEvidence.has_key('polyAtail'):
-            lResults.append('polyAtail')   
-             
-        if dEvidence.has_key('helitronExtremities') and (dEvidence['helitronExtremities'] != None):
-            lResults.append('helitronExtremities: %s' % ", ".join(map(str, dEvidence['helitronExtremities'])))
-        if len(lResults) != 0:
-            self._hasStructPart = True        
-        return lResults
-    
-    def _decisionRuleForWickerCode(self, teClass, order):
-        code = 'NA'
-        if order in DWICKERCODE.keys():
-            code = DWICKERCODE[order]
-        elif teClass in DWICKERCODE.keys():
-            code = DWICKERCODE[teClass]
-        elif order == "Unclassified" and teClass == "Unclassified":
-            code = "NA"
-        elif re.search("\|", order) and teClass == "Unclassified":
-            code = "XXX"
-        elif re.search("\|", order) and re.search("\|",teClass):
-            lClass = teClass.split("|")
-            for iC in lClass[1:]:
-                if lClass[0] != iC:
-                    code = "XXX"
-                    return code
-            code = DWICKERCODE[lClass[0]]
-        return code
-    
-    def renameLARDTRIMAndMITE(self):
-        order = self.getConsensusOrder()
-        order = order.replace("MITE", "TIR-MITE")
-        order = order.replace("LARD", "LTR-LARD")
-        order = order.replace("TRIM", "LTR-TRIM")
-        self.setConsensusOrder(order)
-        dEvidence = self.getInfoEvidence()
-        if 'LARD' in dEvidence.keys():
-            dEvidence["LTR-LARD"] = dEvidence["LARD"]
-            del dEvidence["LARD"]
-        if 'TRIM' in dEvidence.keys():
-            dEvidence["LTR-TRIM"] = dEvidence["TRIM"]
-            del dEvidence["TRIM"]
-        if 'MITE' in dEvidence.keys():
-            dEvidence["TIR-MITE"] = dEvidence["MITE"]
-            del dEvidence["MITE"]
-        self.setInfoEvidence(dEvidence)
-            
-        
-        
-        
\ No newline at end of file