comparison spring_package/Modeller.py @ 39:172398348efd draft

"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author guerler
date Fri, 22 Jan 2021 15:50:27 +0000
parents
children 06337927c198
comparison
equal deleted inserted replaced
38:80a4b98121b6 39:172398348efd
1 import subprocess
2 from os import mkdir
3 from os.path import basename, isdir
4
5 from spring_package.Alignment import Alignment
6 from spring_package.DBKit import DBKit
7 from spring_package.Energy import Energy
8 from spring_package.Molecule import Molecule
9 from spring_package.Utilities import getChain, getCrossReference, getName, getTemplates
10
11
12 def createPDB(identifier, pdbDatabase, outputName):
13 pdb = getName(identifier)
14 pdbDatabaseId = "%s.pdb" % pdb
15 return pdbDatabase.createFile(pdbDatabaseId, outputName)
16
17
18 def createMonomer(resultFile, identifier, pdbDatabase, outputName):
19 print("Building model with: %s." % identifier)
20 if not createPDB(identifier, pdbDatabase, outputName):
21 return False
22 template = Molecule(outputName)
23 pdbChain = getChain(identifier)
24 if pdbChain not in template.calpha:
25 print("Chain not found in template [%s]" % pdbChain)
26 return False
27 chain = template.calpha[pdbChain]
28 alignment = Alignment(resultFile)
29 alignment.createModel(chain)
30 template.saveChain(pdbChain, outputName)
31 try:
32 subprocess.run(["pulchra", outputName], check=True)
33 except subprocess.CalledProcessError as e:
34 raise Exception(str(e))
35 return True
36
37
38 def TMalign(fileA, fileB, tmName="temp/tmalign"):
39 try:
40 tmResult = open("%s.out" % tmName, "w")
41 subprocess.run(["tmalign", fileA, fileB, "-m", "%s.mat" % tmName], check=True, stdout=tmResult)
42 tmResult.close()
43 except subprocess.CalledProcessError as e:
44 raise Exception(str(e))
45 rotmat = list()
46 with open("%s.mat" % tmName) as file:
47 line = next(file)
48 line = next(file)
49 for i in range(3):
50 line = next(file)
51 rotmatLine = line.split()
52 rotmatLine = list(map(lambda x: float(x), rotmatLine))
53 rotmatLine = [rotmatLine[2], rotmatLine[3], rotmatLine[4], rotmatLine[1]]
54 rotmat.append(rotmatLine)
55 with open("%s.out" % tmName) as file:
56 for i in range(18):
57 line = next(file)
58 try:
59 tmscore = float(line[9:17])
60 line = next(file)
61 tmscore = max(tmscore, float(line[9:17]))
62 except Exception:
63 raise Exception("TMalign::Failed to retrieve TMscore.")
64 molecule = Molecule(fileA)
65 for atom in molecule.atoms:
66 molecule.applyMatrix(atom, rotmat)
67 return tmscore, molecule
68
69
70 def TMalignAlignment(bioMolecule, templateChain, tmName="temp/tmalign"):
71 aligned = list()
72 with open("%s.out" % tmName) as file:
73 for i in range(23):
74 line = next(file)
75 try:
76 modelAlign = line
77 line = next(file)
78 alignment = line
79 line = next(file)
80 templateAlign = line
81 except Exception:
82 raise Exception("TMalign::Failed to retrieve TMalign results.")
83 templateResidues = sorted(bioMolecule.calpha[templateChain].values(), key=lambda item: item["residueNumber"])
84 templateIndex = 0
85 for i in range(len(alignment)):
86 t = templateAlign[i]
87 if alignment[i] == ":":
88 templateResidue = templateResidues[templateIndex]
89 templateResidue["alignedResidue"] = modelAlign[i]
90 aligned.append(templateResidue)
91 if t != "-":
92 templateIndex = templateIndex + 1
93 return aligned
94
95
96 def getFrameworks(aTemplates, bTemplates, crossReference, minScore, maxTries):
97 templateHits = list()
98 for aTemplate in aTemplates:
99 if aTemplate in crossReference:
100 partners = crossReference[aTemplate]["partners"]
101 templates = crossReference[aTemplate]["templates"]
102 for pIndex in range(len(partners)):
103 pTemplate = partners[pIndex]
104 templatePair = templates[pIndex]
105 if pTemplate in bTemplates:
106 minZ = min(aTemplates[aTemplate], bTemplates[pTemplate])
107 templateHits.append(dict(templatePair=templatePair, score=minZ))
108 templateList = sorted(templateHits, key=lambda item: item["score"], reverse=True)
109 print("Found %d templates." % len(templateList))
110 for templateHit in templateList:
111 if templateHit["score"] < minScore or maxTries == 0:
112 break
113 maxTries = maxTries - 1
114 yield templateHit["templatePair"]
115
116
117 def createModel(args):
118 print("SPRING - Complex Model Creation")
119 aName = basename(args.a_hhr)
120 bName = basename(args.b_hhr)
121 print("Sequence A: %s" % aName)
122 print("Sequence B: %s" % bName)
123 aTop, aTemplates = getTemplates(args.a_hhr)
124 bTop, bTemplates = getTemplates(args.b_hhr)
125 if not isdir("temp"):
126 mkdir("temp")
127 outputName = args.output
128 pdbDatabase = DBKit(args.index, args.database)
129 crossReference = getCrossReference(args.cross)
130 interfaceEnergy = Energy()
131 if not createMonomer(args.a_hhr, aTop, pdbDatabase, "temp/monomerA.pdb"):
132 print("Warning: Failed to determine monomer model for %s." % args.a_hhr)
133 return False
134 if not createMonomer(args.b_hhr, bTop, pdbDatabase, "temp/monomerB.pdb"):
135 print("Warning: Failed to determine monomer model for %s." % args.b_hhr)
136 return False
137 maxScore = -9999
138 maxInfo = None
139 minScore = float(args.minscore)
140 maxTries = int(args.maxtries)
141 for [aTemplate, bTemplate] in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries):
142 print("Evaluating Complex Template: %s." % aTemplate)
143 templateFile = "temp/template.pdb"
144 createPDB(aTemplate, pdbDatabase, templateFile)
145 templateMolecule = Molecule(templateFile)
146 aTemplateChain = getChain(aTemplate)
147 bTemplateChain = getChain(bTemplate)
148 if aTemplateChain == bTemplateChain:
149 bTemplateChain = "%s_0" % bTemplateChain
150 print("Evaluating chain %s and %s..." % (aTemplate, bTemplate))
151 biomolFound = False
152 for biomolNumber in templateMolecule.biomol:
153 bioMolecule = templateMolecule.createUnit(biomolNumber)
154 if (len(bioMolecule.calpha.keys()) > 1
155 and aTemplateChain in bioMolecule.calpha
156 and bTemplateChain in bioMolecule.calpha):
157 print("Evaluating biomolecule %i..." % biomolNumber)
158 bioMolecule.saveChain(aTemplateChain, "temp/template_0.pdb")
159 bioMolecule.saveChain(bTemplateChain, "temp/template_1.pdb")
160 try:
161 coreScore, coreMolecule = TMalign("temp/monomerA.rebuilt.pdb", "temp/template_0.pdb")
162 coreAligned = TMalignAlignment(bioMolecule, aTemplateChain)
163 partnerScore, partnerMolecule = TMalign("temp/monomerB.rebuilt.pdb", "temp/template_1.pdb")
164 partnerAligned = TMalignAlignment(bioMolecule, bTemplateChain)
165 except Exception as e:
166 print("Warning: Failed TMalign [%s]." % bTemplateChain)
167 print(str(e))
168 continue
169 biomolFound = True
170 tmscore = min(coreScore, partnerScore)
171 print(" tmscore:\t%5.2f" % tmscore)
172 energy = -interfaceEnergy.get(coreAligned, partnerAligned)
173 print(" energy:\t%5.2f" % energy)
174 clashes = interfaceEnergy.getClashes(coreMolecule, partnerMolecule)
175 print(" clashes:\t%5.2f" % clashes)
176 springscore = tmscore + energy * args.wenergy
177 print(" springscore:\t%5.2f" % springscore)
178 if springscore > maxScore and clashes < args.maxclashes:
179 maxScore = springscore
180 maxInfo = dict(springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes)
181 coreMolecule.save(outputName, chainName="0")
182 partnerMolecule.save(outputName, chainName="1", append=True)
183 if args.showtemplate == "true":
184 bioMolecule.save(outputName, append=True)
185 if biomolFound:
186 break
187 if maxInfo is not None:
188 print("Final Model:")
189 for key in maxInfo:
190 print(" %s:\t%5.2f" % (key, maxInfo[key]))
191 print("Completed.")
192 else:
193 print("Warning: Failed to determine model.")
194 return maxInfo