annotate spring_model.py @ 24:802daf2993b0 draft

"planemo upload commit 37a4c6844fd7ab1071ddf90f51915ec1a13c26b3-dirty"
author guerler
date Thu, 29 Oct 2020 13:09:57 +0000
parents c790d25086dc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
1 #! /usr/bin/env python3
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
2 import argparse
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
3 import os
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
4
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
5 from spring_package.Alignment import Alignment
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
6 from spring_package.Energy import Energy
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
7 from spring_package.Molecule import Molecule
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
8
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
9 def buildModel(resultFile, templateFile, chainName, outputName):
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
10 template = Molecule(templateFile)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
11 if chainName not in template.calpha:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
12 raise Exception("Chain not found in template [%s]" % chainName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
13 chain = template.calpha[chainName]
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
14 alignment = Alignment(resultFile)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
15 alignment.createModel(chain)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
16 template.saveChain(chainName, outputName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
17 os.system("./build/pulchra %s" % outputName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
18
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
19 def TMalign(fileA, fileB):
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
20 baseA = os.path.basename(fileA)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
21 baseB = os.path.basename(fileB)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
22 baseA = os.path.splitext(baseA)[0]
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
23 baseB = os.path.splitext(baseB)[0]
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
24 tmName = "temp/tmalign.%s.%s" % (baseA, baseB)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
25 os.system("build/TMalign %s %s -m %s.mat > %s.out" % (fileA, fileB, tmName, tmName))
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
26 rotmat = list()
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
27 with open("%s.mat" % tmName) as file:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
28 line = next(file)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
29 line = next(file)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
30 for i in range(3):
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
31 line = next(file)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
32 rotmatLine = line[1:].split()
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
33 rotmatLine = list(map(lambda x: float(x), rotmatLine))
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
34 rotmatLine = [rotmatLine[1], rotmatLine[2], rotmatLine[3], rotmatLine[0]]
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
35 rotmat.append(rotmatLine)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
36 with open("%s.out" % tmName) as file:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
37 for i in range(14):
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
38 line = next(file)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
39 try:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
40 tmscore = float(line[9:17])
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
41 line = next(file)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
42 tmscore = max(tmscore, float(line[9:17]))
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
43 except:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
44 raise Exception("TMalign::Failed to retrieve TMscore.")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
45 molecule = Molecule(fileA)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
46 for atom in molecule.atoms:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
47 molecule.applyMatrix(atom, rotmat)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
48 return tmscore, molecule
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
49
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
50 def main(args):
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
51 os.system("mkdir -p temp")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
52 os.system("rm -f temp/*.*")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
53 buildModel(args.a_result, args.a_template, args.a_chain, "temp/monomerA.pdb")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
54 buildModel(args.b_result, args.b_template, args.b_chain, "temp/monomerB.pdb")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
55 interfaceEnergy = Energy()
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
56 templateMolecule = Molecule(args.template)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
57 modelCount = 0
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
58 for biomolNumber in range(len(templateMolecule.rotmat.keys())):
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
59 os.system("rm -f temp/template*.pdb")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
60 if biomolNumber == 0:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
61 bioMolecule = templateMolecule
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
62 else:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
63 bioMolecule = templateMolecule.createUnit(biomolNumber)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
64 if len(bioMolecule.calpha.keys()) > 1 and args.template_core in bioMolecule.calpha:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
65 for chainName in bioMolecule.calpha.keys():
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
66 bioMolecule.saveChain(chainName, "temp/template%s.pdb" % chainName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
67 coreTMscore, coreMolecule = TMalign("temp/monomerA.rebuilt.pdb", "temp/template%s.pdb" % args.template_core)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
68 maxScore = -9999
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
69 maxMolecule = None
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
70 for chainName in bioMolecule.calpha.keys():
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
71 if chainName != args.template_core and len(bioMolecule.calpha[chainName]) > 0:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
72 print("Evaluating chain %s..." % chainName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
73 try:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
74 partnerTMscore, partnerMolecule = TMalign("temp/monomerB.rebuilt.pdb", "temp/template%s.pdb" % chainName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
75 except:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
76 continue
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
77 minTM = min(coreTMscore, partnerTMscore)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
78 print("min-TMscore: %5.5f" % minTM)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
79 energy = interfaceEnergy.get(coreMolecule, partnerMolecule)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
80 print("Interaction: %5.5f" % energy)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
81 springScore = minTM * args.wtm - energy * args.wenergy
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
82 print("SpringScore: %5.5f" % springScore)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
83 if springScore > maxScore:
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
84 maxMolecule = partnerMolecule
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
85 modelName = "temp/model%s_%s.pdb" % (modelCount, chainName)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
86 coreMolecule.save(modelName, chainName="A")
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
87 maxMolecule.save(modelName, chainName="B", append=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
88 modelCount = modelCount + 1
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
89
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
90 if __name__ == "__main__":
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
91 parser = argparse.ArgumentParser(description='Create a 3D model from HH-search results.')
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
92 parser.add_argument('-ar', '--a_result', help='First HHR target file result', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
93 parser.add_argument('-ac', '--a_chain', help='First template chain name', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
94 parser.add_argument('-at', '--a_template', help='First template PDB', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
95 parser.add_argument('-br', '--b_result', help='Second HHR target file result', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
96 parser.add_argument('-bc', '--b_chain', help='Second structure chain name', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
97 parser.add_argument('-bt', '--b_template', help='Second template PDB', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
98 parser.add_argument('-ts', '--template', help='Structure template', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
99 parser.add_argument('-tc', '--template_core', help='Core template chain name', required=True)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
100 parser.add_argument('-o', '--output', help='Output PDB file', required=False)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
101 parser.add_argument('-wt', '--wtm', help='Weight TM-score', type=float, default=12.4, required=False)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
102 parser.add_argument('-we', '--wenergy', help='Weight Energy term', type=float, default=-0.2, required=False)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
103 args = parser.parse_args()
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
104 main(args)
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
diff changeset
105