comparison spring_package/Molecule.py @ 37:0be0af9e695d draft

"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
author guerler
date Wed, 25 Nov 2020 14:35:35 +0000
parents
children 172398348efd
comparison
equal deleted inserted replaced
36:2fe8ffff530d 37:0be0af9e695d
1 class Molecule:
2 def __init__(self, fileName=None):
3 self.calpha = dict()
4 self.biomol = dict()
5 self.atoms = list()
6 if fileName is not None:
7 self.fromFile(fileName)
8
9 def fromFile(self, fileName):
10 biomolNumber = 0
11 biomolChains = list()
12 self.biomol[0] = None
13 with open(fileName) as file:
14 for line in file:
15 key = line[0:6].strip()
16 if key == "ATOM":
17 atom = line[12:16]
18 atomNumber = line[6:11]
19 chainName = line[21:22]
20 if chainName not in self.calpha:
21 self.calpha[chainName] = dict()
22 x = self.toFloat(line[30:38])
23 y = self.toFloat(line[38:46])
24 z = self.toFloat(line[46:54])
25 occupancy = self.toFloat(line[54:60], optional=True)
26 temperature = self.toFloat(line[54:60], optional=True)
27 residue = line[17:20]
28 residueNumber = self.toInt(line[22:26])
29 atomNumber = self.toInt(line[6:11])
30 atomName = line[12:16]
31 atomDict = dict(x=x, y=y, z=z,
32 residue=residue,
33 occupancy=occupancy,
34 temperature=temperature,
35 atomNumber=atomNumber,
36 atomName=atomName,
37 residueNumber=residueNumber,
38 chainName=chainName)
39 if atom.strip() == "CA":
40 self.calpha[chainName][residueNumber] = atomDict
41 self.atoms.append(atomDict)
42 biokey = "REMARK 350 BIOMOLECULE:"
43 if line[0:len(biokey)] == biokey:
44 biomolNumber = self.toInt(line[len(biokey):])
45 if biomolNumber == 0:
46 raise Exception("Invalid biomolecule identifier [0].")
47 biokey = "REMARK 350 APPLY THE FOLLOWING TO CHAINS:"
48 nextLine = next(file)
49 while nextLine[:len(biokey)] != biokey:
50 nextLine = next(file)
51 biomolChains = nextLine[len(biokey):].split(",")
52 biomolChains = list(map(lambda x: x.strip(), biomolChains))
53 biokey = "REMARK 350 AND CHAINS:"
54 nextLine = next(file)
55 while nextLine[:len(biokey)] == biokey:
56 moreChains = nextLine[len(biokey):].split(",")
57 moreChains = list(map(lambda x: x.strip(), moreChains))
58 biomolChains = biomolChains + moreChains
59 nextLine = next(file)
60 biokey = "REMARK 350 BIOMT"
61 if nextLine[:len(biokey)] == biokey:
62 biomolMatId1, biomolMat1 = self.getFloats(nextLine)
63 nextLine = next(file)
64 biomolMatId2, biomolMat2 = self.getFloats(nextLine)
65 nextLine = next(file)
66 biomolMatId3, biomolMat3 = self.getFloats(nextLine)
67 if biomolMatId1 != biomolMatId2 or biomolMatId1 != biomolMatId3:
68 raise Exception("Invalid rotation matrix format [%s]." % biomolMatId1)
69 matrix = [biomolMat1, biomolMat2, biomolMat3]
70 biomolChains = [c for c in biomolChains if c]
71 if biomolNumber not in self.biomol:
72 self.biomol[biomolNumber] = list()
73 self.biomol[biomolNumber].append(dict(chains=biomolChains, matrix=matrix))
74 removeChains = []
75 for chainName in self.calpha:
76 if len(self.calpha[chainName]) == 0:
77 removeChains.append(chainName)
78 for chainName in removeChains:
79 del self.calpha[chainName]
80 if not self.calpha:
81 raise Exception("Molecule has no atoms.")
82
83 def getFloats(self, nextLine):
84 matId = self.toInt(nextLine[20:23])
85 matLine = nextLine[23:].split()
86 matLine = list(map(lambda x: self.toFloat(x), matLine))
87 return matId, matLine
88
89 def createUnit(self, biomolNumber=1):
90 molecule = Molecule()
91 chainCount = 0
92 for matrixDict in self.biomol[biomolNumber]:
93 for chain in matrixDict["chains"]:
94 if chain in self.calpha:
95 chainCopy = dict()
96 for residue in self.calpha[chain]:
97 chainCopy[residue] = self.calpha[chain][residue].copy()
98 for atomNumber in chainCopy:
99 atom = chainCopy[atomNumber]
100 rotmat = matrixDict["matrix"]
101 self.applyMatrix(atom, rotmat)
102 if chain in molecule.calpha:
103 chainName = "%s%d" % (chain, chainCount)
104 else:
105 chainName = chain
106 molecule.calpha[chainName] = chainCopy
107 chainCount = chainCount + 1
108 return molecule
109
110 def getSequence(self, chainName):
111 seq = ""
112 if chainName not in self.calpha:
113 raise Exception("Chain identifier not found [%s]" % chainName)
114 chainDict = self.calpha[chainName]
115 for residueNumber in sorted(chainDict):
116 seq = seq + self.toSingleAmino(chainDict[residueNumber]["residue"])
117 return seq
118
119 def applyMatrix(self, atom, rotmat):
120 newx = atom["x"] * rotmat[0][0] + atom["y"] * rotmat[0][1] + atom["z"] * rotmat[0][2] + rotmat[0][3]
121 newy = atom["x"] * rotmat[1][0] + atom["y"] * rotmat[1][1] + atom["z"] * rotmat[1][2] + rotmat[1][3]
122 newz = atom["x"] * rotmat[2][0] + atom["y"] * rotmat[2][1] + atom["z"] * rotmat[2][2] + rotmat[2][3]
123 atom["x"] = newx
124 atom["y"] = newy
125 atom["z"] = newz
126 return atom
127
128 def toFloat(self, x, optional=False):
129 try:
130 return float(x)
131 except Exception:
132 if not optional:
133 raise Exception("Invalid float conversion [%s]." % x)
134 return 0.0
135
136 def toInt(self, x):
137 try:
138 return int(x)
139 except Exception:
140 raise Exception("Invalid integer conversion [%s]." % x)
141
142 def toSingleAmino(self, seq):
143 code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W",
144 LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R")
145 return code[seq] if seq in code else "X"
146
147 def saveChain(self, chainName, outputName):
148 print("Writing PDB file to %s." % outputName)
149 f = open(outputName, "w")
150 for residueNumber in sorted(self.calpha[chainName].keys()):
151 ca = self.calpha[chainName][residueNumber]
152 if ca["residue"] is not None:
153 f.write(self.atomString(ca))
154 f.close()
155
156 def save(self, outputName, append=False, chainName=None):
157 print("Writing atoms to PDB file to %s." % outputName)
158 fileFlag = "+a" if append else "w"
159 f = open(outputName, fileFlag)
160 for atom in self.atoms:
161 atom["chainName"] = chainName if chainName else atom["chainName"]
162 f.write(self.atomString(atom))
163 f.write("TER\n")
164 f.close()
165
166 def atomString(self, atom):
167 return "ATOM %5d %s %s %1s%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n" % (atom["atomNumber"], atom["atomName"], atom["residue"], atom["chainName"], atom["residueNumber"],
168 atom["x"], atom["y"], atom["z"], atom["occupancy"], atom["temperature"])