Mercurial > repos > galaxy-australia > alphafold2
comparison docker/alphafold/alphafold/common/residue_constants.py @ 1:6c92e000d684 draft
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
author | galaxy-australia |
---|---|
date | Tue, 01 Mar 2022 02:53:05 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:7ae9d78b06f5 | 1:6c92e000d684 |
---|---|
1 # Copyright 2021 DeepMind Technologies Limited | |
2 # | |
3 # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 # you may not use this file except in compliance with the License. | |
5 # You may obtain a copy of the License at | |
6 # | |
7 # http://www.apache.org/licenses/LICENSE-2.0 | |
8 # | |
9 # Unless required by applicable law or agreed to in writing, software | |
10 # distributed under the License is distributed on an "AS IS" BASIS, | |
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 # See the License for the specific language governing permissions and | |
13 # limitations under the License. | |
14 | |
15 """Constants used in AlphaFold.""" | |
16 | |
17 import collections | |
18 import functools | |
19 import os | |
20 from typing import List, Mapping, Tuple | |
21 | |
22 import numpy as np | |
23 import tree | |
24 | |
25 # Internal import (35fd). | |
26 | |
27 | |
28 # Distance from one CA to next CA [trans configuration: omega = 180]. | |
29 ca_ca = 3.80209737096 | |
30 | |
31 # Format: The list for each AA type contains chi1, chi2, chi3, chi4 in | |
32 # this order (or a relevant subset from chi1 onwards). ALA and GLY don't have | |
33 # chi angles so their chi angle lists are empty. | |
34 chi_angles_atoms = { | |
35 'ALA': [], | |
36 # Chi5 in arginine is always 0 +- 5 degrees, so ignore it. | |
37 'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], | |
38 ['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']], | |
39 'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], | |
40 'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], | |
41 'CYS': [['N', 'CA', 'CB', 'SG']], | |
42 'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], | |
43 ['CB', 'CG', 'CD', 'OE1']], | |
44 'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], | |
45 ['CB', 'CG', 'CD', 'OE1']], | |
46 'GLY': [], | |
47 'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']], | |
48 'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']], | |
49 'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], | |
50 'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], | |
51 ['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']], | |
52 'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'], | |
53 ['CB', 'CG', 'SD', 'CE']], | |
54 'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], | |
55 'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']], | |
56 'SER': [['N', 'CA', 'CB', 'OG']], | |
57 'THR': [['N', 'CA', 'CB', 'OG1']], | |
58 'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], | |
59 'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], | |
60 'VAL': [['N', 'CA', 'CB', 'CG1']], | |
61 } | |
62 | |
63 # If chi angles given in fixed-length array, this matrix determines how to mask | |
64 # them for each AA type. The order is as per restype_order (see below). | |
65 chi_angles_mask = [ | |
66 [0.0, 0.0, 0.0, 0.0], # ALA | |
67 [1.0, 1.0, 1.0, 1.0], # ARG | |
68 [1.0, 1.0, 0.0, 0.0], # ASN | |
69 [1.0, 1.0, 0.0, 0.0], # ASP | |
70 [1.0, 0.0, 0.0, 0.0], # CYS | |
71 [1.0, 1.0, 1.0, 0.0], # GLN | |
72 [1.0, 1.0, 1.0, 0.0], # GLU | |
73 [0.0, 0.0, 0.0, 0.0], # GLY | |
74 [1.0, 1.0, 0.0, 0.0], # HIS | |
75 [1.0, 1.0, 0.0, 0.0], # ILE | |
76 [1.0, 1.0, 0.0, 0.0], # LEU | |
77 [1.0, 1.0, 1.0, 1.0], # LYS | |
78 [1.0, 1.0, 1.0, 0.0], # MET | |
79 [1.0, 1.0, 0.0, 0.0], # PHE | |
80 [1.0, 1.0, 0.0, 0.0], # PRO | |
81 [1.0, 0.0, 0.0, 0.0], # SER | |
82 [1.0, 0.0, 0.0, 0.0], # THR | |
83 [1.0, 1.0, 0.0, 0.0], # TRP | |
84 [1.0, 1.0, 0.0, 0.0], # TYR | |
85 [1.0, 0.0, 0.0, 0.0], # VAL | |
86 ] | |
87 | |
88 # The following chi angles are pi periodic: they can be rotated by a multiple | |
89 # of pi without affecting the structure. | |
90 chi_pi_periodic = [ | |
91 [0.0, 0.0, 0.0, 0.0], # ALA | |
92 [0.0, 0.0, 0.0, 0.0], # ARG | |
93 [0.0, 0.0, 0.0, 0.0], # ASN | |
94 [0.0, 1.0, 0.0, 0.0], # ASP | |
95 [0.0, 0.0, 0.0, 0.0], # CYS | |
96 [0.0, 0.0, 0.0, 0.0], # GLN | |
97 [0.0, 0.0, 1.0, 0.0], # GLU | |
98 [0.0, 0.0, 0.0, 0.0], # GLY | |
99 [0.0, 0.0, 0.0, 0.0], # HIS | |
100 [0.0, 0.0, 0.0, 0.0], # ILE | |
101 [0.0, 0.0, 0.0, 0.0], # LEU | |
102 [0.0, 0.0, 0.0, 0.0], # LYS | |
103 [0.0, 0.0, 0.0, 0.0], # MET | |
104 [0.0, 1.0, 0.0, 0.0], # PHE | |
105 [0.0, 0.0, 0.0, 0.0], # PRO | |
106 [0.0, 0.0, 0.0, 0.0], # SER | |
107 [0.0, 0.0, 0.0, 0.0], # THR | |
108 [0.0, 0.0, 0.0, 0.0], # TRP | |
109 [0.0, 1.0, 0.0, 0.0], # TYR | |
110 [0.0, 0.0, 0.0, 0.0], # VAL | |
111 [0.0, 0.0, 0.0, 0.0], # UNK | |
112 ] | |
113 | |
114 # Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi, | |
115 # psi and chi angles: | |
116 # 0: 'backbone group', | |
117 # 1: 'pre-omega-group', (empty) | |
118 # 2: 'phi-group', (currently empty, because it defines only hydrogens) | |
119 # 3: 'psi-group', | |
120 # 4,5,6,7: 'chi1,2,3,4-group' | |
121 # The atom positions are relative to the axis-end-atom of the corresponding | |
122 # rotation axis. The x-axis is in direction of the rotation axis, and the y-axis | |
123 # is defined such that the dihedral-angle-definiting atom (the last entry in | |
124 # chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate). | |
125 # format: [atomname, group_idx, rel_position] | |
126 rigid_group_atom_positions = { | |
127 'ALA': [ | |
128 ['N', 0, (-0.525, 1.363, 0.000)], | |
129 ['CA', 0, (0.000, 0.000, 0.000)], | |
130 ['C', 0, (1.526, -0.000, -0.000)], | |
131 ['CB', 0, (-0.529, -0.774, -1.205)], | |
132 ['O', 3, (0.627, 1.062, 0.000)], | |
133 ], | |
134 'ARG': [ | |
135 ['N', 0, (-0.524, 1.362, -0.000)], | |
136 ['CA', 0, (0.000, 0.000, 0.000)], | |
137 ['C', 0, (1.525, -0.000, -0.000)], | |
138 ['CB', 0, (-0.524, -0.778, -1.209)], | |
139 ['O', 3, (0.626, 1.062, 0.000)], | |
140 ['CG', 4, (0.616, 1.390, -0.000)], | |
141 ['CD', 5, (0.564, 1.414, 0.000)], | |
142 ['NE', 6, (0.539, 1.357, -0.000)], | |
143 ['NH1', 7, (0.206, 2.301, 0.000)], | |
144 ['NH2', 7, (2.078, 0.978, -0.000)], | |
145 ['CZ', 7, (0.758, 1.093, -0.000)], | |
146 ], | |
147 'ASN': [ | |
148 ['N', 0, (-0.536, 1.357, 0.000)], | |
149 ['CA', 0, (0.000, 0.000, 0.000)], | |
150 ['C', 0, (1.526, -0.000, -0.000)], | |
151 ['CB', 0, (-0.531, -0.787, -1.200)], | |
152 ['O', 3, (0.625, 1.062, 0.000)], | |
153 ['CG', 4, (0.584, 1.399, 0.000)], | |
154 ['ND2', 5, (0.593, -1.188, 0.001)], | |
155 ['OD1', 5, (0.633, 1.059, 0.000)], | |
156 ], | |
157 'ASP': [ | |
158 ['N', 0, (-0.525, 1.362, -0.000)], | |
159 ['CA', 0, (0.000, 0.000, 0.000)], | |
160 ['C', 0, (1.527, 0.000, -0.000)], | |
161 ['CB', 0, (-0.526, -0.778, -1.208)], | |
162 ['O', 3, (0.626, 1.062, -0.000)], | |
163 ['CG', 4, (0.593, 1.398, -0.000)], | |
164 ['OD1', 5, (0.610, 1.091, 0.000)], | |
165 ['OD2', 5, (0.592, -1.101, -0.003)], | |
166 ], | |
167 'CYS': [ | |
168 ['N', 0, (-0.522, 1.362, -0.000)], | |
169 ['CA', 0, (0.000, 0.000, 0.000)], | |
170 ['C', 0, (1.524, 0.000, 0.000)], | |
171 ['CB', 0, (-0.519, -0.773, -1.212)], | |
172 ['O', 3, (0.625, 1.062, -0.000)], | |
173 ['SG', 4, (0.728, 1.653, 0.000)], | |
174 ], | |
175 'GLN': [ | |
176 ['N', 0, (-0.526, 1.361, -0.000)], | |
177 ['CA', 0, (0.000, 0.000, 0.000)], | |
178 ['C', 0, (1.526, 0.000, 0.000)], | |
179 ['CB', 0, (-0.525, -0.779, -1.207)], | |
180 ['O', 3, (0.626, 1.062, -0.000)], | |
181 ['CG', 4, (0.615, 1.393, 0.000)], | |
182 ['CD', 5, (0.587, 1.399, -0.000)], | |
183 ['NE2', 6, (0.593, -1.189, -0.001)], | |
184 ['OE1', 6, (0.634, 1.060, 0.000)], | |
185 ], | |
186 'GLU': [ | |
187 ['N', 0, (-0.528, 1.361, 0.000)], | |
188 ['CA', 0, (0.000, 0.000, 0.000)], | |
189 ['C', 0, (1.526, -0.000, -0.000)], | |
190 ['CB', 0, (-0.526, -0.781, -1.207)], | |
191 ['O', 3, (0.626, 1.062, 0.000)], | |
192 ['CG', 4, (0.615, 1.392, 0.000)], | |
193 ['CD', 5, (0.600, 1.397, 0.000)], | |
194 ['OE1', 6, (0.607, 1.095, -0.000)], | |
195 ['OE2', 6, (0.589, -1.104, -0.001)], | |
196 ], | |
197 'GLY': [ | |
198 ['N', 0, (-0.572, 1.337, 0.000)], | |
199 ['CA', 0, (0.000, 0.000, 0.000)], | |
200 ['C', 0, (1.517, -0.000, -0.000)], | |
201 ['O', 3, (0.626, 1.062, -0.000)], | |
202 ], | |
203 'HIS': [ | |
204 ['N', 0, (-0.527, 1.360, 0.000)], | |
205 ['CA', 0, (0.000, 0.000, 0.000)], | |
206 ['C', 0, (1.525, 0.000, 0.000)], | |
207 ['CB', 0, (-0.525, -0.778, -1.208)], | |
208 ['O', 3, (0.625, 1.063, 0.000)], | |
209 ['CG', 4, (0.600, 1.370, -0.000)], | |
210 ['CD2', 5, (0.889, -1.021, 0.003)], | |
211 ['ND1', 5, (0.744, 1.160, -0.000)], | |
212 ['CE1', 5, (2.030, 0.851, 0.002)], | |
213 ['NE2', 5, (2.145, -0.466, 0.004)], | |
214 ], | |
215 'ILE': [ | |
216 ['N', 0, (-0.493, 1.373, -0.000)], | |
217 ['CA', 0, (0.000, 0.000, 0.000)], | |
218 ['C', 0, (1.527, -0.000, -0.000)], | |
219 ['CB', 0, (-0.536, -0.793, -1.213)], | |
220 ['O', 3, (0.627, 1.062, -0.000)], | |
221 ['CG1', 4, (0.534, 1.437, -0.000)], | |
222 ['CG2', 4, (0.540, -0.785, -1.199)], | |
223 ['CD1', 5, (0.619, 1.391, 0.000)], | |
224 ], | |
225 'LEU': [ | |
226 ['N', 0, (-0.520, 1.363, 0.000)], | |
227 ['CA', 0, (0.000, 0.000, 0.000)], | |
228 ['C', 0, (1.525, -0.000, -0.000)], | |
229 ['CB', 0, (-0.522, -0.773, -1.214)], | |
230 ['O', 3, (0.625, 1.063, -0.000)], | |
231 ['CG', 4, (0.678, 1.371, 0.000)], | |
232 ['CD1', 5, (0.530, 1.430, -0.000)], | |
233 ['CD2', 5, (0.535, -0.774, 1.200)], | |
234 ], | |
235 'LYS': [ | |
236 ['N', 0, (-0.526, 1.362, -0.000)], | |
237 ['CA', 0, (0.000, 0.000, 0.000)], | |
238 ['C', 0, (1.526, 0.000, 0.000)], | |
239 ['CB', 0, (-0.524, -0.778, -1.208)], | |
240 ['O', 3, (0.626, 1.062, -0.000)], | |
241 ['CG', 4, (0.619, 1.390, 0.000)], | |
242 ['CD', 5, (0.559, 1.417, 0.000)], | |
243 ['CE', 6, (0.560, 1.416, 0.000)], | |
244 ['NZ', 7, (0.554, 1.387, 0.000)], | |
245 ], | |
246 'MET': [ | |
247 ['N', 0, (-0.521, 1.364, -0.000)], | |
248 ['CA', 0, (0.000, 0.000, 0.000)], | |
249 ['C', 0, (1.525, 0.000, 0.000)], | |
250 ['CB', 0, (-0.523, -0.776, -1.210)], | |
251 ['O', 3, (0.625, 1.062, -0.000)], | |
252 ['CG', 4, (0.613, 1.391, -0.000)], | |
253 ['SD', 5, (0.703, 1.695, 0.000)], | |
254 ['CE', 6, (0.320, 1.786, -0.000)], | |
255 ], | |
256 'PHE': [ | |
257 ['N', 0, (-0.518, 1.363, 0.000)], | |
258 ['CA', 0, (0.000, 0.000, 0.000)], | |
259 ['C', 0, (1.524, 0.000, -0.000)], | |
260 ['CB', 0, (-0.525, -0.776, -1.212)], | |
261 ['O', 3, (0.626, 1.062, -0.000)], | |
262 ['CG', 4, (0.607, 1.377, 0.000)], | |
263 ['CD1', 5, (0.709, 1.195, -0.000)], | |
264 ['CD2', 5, (0.706, -1.196, 0.000)], | |
265 ['CE1', 5, (2.102, 1.198, -0.000)], | |
266 ['CE2', 5, (2.098, -1.201, -0.000)], | |
267 ['CZ', 5, (2.794, -0.003, -0.001)], | |
268 ], | |
269 'PRO': [ | |
270 ['N', 0, (-0.566, 1.351, -0.000)], | |
271 ['CA', 0, (0.000, 0.000, 0.000)], | |
272 ['C', 0, (1.527, -0.000, 0.000)], | |
273 ['CB', 0, (-0.546, -0.611, -1.293)], | |
274 ['O', 3, (0.621, 1.066, 0.000)], | |
275 ['CG', 4, (0.382, 1.445, 0.0)], | |
276 # ['CD', 5, (0.427, 1.440, 0.0)], | |
277 ['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger | |
278 ], | |
279 'SER': [ | |
280 ['N', 0, (-0.529, 1.360, -0.000)], | |
281 ['CA', 0, (0.000, 0.000, 0.000)], | |
282 ['C', 0, (1.525, -0.000, -0.000)], | |
283 ['CB', 0, (-0.518, -0.777, -1.211)], | |
284 ['O', 3, (0.626, 1.062, -0.000)], | |
285 ['OG', 4, (0.503, 1.325, 0.000)], | |
286 ], | |
287 'THR': [ | |
288 ['N', 0, (-0.517, 1.364, 0.000)], | |
289 ['CA', 0, (0.000, 0.000, 0.000)], | |
290 ['C', 0, (1.526, 0.000, -0.000)], | |
291 ['CB', 0, (-0.516, -0.793, -1.215)], | |
292 ['O', 3, (0.626, 1.062, 0.000)], | |
293 ['CG2', 4, (0.550, -0.718, -1.228)], | |
294 ['OG1', 4, (0.472, 1.353, 0.000)], | |
295 ], | |
296 'TRP': [ | |
297 ['N', 0, (-0.521, 1.363, 0.000)], | |
298 ['CA', 0, (0.000, 0.000, 0.000)], | |
299 ['C', 0, (1.525, -0.000, 0.000)], | |
300 ['CB', 0, (-0.523, -0.776, -1.212)], | |
301 ['O', 3, (0.627, 1.062, 0.000)], | |
302 ['CG', 4, (0.609, 1.370, -0.000)], | |
303 ['CD1', 5, (0.824, 1.091, 0.000)], | |
304 ['CD2', 5, (0.854, -1.148, -0.005)], | |
305 ['CE2', 5, (2.186, -0.678, -0.007)], | |
306 ['CE3', 5, (0.622, -2.530, -0.007)], | |
307 ['NE1', 5, (2.140, 0.690, -0.004)], | |
308 ['CH2', 5, (3.028, -2.890, -0.013)], | |
309 ['CZ2', 5, (3.283, -1.543, -0.011)], | |
310 ['CZ3', 5, (1.715, -3.389, -0.011)], | |
311 ], | |
312 'TYR': [ | |
313 ['N', 0, (-0.522, 1.362, 0.000)], | |
314 ['CA', 0, (0.000, 0.000, 0.000)], | |
315 ['C', 0, (1.524, -0.000, -0.000)], | |
316 ['CB', 0, (-0.522, -0.776, -1.213)], | |
317 ['O', 3, (0.627, 1.062, -0.000)], | |
318 ['CG', 4, (0.607, 1.382, -0.000)], | |
319 ['CD1', 5, (0.716, 1.195, -0.000)], | |
320 ['CD2', 5, (0.713, -1.194, -0.001)], | |
321 ['CE1', 5, (2.107, 1.200, -0.002)], | |
322 ['CE2', 5, (2.104, -1.201, -0.003)], | |
323 ['OH', 5, (4.168, -0.002, -0.005)], | |
324 ['CZ', 5, (2.791, -0.001, -0.003)], | |
325 ], | |
326 'VAL': [ | |
327 ['N', 0, (-0.494, 1.373, -0.000)], | |
328 ['CA', 0, (0.000, 0.000, 0.000)], | |
329 ['C', 0, (1.527, -0.000, -0.000)], | |
330 ['CB', 0, (-0.533, -0.795, -1.213)], | |
331 ['O', 3, (0.627, 1.062, -0.000)], | |
332 ['CG1', 4, (0.540, 1.429, -0.000)], | |
333 ['CG2', 4, (0.533, -0.776, 1.203)], | |
334 ], | |
335 } | |
336 | |
337 # A list of atoms (excluding hydrogen) for each AA type. PDB naming convention. | |
338 residue_atoms = { | |
339 'ALA': ['C', 'CA', 'CB', 'N', 'O'], | |
340 'ARG': ['C', 'CA', 'CB', 'CG', 'CD', 'CZ', 'N', 'NE', 'O', 'NH1', 'NH2'], | |
341 'ASP': ['C', 'CA', 'CB', 'CG', 'N', 'O', 'OD1', 'OD2'], | |
342 'ASN': ['C', 'CA', 'CB', 'CG', 'N', 'ND2', 'O', 'OD1'], | |
343 'CYS': ['C', 'CA', 'CB', 'N', 'O', 'SG'], | |
344 'GLU': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O', 'OE1', 'OE2'], | |
345 'GLN': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'NE2', 'O', 'OE1'], | |
346 'GLY': ['C', 'CA', 'N', 'O'], | |
347 'HIS': ['C', 'CA', 'CB', 'CG', 'CD2', 'CE1', 'N', 'ND1', 'NE2', 'O'], | |
348 'ILE': ['C', 'CA', 'CB', 'CG1', 'CG2', 'CD1', 'N', 'O'], | |
349 'LEU': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'N', 'O'], | |
350 'LYS': ['C', 'CA', 'CB', 'CG', 'CD', 'CE', 'N', 'NZ', 'O'], | |
351 'MET': ['C', 'CA', 'CB', 'CG', 'CE', 'N', 'O', 'SD'], | |
352 'PHE': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O'], | |
353 'PRO': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O'], | |
354 'SER': ['C', 'CA', 'CB', 'N', 'O', 'OG'], | |
355 'THR': ['C', 'CA', 'CB', 'CG2', 'N', 'O', 'OG1'], | |
356 'TRP': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 'CE3', 'CZ2', 'CZ3', | |
357 'CH2', 'N', 'NE1', 'O'], | |
358 'TYR': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O', | |
359 'OH'], | |
360 'VAL': ['C', 'CA', 'CB', 'CG1', 'CG2', 'N', 'O'] | |
361 } | |
362 | |
363 # Naming swaps for ambiguous atom names. | |
364 # Due to symmetries in the amino acids the naming of atoms is ambiguous in | |
365 # 4 of the 20 amino acids. | |
366 # (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities | |
367 # in LEU, VAL and ARG can be resolved by using the 3d constellations of | |
368 # the 'ambiguous' atoms and their neighbours) | |
369 residue_atom_renaming_swaps = { | |
370 'ASP': {'OD1': 'OD2'}, | |
371 'GLU': {'OE1': 'OE2'}, | |
372 'PHE': {'CD1': 'CD2', 'CE1': 'CE2'}, | |
373 'TYR': {'CD1': 'CD2', 'CE1': 'CE2'}, | |
374 } | |
375 | |
376 # Van der Waals radii [Angstroem] of the atoms (from Wikipedia) | |
377 van_der_waals_radius = { | |
378 'C': 1.7, | |
379 'N': 1.55, | |
380 'O': 1.52, | |
381 'S': 1.8, | |
382 } | |
383 | |
384 Bond = collections.namedtuple( | |
385 'Bond', ['atom1_name', 'atom2_name', 'length', 'stddev']) | |
386 BondAngle = collections.namedtuple( | |
387 'BondAngle', | |
388 ['atom1_name', 'atom2_name', 'atom3name', 'angle_rad', 'stddev']) | |
389 | |
390 | |
391 @functools.lru_cache(maxsize=None) | |
392 def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]], | |
393 Mapping[str, List[Bond]], | |
394 Mapping[str, List[BondAngle]]]: | |
395 """Load stereo_chemical_props.txt into a nice structure. | |
396 | |
397 Load literature values for bond lengths and bond angles and translate | |
398 bond angles into the length of the opposite edge of the triangle | |
399 ("residue_virtual_bonds"). | |
400 | |
401 Returns: | |
402 residue_bonds: Dict that maps resname -> list of Bond tuples. | |
403 residue_virtual_bonds: Dict that maps resname -> list of Bond tuples. | |
404 residue_bond_angles: Dict that maps resname -> list of BondAngle tuples. | |
405 """ | |
406 stereo_chemical_props_path = os.path.join( | |
407 os.path.dirname(os.path.abspath(__file__)), 'stereo_chemical_props.txt' | |
408 ) | |
409 with open(stereo_chemical_props_path, 'rt') as f: | |
410 stereo_chemical_props = f.read() | |
411 lines_iter = iter(stereo_chemical_props.splitlines()) | |
412 # Load bond lengths. | |
413 residue_bonds = {} | |
414 next(lines_iter) # Skip header line. | |
415 for line in lines_iter: | |
416 if line.strip() == '-': | |
417 break | |
418 bond, resname, length, stddev = line.split() | |
419 atom1, atom2 = bond.split('-') | |
420 if resname not in residue_bonds: | |
421 residue_bonds[resname] = [] | |
422 residue_bonds[resname].append( | |
423 Bond(atom1, atom2, float(length), float(stddev))) | |
424 residue_bonds['UNK'] = [] | |
425 | |
426 # Load bond angles. | |
427 residue_bond_angles = {} | |
428 next(lines_iter) # Skip empty line. | |
429 next(lines_iter) # Skip header line. | |
430 for line in lines_iter: | |
431 if line.strip() == '-': | |
432 break | |
433 bond, resname, angle_degree, stddev_degree = line.split() | |
434 atom1, atom2, atom3 = bond.split('-') | |
435 if resname not in residue_bond_angles: | |
436 residue_bond_angles[resname] = [] | |
437 residue_bond_angles[resname].append( | |
438 BondAngle(atom1, atom2, atom3, | |
439 float(angle_degree) / 180. * np.pi, | |
440 float(stddev_degree) / 180. * np.pi)) | |
441 residue_bond_angles['UNK'] = [] | |
442 | |
443 def make_bond_key(atom1_name, atom2_name): | |
444 """Unique key to lookup bonds.""" | |
445 return '-'.join(sorted([atom1_name, atom2_name])) | |
446 | |
447 # Translate bond angles into distances ("virtual bonds"). | |
448 residue_virtual_bonds = {} | |
449 for resname, bond_angles in residue_bond_angles.items(): | |
450 # Create a fast lookup dict for bond lengths. | |
451 bond_cache = {} | |
452 for b in residue_bonds[resname]: | |
453 bond_cache[make_bond_key(b.atom1_name, b.atom2_name)] = b | |
454 residue_virtual_bonds[resname] = [] | |
455 for ba in bond_angles: | |
456 bond1 = bond_cache[make_bond_key(ba.atom1_name, ba.atom2_name)] | |
457 bond2 = bond_cache[make_bond_key(ba.atom2_name, ba.atom3name)] | |
458 | |
459 # Compute distance between atom1 and atom3 using the law of cosines | |
460 # c^2 = a^2 + b^2 - 2ab*cos(gamma). | |
461 gamma = ba.angle_rad | |
462 length = np.sqrt(bond1.length**2 + bond2.length**2 | |
463 - 2 * bond1.length * bond2.length * np.cos(gamma)) | |
464 | |
465 # Propagation of uncertainty assuming uncorrelated errors. | |
466 dl_outer = 0.5 / length | |
467 dl_dgamma = (2 * bond1.length * bond2.length * np.sin(gamma)) * dl_outer | |
468 dl_db1 = (2 * bond1.length - 2 * bond2.length * np.cos(gamma)) * dl_outer | |
469 dl_db2 = (2 * bond2.length - 2 * bond1.length * np.cos(gamma)) * dl_outer | |
470 stddev = np.sqrt((dl_dgamma * ba.stddev)**2 + | |
471 (dl_db1 * bond1.stddev)**2 + | |
472 (dl_db2 * bond2.stddev)**2) | |
473 residue_virtual_bonds[resname].append( | |
474 Bond(ba.atom1_name, ba.atom3name, length, stddev)) | |
475 | |
476 return (residue_bonds, | |
477 residue_virtual_bonds, | |
478 residue_bond_angles) | |
479 | |
480 | |
481 # Between-residue bond lengths for general bonds (first element) and for Proline | |
482 # (second element). | |
483 between_res_bond_length_c_n = [1.329, 1.341] | |
484 between_res_bond_length_stddev_c_n = [0.014, 0.016] | |
485 | |
486 # Between-residue cos_angles. | |
487 between_res_cos_angles_c_n_ca = [-0.5203, 0.0353] # degrees: 121.352 +- 2.315 | |
488 between_res_cos_angles_ca_c_n = [-0.4473, 0.0311] # degrees: 116.568 +- 1.995 | |
489 | |
490 # This mapping is used when we need to store atom data in a format that requires | |
491 # fixed atom data size for every residue (e.g. a numpy array). | |
492 atom_types = [ | |
493 'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD', | |
494 'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3', | |
495 'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2', | |
496 'CZ3', 'NZ', 'OXT' | |
497 ] | |
498 atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)} | |
499 atom_type_num = len(atom_types) # := 37. | |
500 | |
501 # A compact atom encoding with 14 columns | |
502 # pylint: disable=line-too-long | |
503 # pylint: disable=bad-whitespace | |
504 restype_name_to_atom14_names = { | |
505 'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''], | |
506 'ARG': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', ''], | |
507 'ASN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''], | |
508 'ASP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''], | |
509 'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''], | |
510 'GLN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''], | |
511 'GLU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''], | |
512 'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''], | |
513 'HIS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', ''], | |
514 'ILE': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''], | |
515 'LEU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''], | |
516 'LYS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''], | |
517 'MET': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''], | |
518 'PHE': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', ''], | |
519 'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''], | |
520 'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''], | |
521 'THR': ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''], | |
522 'TRP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'], | |
523 'TYR': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', ''], | |
524 'VAL': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''], | |
525 'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''], | |
526 | |
527 } | |
528 # pylint: enable=line-too-long | |
529 # pylint: enable=bad-whitespace | |
530 | |
531 | |
532 # This is the standard residue order when coding AA type as a number. | |
533 # Reproduce it by taking 3-letter AA codes and sorting them alphabetically. | |
534 restypes = [ | |
535 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', | |
536 'S', 'T', 'W', 'Y', 'V' | |
537 ] | |
538 restype_order = {restype: i for i, restype in enumerate(restypes)} | |
539 restype_num = len(restypes) # := 20. | |
540 unk_restype_index = restype_num # Catch-all index for unknown restypes. | |
541 | |
542 restypes_with_x = restypes + ['X'] | |
543 restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)} | |
544 | |
545 | |
546 def sequence_to_onehot( | |
547 sequence: str, | |
548 mapping: Mapping[str, int], | |
549 map_unknown_to_x: bool = False) -> np.ndarray: | |
550 """Maps the given sequence into a one-hot encoded matrix. | |
551 | |
552 Args: | |
553 sequence: An amino acid sequence. | |
554 mapping: A dictionary mapping amino acids to integers. | |
555 map_unknown_to_x: If True, any amino acid that is not in the mapping will be | |
556 mapped to the unknown amino acid 'X'. If the mapping doesn't contain | |
557 amino acid 'X', an error will be thrown. If False, any amino acid not in | |
558 the mapping will throw an error. | |
559 | |
560 Returns: | |
561 A numpy array of shape (seq_len, num_unique_aas) with one-hot encoding of | |
562 the sequence. | |
563 | |
564 Raises: | |
565 ValueError: If the mapping doesn't contain values from 0 to | |
566 num_unique_aas - 1 without any gaps. | |
567 """ | |
568 num_entries = max(mapping.values()) + 1 | |
569 | |
570 if sorted(set(mapping.values())) != list(range(num_entries)): | |
571 raise ValueError('The mapping must have values from 0 to num_unique_aas-1 ' | |
572 'without any gaps. Got: %s' % sorted(mapping.values())) | |
573 | |
574 one_hot_arr = np.zeros((len(sequence), num_entries), dtype=np.int32) | |
575 | |
576 for aa_index, aa_type in enumerate(sequence): | |
577 if map_unknown_to_x: | |
578 if aa_type.isalpha() and aa_type.isupper(): | |
579 aa_id = mapping.get(aa_type, mapping['X']) | |
580 else: | |
581 raise ValueError(f'Invalid character in the sequence: {aa_type}') | |
582 else: | |
583 aa_id = mapping[aa_type] | |
584 one_hot_arr[aa_index, aa_id] = 1 | |
585 | |
586 return one_hot_arr | |
587 | |
588 | |
589 restype_1to3 = { | |
590 'A': 'ALA', | |
591 'R': 'ARG', | |
592 'N': 'ASN', | |
593 'D': 'ASP', | |
594 'C': 'CYS', | |
595 'Q': 'GLN', | |
596 'E': 'GLU', | |
597 'G': 'GLY', | |
598 'H': 'HIS', | |
599 'I': 'ILE', | |
600 'L': 'LEU', | |
601 'K': 'LYS', | |
602 'M': 'MET', | |
603 'F': 'PHE', | |
604 'P': 'PRO', | |
605 'S': 'SER', | |
606 'T': 'THR', | |
607 'W': 'TRP', | |
608 'Y': 'TYR', | |
609 'V': 'VAL', | |
610 } | |
611 | |
612 | |
613 # NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple | |
614 # 1-to-1 mapping of 3 letter names to one letter names. The latter contains | |
615 # many more, and less common, three letter names as keys and maps many of these | |
616 # to the same one letter name (including 'X' and 'U' which we don't use here). | |
617 restype_3to1 = {v: k for k, v in restype_1to3.items()} | |
618 | |
619 # Define a restype name for all unknown residues. | |
620 unk_restype = 'UNK' | |
621 | |
622 resnames = [restype_1to3[r] for r in restypes] + [unk_restype] | |
623 resname_to_idx = {resname: i for i, resname in enumerate(resnames)} | |
624 | |
625 | |
626 # The mapping here uses hhblits convention, so that B is mapped to D, J and O | |
627 # are mapped to X, U is mapped to C, and Z is mapped to E. Other than that the | |
628 # remaining 20 amino acids are kept in alphabetical order. | |
629 # There are 2 non-amino acid codes, X (representing any amino acid) and | |
630 # "-" representing a missing amino acid in an alignment. The id for these | |
631 # codes is put at the end (20 and 21) so that they can easily be ignored if | |
632 # desired. | |
633 HHBLITS_AA_TO_ID = { | |
634 'A': 0, | |
635 'B': 2, | |
636 'C': 1, | |
637 'D': 2, | |
638 'E': 3, | |
639 'F': 4, | |
640 'G': 5, | |
641 'H': 6, | |
642 'I': 7, | |
643 'J': 20, | |
644 'K': 8, | |
645 'L': 9, | |
646 'M': 10, | |
647 'N': 11, | |
648 'O': 20, | |
649 'P': 12, | |
650 'Q': 13, | |
651 'R': 14, | |
652 'S': 15, | |
653 'T': 16, | |
654 'U': 1, | |
655 'V': 17, | |
656 'W': 18, | |
657 'X': 20, | |
658 'Y': 19, | |
659 'Z': 3, | |
660 '-': 21, | |
661 } | |
662 | |
663 # Partial inversion of HHBLITS_AA_TO_ID. | |
664 ID_TO_HHBLITS_AA = { | |
665 0: 'A', | |
666 1: 'C', # Also U. | |
667 2: 'D', # Also B. | |
668 3: 'E', # Also Z. | |
669 4: 'F', | |
670 5: 'G', | |
671 6: 'H', | |
672 7: 'I', | |
673 8: 'K', | |
674 9: 'L', | |
675 10: 'M', | |
676 11: 'N', | |
677 12: 'P', | |
678 13: 'Q', | |
679 14: 'R', | |
680 15: 'S', | |
681 16: 'T', | |
682 17: 'V', | |
683 18: 'W', | |
684 19: 'Y', | |
685 20: 'X', # Includes J and O. | |
686 21: '-', | |
687 } | |
688 | |
689 restypes_with_x_and_gap = restypes + ['X', '-'] | |
690 MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple( | |
691 restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[i]) | |
692 for i in range(len(restypes_with_x_and_gap))) | |
693 | |
694 | |
695 def _make_standard_atom_mask() -> np.ndarray: | |
696 """Returns [num_res_types, num_atom_types] mask array.""" | |
697 # +1 to account for unknown (all 0s). | |
698 mask = np.zeros([restype_num + 1, atom_type_num], dtype=np.int32) | |
699 for restype, restype_letter in enumerate(restypes): | |
700 restype_name = restype_1to3[restype_letter] | |
701 atom_names = residue_atoms[restype_name] | |
702 for atom_name in atom_names: | |
703 atom_type = atom_order[atom_name] | |
704 mask[restype, atom_type] = 1 | |
705 return mask | |
706 | |
707 | |
708 STANDARD_ATOM_MASK = _make_standard_atom_mask() | |
709 | |
710 | |
711 # A one hot representation for the first and second atoms defining the axis | |
712 # of rotation for each chi-angle in each residue. | |
713 def chi_angle_atom(atom_index: int) -> np.ndarray: | |
714 """Define chi-angle rigid groups via one-hot representations.""" | |
715 chi_angles_index = {} | |
716 one_hots = [] | |
717 | |
718 for k, v in chi_angles_atoms.items(): | |
719 indices = [atom_types.index(s[atom_index]) for s in v] | |
720 indices.extend([-1]*(4-len(indices))) | |
721 chi_angles_index[k] = indices | |
722 | |
723 for r in restypes: | |
724 res3 = restype_1to3[r] | |
725 one_hot = np.eye(atom_type_num)[chi_angles_index[res3]] | |
726 one_hots.append(one_hot) | |
727 | |
728 one_hots.append(np.zeros([4, atom_type_num])) # Add zeros for residue `X`. | |
729 one_hot = np.stack(one_hots, axis=0) | |
730 one_hot = np.transpose(one_hot, [0, 2, 1]) | |
731 | |
732 return one_hot | |
733 | |
734 chi_atom_1_one_hot = chi_angle_atom(1) | |
735 chi_atom_2_one_hot = chi_angle_atom(2) | |
736 | |
737 # An array like chi_angles_atoms but using indices rather than names. | |
738 chi_angles_atom_indices = [chi_angles_atoms[restype_1to3[r]] for r in restypes] | |
739 chi_angles_atom_indices = tree.map_structure( | |
740 lambda atom_name: atom_order[atom_name], chi_angles_atom_indices) | |
741 chi_angles_atom_indices = np.array([ | |
742 chi_atoms + ([[0, 0, 0, 0]] * (4 - len(chi_atoms))) | |
743 for chi_atoms in chi_angles_atom_indices]) | |
744 | |
745 # Mapping from (res_name, atom_name) pairs to the atom's chi group index | |
746 # and atom index within that group. | |
747 chi_groups_for_atom = collections.defaultdict(list) | |
748 for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items(): | |
749 for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res): | |
750 for atom_i, atom in enumerate(chi_group): | |
751 chi_groups_for_atom[(res_name, atom)].append((chi_group_i, atom_i)) | |
752 chi_groups_for_atom = dict(chi_groups_for_atom) | |
753 | |
754 | |
755 def _make_rigid_transformation_4x4(ex, ey, translation): | |
756 """Create a rigid 4x4 transformation matrix from two axes and transl.""" | |
757 # Normalize ex. | |
758 ex_normalized = ex / np.linalg.norm(ex) | |
759 | |
760 # make ey perpendicular to ex | |
761 ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized | |
762 ey_normalized /= np.linalg.norm(ey_normalized) | |
763 | |
764 # compute ez as cross product | |
765 eznorm = np.cross(ex_normalized, ey_normalized) | |
766 m = np.stack([ex_normalized, ey_normalized, eznorm, translation]).transpose() | |
767 m = np.concatenate([m, [[0., 0., 0., 1.]]], axis=0) | |
768 return m | |
769 | |
770 | |
771 # create an array with (restype, atomtype) --> rigid_group_idx | |
772 # and an array with (restype, atomtype, coord) for the atom positions | |
773 # and compute affine transformation matrices (4,4) from one rigid group to the | |
774 # previous group | |
775 restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=np.int) | |
776 restype_atom37_mask = np.zeros([21, 37], dtype=np.float32) | |
777 restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32) | |
778 restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=np.int) | |
779 restype_atom14_mask = np.zeros([21, 14], dtype=np.float32) | |
780 restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32) | |
781 restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32) | |
782 | |
783 | |
784 def _make_rigid_group_constants(): | |
785 """Fill the arrays above.""" | |
786 for restype, restype_letter in enumerate(restypes): | |
787 resname = restype_1to3[restype_letter] | |
788 for atomname, group_idx, atom_position in rigid_group_atom_positions[ | |
789 resname]: | |
790 atomtype = atom_order[atomname] | |
791 restype_atom37_to_rigid_group[restype, atomtype] = group_idx | |
792 restype_atom37_mask[restype, atomtype] = 1 | |
793 restype_atom37_rigid_group_positions[restype, atomtype, :] = atom_position | |
794 | |
795 atom14idx = restype_name_to_atom14_names[resname].index(atomname) | |
796 restype_atom14_to_rigid_group[restype, atom14idx] = group_idx | |
797 restype_atom14_mask[restype, atom14idx] = 1 | |
798 restype_atom14_rigid_group_positions[restype, | |
799 atom14idx, :] = atom_position | |
800 | |
801 for restype, restype_letter in enumerate(restypes): | |
802 resname = restype_1to3[restype_letter] | |
803 atom_positions = {name: np.array(pos) for name, _, pos | |
804 in rigid_group_atom_positions[resname]} | |
805 | |
806 # backbone to backbone is the identity transform | |
807 restype_rigid_group_default_frame[restype, 0, :, :] = np.eye(4) | |
808 | |
809 # pre-omega-frame to backbone (currently dummy identity matrix) | |
810 restype_rigid_group_default_frame[restype, 1, :, :] = np.eye(4) | |
811 | |
812 # phi-frame to backbone | |
813 mat = _make_rigid_transformation_4x4( | |
814 ex=atom_positions['N'] - atom_positions['CA'], | |
815 ey=np.array([1., 0., 0.]), | |
816 translation=atom_positions['N']) | |
817 restype_rigid_group_default_frame[restype, 2, :, :] = mat | |
818 | |
819 # psi-frame to backbone | |
820 mat = _make_rigid_transformation_4x4( | |
821 ex=atom_positions['C'] - atom_positions['CA'], | |
822 ey=atom_positions['CA'] - atom_positions['N'], | |
823 translation=atom_positions['C']) | |
824 restype_rigid_group_default_frame[restype, 3, :, :] = mat | |
825 | |
826 # chi1-frame to backbone | |
827 if chi_angles_mask[restype][0]: | |
828 base_atom_names = chi_angles_atoms[resname][0] | |
829 base_atom_positions = [atom_positions[name] for name in base_atom_names] | |
830 mat = _make_rigid_transformation_4x4( | |
831 ex=base_atom_positions[2] - base_atom_positions[1], | |
832 ey=base_atom_positions[0] - base_atom_positions[1], | |
833 translation=base_atom_positions[2]) | |
834 restype_rigid_group_default_frame[restype, 4, :, :] = mat | |
835 | |
836 # chi2-frame to chi1-frame | |
837 # chi3-frame to chi2-frame | |
838 # chi4-frame to chi3-frame | |
839 # luckily all rotation axes for the next frame start at (0,0,0) of the | |
840 # previous frame | |
841 for chi_idx in range(1, 4): | |
842 if chi_angles_mask[restype][chi_idx]: | |
843 axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2] | |
844 axis_end_atom_position = atom_positions[axis_end_atom_name] | |
845 mat = _make_rigid_transformation_4x4( | |
846 ex=axis_end_atom_position, | |
847 ey=np.array([-1., 0., 0.]), | |
848 translation=axis_end_atom_position) | |
849 restype_rigid_group_default_frame[restype, 4 + chi_idx, :, :] = mat | |
850 | |
851 | |
852 _make_rigid_group_constants() | |
853 | |
854 | |
855 def make_atom14_dists_bounds(overlap_tolerance=1.5, | |
856 bond_length_tolerance_factor=15): | |
857 """compute upper and lower bounds for bonds to assess violations.""" | |
858 restype_atom14_bond_lower_bound = np.zeros([21, 14, 14], np.float32) | |
859 restype_atom14_bond_upper_bound = np.zeros([21, 14, 14], np.float32) | |
860 restype_atom14_bond_stddev = np.zeros([21, 14, 14], np.float32) | |
861 residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props() | |
862 for restype, restype_letter in enumerate(restypes): | |
863 resname = restype_1to3[restype_letter] | |
864 atom_list = restype_name_to_atom14_names[resname] | |
865 | |
866 # create lower and upper bounds for clashes | |
867 for atom1_idx, atom1_name in enumerate(atom_list): | |
868 if not atom1_name: | |
869 continue | |
870 atom1_radius = van_der_waals_radius[atom1_name[0]] | |
871 for atom2_idx, atom2_name in enumerate(atom_list): | |
872 if (not atom2_name) or atom1_idx == atom2_idx: | |
873 continue | |
874 atom2_radius = van_der_waals_radius[atom2_name[0]] | |
875 lower = atom1_radius + atom2_radius - overlap_tolerance | |
876 upper = 1e10 | |
877 restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower | |
878 restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower | |
879 restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper | |
880 restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper | |
881 | |
882 # overwrite lower and upper bounds for bonds and angles | |
883 for b in residue_bonds[resname] + residue_virtual_bonds[resname]: | |
884 atom1_idx = atom_list.index(b.atom1_name) | |
885 atom2_idx = atom_list.index(b.atom2_name) | |
886 lower = b.length - bond_length_tolerance_factor * b.stddev | |
887 upper = b.length + bond_length_tolerance_factor * b.stddev | |
888 restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower | |
889 restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower | |
890 restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper | |
891 restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper | |
892 restype_atom14_bond_stddev[restype, atom1_idx, atom2_idx] = b.stddev | |
893 restype_atom14_bond_stddev[restype, atom2_idx, atom1_idx] = b.stddev | |
894 return {'lower_bound': restype_atom14_bond_lower_bound, # shape (21,14,14) | |
895 'upper_bound': restype_atom14_bond_upper_bound, # shape (21,14,14) | |
896 'stddev': restype_atom14_bond_stddev, # shape (21,14,14) | |
897 } |