annotate docker/alphafold/alphafold/common/protein.py @ 1:6c92e000d684 draft

"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
author galaxy-australia
date Tue, 01 Mar 2022 02:53:05 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
1 # Copyright 2021 DeepMind Technologies Limited
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
2 #
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
3 # Licensed under the Apache License, Version 2.0 (the "License");
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
4 # you may not use this file except in compliance with the License.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
5 # You may obtain a copy of the License at
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
6 #
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
7 # http://www.apache.org/licenses/LICENSE-2.0
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
8 #
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
9 # Unless required by applicable law or agreed to in writing, software
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
10 # distributed under the License is distributed on an "AS IS" BASIS,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
12 # See the License for the specific language governing permissions and
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
13 # limitations under the License.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
14
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
15 """Protein data type."""
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
16 import dataclasses
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
17 import io
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
18 from typing import Any, Mapping, Optional
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
19 from alphafold.common import residue_constants
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
20 from Bio.PDB import PDBParser
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
21 import numpy as np
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
22
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
23 FeatureDict = Mapping[str, np.ndarray]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
24 ModelOutput = Mapping[str, Any] # Is a nested dict.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
25
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
26 # Complete sequence of chain IDs supported by the PDB format.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
27 PDB_CHAIN_IDS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
28 PDB_MAX_CHAINS = len(PDB_CHAIN_IDS) # := 62.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
29
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
30
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
31 @dataclasses.dataclass(frozen=True)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
32 class Protein:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
33 """Protein structure representation."""
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
34
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
35 # Cartesian coordinates of atoms in angstroms. The atom types correspond to
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
36 # residue_constants.atom_types, i.e. the first three are N, CA, CB.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
37 atom_positions: np.ndarray # [num_res, num_atom_type, 3]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
38
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
39 # Amino-acid type for each residue represented as an integer between 0 and
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
40 # 20, where 20 is 'X'.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
41 aatype: np.ndarray # [num_res]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
42
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
43 # Binary float mask to indicate presence of a particular atom. 1.0 if an atom
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
44 # is present and 0.0 if not. This should be used for loss masking.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
45 atom_mask: np.ndarray # [num_res, num_atom_type]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
46
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
47 # Residue index as used in PDB. It is not necessarily continuous or 0-indexed.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
48 residue_index: np.ndarray # [num_res]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
49
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
50 # 0-indexed number corresponding to the chain in the protein that this residue
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
51 # belongs to.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
52 chain_index: np.ndarray # [num_res]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
53
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
54 # B-factors, or temperature factors, of each residue (in sq. angstroms units),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
55 # representing the displacement of the residue from its ground truth mean
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
56 # value.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
57 b_factors: np.ndarray # [num_res, num_atom_type]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
58
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
59 def __post_init__(self):
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
60 if len(np.unique(self.chain_index)) > PDB_MAX_CHAINS:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
61 raise ValueError(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
62 f'Cannot build an instance with more than {PDB_MAX_CHAINS} chains '
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
63 'because these cannot be written to PDB format.')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
64
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
65
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
66 def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
67 """Takes a PDB string and constructs a Protein object.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
68
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
69 WARNING: All non-standard residue types will be converted into UNK. All
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
70 non-standard atoms will be ignored.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
71
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
72 Args:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
73 pdb_str: The contents of the pdb file
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
74 chain_id: If chain_id is specified (e.g. A), then only that chain
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
75 is parsed. Otherwise all chains are parsed.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
76
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
77 Returns:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
78 A new `Protein` parsed from the pdb contents.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
79 """
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
80 pdb_fh = io.StringIO(pdb_str)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
81 parser = PDBParser(QUIET=True)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
82 structure = parser.get_structure('none', pdb_fh)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
83 models = list(structure.get_models())
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
84 if len(models) != 1:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
85 raise ValueError(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
86 f'Only single model PDBs are supported. Found {len(models)} models.')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
87 model = models[0]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
88
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
89 atom_positions = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
90 aatype = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
91 atom_mask = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
92 residue_index = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
93 chain_ids = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
94 b_factors = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
95
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
96 for chain in model:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
97 if chain_id is not None and chain.id != chain_id:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
98 continue
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
99 for res in chain:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
100 if res.id[2] != ' ':
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
101 raise ValueError(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
102 f'PDB contains an insertion code at chain {chain.id} and residue '
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
103 f'index {res.id[1]}. These are not supported.')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
104 res_shortname = residue_constants.restype_3to1.get(res.resname, 'X')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
105 restype_idx = residue_constants.restype_order.get(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
106 res_shortname, residue_constants.restype_num)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
107 pos = np.zeros((residue_constants.atom_type_num, 3))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
108 mask = np.zeros((residue_constants.atom_type_num,))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
109 res_b_factors = np.zeros((residue_constants.atom_type_num,))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
110 for atom in res:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
111 if atom.name not in residue_constants.atom_types:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
112 continue
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
113 pos[residue_constants.atom_order[atom.name]] = atom.coord
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
114 mask[residue_constants.atom_order[atom.name]] = 1.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
115 res_b_factors[residue_constants.atom_order[atom.name]] = atom.bfactor
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
116 if np.sum(mask) < 0.5:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
117 # If no known atom positions are reported for the residue then skip it.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
118 continue
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
119 aatype.append(restype_idx)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
120 atom_positions.append(pos)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
121 atom_mask.append(mask)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
122 residue_index.append(res.id[1])
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
123 chain_ids.append(chain.id)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
124 b_factors.append(res_b_factors)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
125
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
126 # Chain IDs are usually characters so map these to ints.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
127 unique_chain_ids = np.unique(chain_ids)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
128 chain_id_mapping = {cid: n for n, cid in enumerate(unique_chain_ids)}
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
129 chain_index = np.array([chain_id_mapping[cid] for cid in chain_ids])
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
130
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
131 return Protein(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
132 atom_positions=np.array(atom_positions),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
133 atom_mask=np.array(atom_mask),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
134 aatype=np.array(aatype),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
135 residue_index=np.array(residue_index),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
136 chain_index=chain_index,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
137 b_factors=np.array(b_factors))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
138
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
139
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
140 def _chain_end(atom_index, end_resname, chain_name, residue_index) -> str:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
141 chain_end = 'TER'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
142 return (f'{chain_end:<6}{atom_index:>5} {end_resname:>3} '
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
143 f'{chain_name:>1}{residue_index:>4}')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
144
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
145
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
146 def to_pdb(prot: Protein) -> str:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
147 """Converts a `Protein` instance to a PDB string.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
148
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
149 Args:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
150 prot: The protein to convert to PDB.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
151
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
152 Returns:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
153 PDB string.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
154 """
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
155 restypes = residue_constants.restypes + ['X']
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
156 res_1to3 = lambda r: residue_constants.restype_1to3.get(restypes[r], 'UNK')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
157 atom_types = residue_constants.atom_types
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
158
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
159 pdb_lines = []
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
160
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
161 atom_mask = prot.atom_mask
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
162 aatype = prot.aatype
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
163 atom_positions = prot.atom_positions
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
164 residue_index = prot.residue_index.astype(np.int32)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
165 chain_index = prot.chain_index.astype(np.int32)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
166 b_factors = prot.b_factors
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
167
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
168 if np.any(aatype > residue_constants.restype_num):
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
169 raise ValueError('Invalid aatypes.')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
170
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
171 # Construct a mapping from chain integer indices to chain ID strings.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
172 chain_ids = {}
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
173 for i in np.unique(chain_index): # np.unique gives sorted output.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
174 if i >= PDB_MAX_CHAINS:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
175 raise ValueError(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
176 f'The PDB format supports at most {PDB_MAX_CHAINS} chains.')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
177 chain_ids[i] = PDB_CHAIN_IDS[i]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
178
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
179 pdb_lines.append('MODEL 1')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
180 atom_index = 1
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
181 last_chain_index = chain_index[0]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
182 # Add all atom sites.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
183 for i in range(aatype.shape[0]):
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
184 # Close the previous chain if in a multichain PDB.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
185 if last_chain_index != chain_index[i]:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
186 pdb_lines.append(_chain_end(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
187 atom_index, res_1to3(aatype[i - 1]), chain_ids[chain_index[i - 1]],
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
188 residue_index[i - 1]))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
189 last_chain_index = chain_index[i]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
190 atom_index += 1 # Atom index increases at the TER symbol.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
191
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
192 res_name_3 = res_1to3(aatype[i])
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
193 for atom_name, pos, mask, b_factor in zip(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
194 atom_types, atom_positions[i], atom_mask[i], b_factors[i]):
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
195 if mask < 0.5:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
196 continue
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
197
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
198 record_type = 'ATOM'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
199 name = atom_name if len(atom_name) == 4 else f' {atom_name}'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
200 alt_loc = ''
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
201 insertion_code = ''
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
202 occupancy = 1.00
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
203 element = atom_name[0] # Protein supports only C, N, O, S, this works.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
204 charge = ''
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
205 # PDB is a columnar format, every space matters here!
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
206 atom_line = (f'{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
207 f'{res_name_3:>3} {chain_ids[chain_index[i]]:>1}'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
208 f'{residue_index[i]:>4}{insertion_code:>1} '
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
209 f'{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}'
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
210 f'{occupancy:>6.2f}{b_factor:>6.2f} '
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
211 f'{element:>2}{charge:>2}')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
212 pdb_lines.append(atom_line)
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
213 atom_index += 1
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
214
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
215 # Close the final chain.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
216 pdb_lines.append(_chain_end(atom_index, res_1to3(aatype[-1]),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
217 chain_ids[chain_index[-1]], residue_index[-1]))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
218 pdb_lines.append('ENDMDL')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
219 pdb_lines.append('END')
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
220
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
221 # Pad all lines to 80 characters.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
222 pdb_lines = [line.ljust(80) for line in pdb_lines]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
223 return '\n'.join(pdb_lines) + '\n' # Add terminating newline.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
224
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
225
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
226 def ideal_atom_mask(prot: Protein) -> np.ndarray:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
227 """Computes an ideal atom mask.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
228
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
229 `Protein.atom_mask` typically is defined according to the atoms that are
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
230 reported in the PDB. This function computes a mask according to heavy atoms
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
231 that should be present in the given sequence of amino acids.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
232
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
233 Args:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
234 prot: `Protein` whose fields are `numpy.ndarray` objects.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
235
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
236 Returns:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
237 An ideal atom mask.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
238 """
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
239 return residue_constants.STANDARD_ATOM_MASK[prot.aatype]
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
240
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
241
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
242 def from_prediction(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
243 features: FeatureDict,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
244 result: ModelOutput,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
245 b_factors: Optional[np.ndarray] = None,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
246 remove_leading_feature_dimension: bool = True) -> Protein:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
247 """Assembles a protein from a prediction.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
248
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
249 Args:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
250 features: Dictionary holding model inputs.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
251 result: Dictionary holding model outputs.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
252 b_factors: (Optional) B-factors to use for the protein.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
253 remove_leading_feature_dimension: Whether to remove the leading dimension
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
254 of the `features` values.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
255
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
256 Returns:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
257 A protein instance.
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
258 """
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
259 fold_output = result['structure_module']
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
260
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
261 def _maybe_remove_leading_dim(arr: np.ndarray) -> np.ndarray:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
262 return arr[0] if remove_leading_feature_dimension else arr
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
263
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
264 if 'asym_id' in features:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
265 chain_index = _maybe_remove_leading_dim(features['asym_id'])
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
266 else:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
267 chain_index = np.zeros_like(_maybe_remove_leading_dim(features['aatype']))
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
268
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
269 if b_factors is None:
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
270 b_factors = np.zeros_like(fold_output['final_atom_mask'])
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
271
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
272 return Protein(
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
273 aatype=_maybe_remove_leading_dim(features['aatype']),
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
274 atom_positions=fold_output['final_atom_positions'],
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
275 atom_mask=fold_output['final_atom_mask'],
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
276 residue_index=_maybe_remove_leading_dim(features['residue_index']) + 1,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
277 chain_index=chain_index,
6c92e000d684 "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff changeset
278 b_factors=b_factors)