diff docker/alphafold/alphafold/relax/cleanup_test.py @ 1:6c92e000d684 draft

"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
author galaxy-australia
date Tue, 01 Mar 2022 02:53:05 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docker/alphafold/alphafold/relax/cleanup_test.py	Tue Mar 01 02:53:05 2022 +0000
@@ -0,0 +1,137 @@
+# Copyright 2021 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for relax.cleanup."""
+import io
+
+from absl.testing import absltest
+from alphafold.relax import cleanup
+from simtk.openmm.app.internal import pdbstructure
+
+
+def _pdb_to_structure(pdb_str):
+  handle = io.StringIO(pdb_str)
+  return pdbstructure.PdbStructure(handle)
+
+
+def _lines_to_structure(pdb_lines):
+  return _pdb_to_structure('\n'.join(pdb_lines))
+
+
+class CleanupTest(absltest.TestCase):
+
+  def test_missing_residues(self):
+    pdb_lines = ['SEQRES   1 C    3  CYS GLY LEU',
+                 'ATOM      1  N   CYS C   1     -12.262  20.115  60.959  1.00 '
+                 '19.08           N',
+                 'ATOM      2  CA  CYS C   1     -11.065  20.934  60.773  1.00 '
+                 '17.23           C',
+                 'ATOM      3  C   CYS C   1     -10.002  20.742  61.844  1.00 '
+                 '15.38           C',
+                 'ATOM      4  O   CYS C   1     -10.284  20.225  62.929  1.00 '
+                 '16.04           O',
+                 'ATOM      5  N   LEU C   3      -7.688  18.700  62.045  1.00 '
+                 '14.75           N',
+                 'ATOM      6  CA  LEU C   3      -7.256  17.320  62.234  1.00 '
+                 '16.81           C',
+                 'ATOM      7  C   LEU C   3      -6.380  16.864  61.070  1.00 '
+                 '16.95           C',
+                 'ATOM      8  O   LEU C   3      -6.551  17.332  59.947  1.00 '
+                 '16.97           O']
+    input_handle = io.StringIO('\n'.join(pdb_lines))
+    alterations = {}
+    result = cleanup.fix_pdb(input_handle, alterations)
+    structure = _pdb_to_structure(result)
+    residue_names = [r.get_name() for r in structure.iter_residues()]
+    self.assertCountEqual(residue_names, ['CYS', 'GLY', 'LEU'])
+    self.assertCountEqual(alterations['missing_residues'].values(), [['GLY']])
+
+  def test_missing_atoms(self):
+    pdb_lines = ['SEQRES   1 A    1  PRO',
+                 'ATOM      1  CA  PRO A   1       1.000   1.000   1.000  1.00 '
+                 ' 0.00           C']
+    input_handle = io.StringIO('\n'.join(pdb_lines))
+    alterations = {}
+    result = cleanup.fix_pdb(input_handle, alterations)
+    structure = _pdb_to_structure(result)
+    atom_names = [a.get_name() for a in structure.iter_atoms()]
+    self.assertCountEqual(atom_names, ['N', 'CD', 'HD2', 'HD3', 'CG', 'HG2',
+                                       'HG3', 'CB', 'HB2', 'HB3', 'CA', 'HA',
+                                       'C', 'O', 'H2', 'H3', 'OXT'])
+    missing_atoms_by_residue = list(alterations['missing_heavy_atoms'].values())
+    self.assertLen(missing_atoms_by_residue, 1)
+    atoms_added = [a.name for a in missing_atoms_by_residue[0]]
+    self.assertCountEqual(atoms_added, ['N', 'CD', 'CG', 'CB', 'C', 'O'])
+    missing_terminals_by_residue = alterations['missing_terminals']
+    self.assertLen(missing_terminals_by_residue, 1)
+    has_missing_terminal = [r.name for r in missing_terminals_by_residue.keys()]
+    self.assertCountEqual(has_missing_terminal, ['PRO'])
+    self.assertCountEqual([t for t in missing_terminals_by_residue.values()],
+                          [['OXT']])
+
+  def test_remove_heterogens(self):
+    pdb_lines = ['SEQRES   1 A    1  GLY',
+                 'ATOM      1  CA  GLY A   1       0.000   0.000   0.000  1.00 '
+                 ' 0.00           C',
+                 'ATOM      2   O  HOH A   2       0.000   0.000   0.000  1.00 '
+                 ' 0.00           O']
+    input_handle = io.StringIO('\n'.join(pdb_lines))
+    alterations = {}
+    result = cleanup.fix_pdb(input_handle, alterations)
+    structure = _pdb_to_structure(result)
+    self.assertCountEqual([res.get_name() for res in structure.iter_residues()],
+                          ['GLY'])
+    self.assertEqual(alterations['removed_heterogens'], set(['HOH']))
+
+  def test_fix_nonstandard_residues(self):
+    pdb_lines = ['SEQRES   1 A    1  DAL',
+                 'ATOM      1  CA  DAL A   1       0.000   0.000   0.000  1.00 '
+                 ' 0.00           C']
+    input_handle = io.StringIO('\n'.join(pdb_lines))
+    alterations = {}
+    result = cleanup.fix_pdb(input_handle, alterations)
+    structure = _pdb_to_structure(result)
+    residue_names = [res.get_name() for res in structure.iter_residues()]
+    self.assertCountEqual(residue_names, ['ALA'])
+    self.assertLen(alterations['nonstandard_residues'], 1)
+    original_res, new_name = alterations['nonstandard_residues'][0]
+    self.assertEqual(original_res.id, '1')
+    self.assertEqual(new_name, 'ALA')
+
+  def test_replace_met_se(self):
+    pdb_lines = ['SEQRES   1 A    1  MET',
+                 'ATOM      1  SD  MET A   1       0.000   0.000   0.000  1.00 '
+                 ' 0.00          Se']
+    structure = _lines_to_structure(pdb_lines)
+    alterations = {}
+    cleanup._replace_met_se(structure, alterations)
+    sd = [a for a in structure.iter_atoms() if a.get_name() == 'SD']
+    self.assertLen(sd, 1)
+    self.assertEqual(sd[0].element_symbol, 'S')
+    self.assertCountEqual(alterations['Se_in_MET'], [sd[0].residue_number])
+
+  def test_remove_chains_of_length_one(self):
+    pdb_lines = ['SEQRES   1 A    1  GLY',
+                 'ATOM      1  CA  GLY A   1       0.000   0.000   0.000  1.00 '
+                 ' 0.00           C']
+    structure = _lines_to_structure(pdb_lines)
+    alterations = {}
+    cleanup._remove_chains_of_length_one(structure, alterations)
+    chains = list(structure.iter_chains())
+    self.assertEmpty(chains)
+    self.assertCountEqual(alterations['removed_chains'].values(), [['A']])
+
+
+# Hand control to absltest's runner when this file is executed as a script.
+if __name__ == '__main__':
+  absltest.main()