diff rdconf.py @ 6:4beb3e026bbb draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author bgruening
date Sat, 04 Dec 2021 16:39:05 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rdconf.py	Sat Dec 04 16:39:05 2021 +0000
@@ -0,0 +1,229 @@
+#!/usr/bin/python3
+
+import gzip
+import os
+import sys
+from optparse import OptionParser
+
+from rdkit.Chem import AllChem as Chem
+
+"""
+This script was originally written by David Koes, University of Pittsburgh:
+https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py
+It is licensed under the MIT licence.
+
+Given a smiles file, generate 3D conformers in output sdf.
+Energy minimizes and filters conformers to meet energy window and rms constraints.
+
+Some time ago I compared this to alternative conformer generators and
+it was quite competitive (especially after RDKit's UFF implementation
+added OOP terms).
+"""
+
+
+# convert smiles to sdf
+def getRMS(mol, c1, c2):
+    rms = Chem.GetBestRMS(mol, mol, c1, c2)
+    return rms
+
+
+parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf")
+parser.add_option(
+    "--maxconfs",
+    dest="maxconfs",
+    action="store",
+    help="maximum number of conformers to generate per a molecule (default 20)",
+    default="20",
+    type="int",
+    metavar="CNT",
+)
+parser.add_option(
+    "--sample_multiplier",
+    dest="sample",
+    action="store",
+    help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)",
+    default="1",
+    type="float",
+    metavar="N",
+)
+parser.add_option(
+    "--seed",
+    dest="seed",
+    action="store",
+    help="random seed (default 9162006)",
+    default="9162006",
+    type="int",
+    metavar="s",
+)
+parser.add_option(
+    "--rms_threshold",
+    dest="rms",
+    action="store",
+    help="filter based on rms (default 0.7)",
+    default="0.7",
+    type="float",
+    metavar="R",
+)
+parser.add_option(
+    "--energy_window",
+    dest="energy",
+    action="store",
+    help="filter based on energy difference with lowest energy conformer",
+    default="10",
+    type="float",
+    metavar="E",
+)
+parser.add_option(
+    "-v",
+    "--verbose",
+    dest="verbose",
+    action="store_true",
+    default=False,
+    help="verbose output",
+)
+parser.add_option(
+    "--mmff",
+    dest="mmff",
+    action="store_true",
+    default=False,
+    help="use MMFF forcefield instead of UFF",
+)
+parser.add_option(
+    "--nomin",
+    dest="nomin",
+    action="store_true",
+    default=False,
+    help="don't perform energy minimization (bad idea)",
+)
+parser.add_option(
+    "--etkdg",
+    dest="etkdg",
+    action="store_true",
+    default=False,
+    help="use new ETKDG knowledge-based method instead of distance geometry",
+)
+
+
+(options, args) = parser.parse_args()
+
+if len(args) < 2:
+    parser.error("Need input and output")
+    sys.exit(-1)
+
+input = args[0]
+output = args[1]
+smifile = open(input)
+if options.verbose:
+    print("Generating a maximum of", options.maxconfs, "per a mol")
+
+if options.etkdg and not Chem.ETKDG:
+    print("ETKDB does not appear to be implemented.  Please upgrade RDKit.")
+    sys.exit(1)
+
+split = os.path.splitext(output)
+if split[1] == ".gz":
+    outf = gzip.open(output, "wt+")
+    output = split[0]  # strip .gz
+else:
+    outf = open(output, "w+")
+
+
+if os.path.splitext(output)[1] == ".pdb":
+    sdwriter = Chem.PDBWriter(outf)
+else:
+    sdwriter = Chem.SDWriter(outf)
+
+if sdwriter is None:
+    print("Could not open ".output)
+    sys.exit(-1)
+
+for line in smifile:
+    toks = line.split()
+    smi = toks[0]
+    name = " ".join(toks[1:])
+
+    pieces = smi.split(".")
+    if len(pieces) > 1:
+        smi = max(pieces, key=len)  # take largest component by length
+        print("Taking largest component: %s\t%s" % (smi, name))
+
+    mol = Chem.MolFromSmiles(smi)
+    if mol is not None:
+        if options.verbose:
+            print(smi)
+        try:
+            Chem.SanitizeMol(mol)
+            mol = Chem.AddHs(mol)
+            mol.SetProp("_Name", name)
+
+            if options.etkdg:
+                cids = Chem.EmbedMultipleConfs(
+                    mol, int(options.sample * options.maxconfs), Chem.ETKDG()
+                )
+            else:
+                cids = Chem.EmbedMultipleConfs(
+                    mol, int(options.sample * options.maxconfs), randomSeed=options.seed
+                )
+            if options.verbose:
+                print(len(cids), "conformers found")
+            cenergy = []
+            for conf in cids:
+                # not passing confID only minimizes the first conformer
+                if options.nomin:
+                    cenergy.append(conf)
+                elif options.mmff:
+                    converged = Chem.MMFFOptimizeMolecule(mol, confId=conf)
+                    mp = Chem.MMFFGetMoleculeProperties(mol)
+                    cenergy.append(
+                        Chem.MMFFGetMoleculeForceField(
+                            mol, mp, confId=conf
+                        ).CalcEnergy()
+                    )
+                else:
+                    converged = not Chem.UFFOptimizeMolecule(mol, confId=conf)
+                    cenergy.append(
+                        Chem.UFFGetMoleculeForceField(mol, confId=conf).CalcEnergy()
+                    )
+                if options.verbose:
+                    print("Convergence of conformer", conf, converged)
+
+            mol = Chem.RemoveHs(mol)
+            sortedcids = sorted(cids, key=lambda cid: cenergy[cid])
+            if len(sortedcids) > 0:
+                mine = cenergy[sortedcids[0]]
+            else:
+                mine = 0
+            if options.rms == 0:
+                cnt = 0
+                for conf in sortedcids:
+                    if cnt >= options.maxconfs:
+                        break
+                    if (options.energy < 0) or cenergy[conf] - mine <= options.energy:
+                        sdwriter.write(mol, conf)
+                        cnt += 1
+            else:
+                written = {}
+                for conf in sortedcids:
+                    if len(written) >= options.maxconfs:
+                        break
+                    # check rmsd
+                    passed = True
+                    for seenconf in written.keys():
+                        rms = getRMS(mol, seenconf, conf)
+                        if (rms < options.rms) or (
+                            options.energy > 0 and cenergy[conf] - mine > options.energy
+                        ):
+                            passed = False
+                            break
+                    if passed:
+                        written[conf] = True
+                        sdwriter.write(mol, conf)
+        except (KeyboardInterrupt, SystemExit):
+            raise
+        except Exception as e:
+            print("Exception", e)
+    else:
+        print("ERROR:", smi)
+
+sdwriter.close()
+outf.close()