Mercurial > repos > bgruening > ctb_rdkit_descriptors
view rdconf.py @ 9:0993ac4f4a23 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:40:00 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/python3 import gzip import os import sys from optparse import OptionParser from rdkit.Chem import AllChem as Chem """ This script was originally written by David Koes, University of Pittsburgh: https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py It is licensed under the MIT licence. Given a smiles file, generate 3D conformers in output sdf. Energy minimizes and filters conformers to meet energy window and rms constraints. Some time ago I compared this to alternative conformer generators and it was quite competitive (especially after RDKit's UFF implementation added OOP terms). """ # convert smiles to sdf def getRMS(mol, c1, c2): rms = Chem.GetBestRMS(mol, mol, c1, c2) return rms parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf") parser.add_option( "--maxconfs", dest="maxconfs", action="store", help="maximum number of conformers to generate per a molecule (default 20)", default="20", type="int", metavar="CNT", ) parser.add_option( "--sample_multiplier", dest="sample", action="store", help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)", default="1", type="float", metavar="N", ) parser.add_option( "--seed", dest="seed", action="store", help="random seed (default 9162006)", default="9162006", type="int", metavar="s", ) parser.add_option( "--rms_threshold", dest="rms", action="store", help="filter based on rms (default 0.7)", default="0.7", type="float", metavar="R", ) parser.add_option( "--energy_window", dest="energy", action="store", help="filter based on energy difference with lowest energy conformer", default="10", type="float", metavar="E", ) parser.add_option( "-v", "--verbose", dest="verbose", action="store_true", default=False, help="verbose output", ) parser.add_option( "--mmff", dest="mmff", action="store_true", default=False, help="use MMFF forcefield instead of UFF", ) parser.add_option( "--nomin", dest="nomin", action="store_true", default=False, help="don't perform energy minimization (bad idea)", ) parser.add_option( "--etkdg", dest="etkdg", action="store_true", default=False, help="use new ETKDG knowledge-based method instead of distance geometry", ) (options, args) = parser.parse_args() if len(args) < 2: parser.error("Need input and output") sys.exit(-1) input = args[0] output = args[1] smifile = open(input) if options.verbose: print("Generating a maximum of", options.maxconfs, "per a mol") if options.etkdg and not Chem.ETKDG: print("ETKDB does not appear to be implemented. Please upgrade RDKit.") sys.exit(1) split = os.path.splitext(output) if split[1] == ".gz": outf = gzip.open(output, "wt+") output = split[0] # strip .gz else: outf = open(output, "w+") if os.path.splitext(output)[1] == ".pdb": sdwriter = Chem.PDBWriter(outf) else: sdwriter = Chem.SDWriter(outf) if sdwriter is None: print("Could not open ".output) sys.exit(-1) for line in smifile: toks = line.split() smi = toks[0] name = " ".join(toks[1:]) pieces = smi.split(".") if len(pieces) > 1: smi = max(pieces, key=len) # take largest component by length print("Taking largest component: %s\t%s" % (smi, name)) mol = Chem.MolFromSmiles(smi) if mol is not None: if options.verbose: print(smi) try: Chem.SanitizeMol(mol) mol = Chem.AddHs(mol) mol.SetProp("_Name", name) if options.etkdg: cids = Chem.EmbedMultipleConfs( mol, int(options.sample * options.maxconfs), Chem.ETKDG() ) else: cids = Chem.EmbedMultipleConfs( mol, int(options.sample * options.maxconfs), randomSeed=options.seed ) if options.verbose: print(len(cids), "conformers found") cenergy = [] for conf in cids: # not passing confID only minimizes the first conformer if options.nomin: cenergy.append(conf) elif options.mmff: converged = Chem.MMFFOptimizeMolecule(mol, confId=conf) mp = Chem.MMFFGetMoleculeProperties(mol) cenergy.append( Chem.MMFFGetMoleculeForceField( mol, mp, confId=conf ).CalcEnergy() ) else: converged = not Chem.UFFOptimizeMolecule(mol, confId=conf) cenergy.append( Chem.UFFGetMoleculeForceField(mol, confId=conf).CalcEnergy() ) if options.verbose: print("Convergence of conformer", conf, converged) mol = Chem.RemoveHs(mol) sortedcids = sorted(cids, key=lambda cid: cenergy[cid]) if len(sortedcids) > 0: mine = cenergy[sortedcids[0]] else: mine = 0 if options.rms == 0: cnt = 0 for conf in sortedcids: if cnt >= options.maxconfs: break if (options.energy < 0) or cenergy[conf] - mine <= options.energy: sdwriter.write(mol, conf) cnt += 1 else: written = {} for conf in sortedcids: if len(written) >= options.maxconfs: break # check rmsd passed = True for seenconf in written.keys(): rms = getRMS(mol, seenconf, conf) if (rms < options.rms) or ( options.energy > 0 and cenergy[conf] - mine > options.energy ): passed = False break if passed: written[conf] = True sdwriter.write(mol, conf) except (KeyboardInterrupt, SystemExit): raise except Exception as e: print("Exception", e) else: print("ERROR:", smi) sdwriter.close() outf.close()