Mercurial > repos > bgruening > enumerate_charges
comparison rdconf.py @ 5:67ee76f0e497 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:40:23 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:bbbf5fb356dd | 5:67ee76f0e497 |
---|---|
1 #!/usr/bin/python3 | |
2 | |
3 import gzip | |
4 import os | |
5 import sys | |
6 from optparse import OptionParser | |
7 | |
8 from rdkit.Chem import AllChem as Chem | |
9 | |
10 """ | |
11 This script was originally written by David Koes, University of Pittsburgh: | |
12 https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py | |
13 It is licensed under the MIT licence. | |
14 | |
15 Given a smiles file, generate 3D conformers in output sdf. | |
16 Energy minimizes and filters conformers to meet energy window and rms constraints. | |
17 | |
18 Some time ago I compared this to alternative conformer generators and | |
19 it was quite competitive (especially after RDKit's UFF implementation | |
20 added OOP terms). | |
21 """ | |
22 | |
23 | |
24 # convert smiles to sdf | |
25 def getRMS(mol, c1, c2): | |
26 rms = Chem.GetBestRMS(mol, mol, c1, c2) | |
27 return rms | |
28 | |
29 | |
30 parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf") | |
31 parser.add_option( | |
32 "--maxconfs", | |
33 dest="maxconfs", | |
34 action="store", | |
35 help="maximum number of conformers to generate per a molecule (default 20)", | |
36 default="20", | |
37 type="int", | |
38 metavar="CNT", | |
39 ) | |
40 parser.add_option( | |
41 "--sample_multiplier", | |
42 dest="sample", | |
43 action="store", | |
44 help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)", | |
45 default="1", | |
46 type="float", | |
47 metavar="N", | |
48 ) | |
49 parser.add_option( | |
50 "--seed", | |
51 dest="seed", | |
52 action="store", | |
53 help="random seed (default 9162006)", | |
54 default="9162006", | |
55 type="int", | |
56 metavar="s", | |
57 ) | |
58 parser.add_option( | |
59 "--rms_threshold", | |
60 dest="rms", | |
61 action="store", | |
62 help="filter based on rms (default 0.7)", | |
63 default="0.7", | |
64 type="float", | |
65 metavar="R", | |
66 ) | |
67 parser.add_option( | |
68 "--energy_window", | |
69 dest="energy", | |
70 action="store", | |
71 help="filter based on energy difference with lowest energy conformer", | |
72 default="10", | |
73 type="float", | |
74 metavar="E", | |
75 ) | |
76 parser.add_option( | |
77 "-v", | |
78 "--verbose", | |
79 dest="verbose", | |
80 action="store_true", | |
81 default=False, | |
82 help="verbose output", | |
83 ) | |
84 parser.add_option( | |
85 "--mmff", | |
86 dest="mmff", | |
87 action="store_true", | |
88 default=False, | |
89 help="use MMFF forcefield instead of UFF", | |
90 ) | |
91 parser.add_option( | |
92 "--nomin", | |
93 dest="nomin", | |
94 action="store_true", | |
95 default=False, | |
96 help="don't perform energy minimization (bad idea)", | |
97 ) | |
98 parser.add_option( | |
99 "--etkdg", | |
100 dest="etkdg", | |
101 action="store_true", | |
102 default=False, | |
103 help="use new ETKDG knowledge-based method instead of distance geometry", | |
104 ) | |
105 | |
106 | |
107 (options, args) = parser.parse_args() | |
108 | |
109 if len(args) < 2: | |
110 parser.error("Need input and output") | |
111 sys.exit(-1) | |
112 | |
113 input = args[0] | |
114 output = args[1] | |
115 smifile = open(input) | |
116 if options.verbose: | |
117 print("Generating a maximum of", options.maxconfs, "per a mol") | |
118 | |
119 if options.etkdg and not Chem.ETKDG: | |
120 print("ETKDB does not appear to be implemented. Please upgrade RDKit.") | |
121 sys.exit(1) | |
122 | |
123 split = os.path.splitext(output) | |
124 if split[1] == ".gz": | |
125 outf = gzip.open(output, "wt+") | |
126 output = split[0] # strip .gz | |
127 else: | |
128 outf = open(output, "w+") | |
129 | |
130 | |
131 if os.path.splitext(output)[1] == ".pdb": | |
132 sdwriter = Chem.PDBWriter(outf) | |
133 else: | |
134 sdwriter = Chem.SDWriter(outf) | |
135 | |
136 if sdwriter is None: | |
137 print("Could not open ".output) | |
138 sys.exit(-1) | |
139 | |
140 for line in smifile: | |
141 toks = line.split() | |
142 smi = toks[0] | |
143 name = " ".join(toks[1:]) | |
144 | |
145 pieces = smi.split(".") | |
146 if len(pieces) > 1: | |
147 smi = max(pieces, key=len) # take largest component by length | |
148 print("Taking largest component: %s\t%s" % (smi, name)) | |
149 | |
150 mol = Chem.MolFromSmiles(smi) | |
151 if mol is not None: | |
152 if options.verbose: | |
153 print(smi) | |
154 try: | |
155 Chem.SanitizeMol(mol) | |
156 mol = Chem.AddHs(mol) | |
157 mol.SetProp("_Name", name) | |
158 | |
159 if options.etkdg: | |
160 cids = Chem.EmbedMultipleConfs( | |
161 mol, int(options.sample * options.maxconfs), Chem.ETKDG() | |
162 ) | |
163 else: | |
164 cids = Chem.EmbedMultipleConfs( | |
165 mol, int(options.sample * options.maxconfs), randomSeed=options.seed | |
166 ) | |
167 if options.verbose: | |
168 print(len(cids), "conformers found") | |
169 cenergy = [] | |
170 for conf in cids: | |
171 # not passing confID only minimizes the first conformer | |
172 if options.nomin: | |
173 cenergy.append(conf) | |
174 elif options.mmff: | |
175 converged = Chem.MMFFOptimizeMolecule(mol, confId=conf) | |
176 mp = Chem.MMFFGetMoleculeProperties(mol) | |
177 cenergy.append( | |
178 Chem.MMFFGetMoleculeForceField( | |
179 mol, mp, confId=conf | |
180 ).CalcEnergy() | |
181 ) | |
182 else: | |
183 converged = not Chem.UFFOptimizeMolecule(mol, confId=conf) | |
184 cenergy.append( | |
185 Chem.UFFGetMoleculeForceField(mol, confId=conf).CalcEnergy() | |
186 ) | |
187 if options.verbose: | |
188 print("Convergence of conformer", conf, converged) | |
189 | |
190 mol = Chem.RemoveHs(mol) | |
191 sortedcids = sorted(cids, key=lambda cid: cenergy[cid]) | |
192 if len(sortedcids) > 0: | |
193 mine = cenergy[sortedcids[0]] | |
194 else: | |
195 mine = 0 | |
196 if options.rms == 0: | |
197 cnt = 0 | |
198 for conf in sortedcids: | |
199 if cnt >= options.maxconfs: | |
200 break | |
201 if (options.energy < 0) or cenergy[conf] - mine <= options.energy: | |
202 sdwriter.write(mol, conf) | |
203 cnt += 1 | |
204 else: | |
205 written = {} | |
206 for conf in sortedcids: | |
207 if len(written) >= options.maxconfs: | |
208 break | |
209 # check rmsd | |
210 passed = True | |
211 for seenconf in written.keys(): | |
212 rms = getRMS(mol, seenconf, conf) | |
213 if (rms < options.rms) or ( | |
214 options.energy > 0 and cenergy[conf] - mine > options.energy | |
215 ): | |
216 passed = False | |
217 break | |
218 if passed: | |
219 written[conf] = True | |
220 sdwriter.write(mol, conf) | |
221 except (KeyboardInterrupt, SystemExit): | |
222 raise | |
223 except Exception as e: | |
224 print("Exception", e) | |
225 else: | |
226 print("ERROR:", smi) | |
227 | |
228 sdwriter.close() | |
229 outf.close() |