comparison dimorphite_dl.py @ 5:67ee76f0e497 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author bgruening
date Sat, 04 Dec 2021 16:40:23 +0000
parents bbbf5fb356dd
children
comparison
equal deleted inserted replaced
4:bbbf5fb356dd 5:67ee76f0e497
17 This script identifies and enumerates the possible protonation sites of SMILES 17 This script identifies and enumerates the possible protonation sites of SMILES
18 strings. 18 strings.
19 """ 19 """
20 20
21 from __future__ import print_function 21 from __future__ import print_function
22
23 import argparse
22 import os 24 import os
23 import argparse
24 import sys 25 import sys
25 26
26 try: 27 try:
27 # Python2 28 # Python2
28 from StringIO import StringIO 29 from StringIO import StringIO
41 42
42 try: 43 try:
43 import rdkit 44 import rdkit
44 from rdkit import Chem 45 from rdkit import Chem
45 from rdkit.Chem import AllChem 46 from rdkit.Chem import AllChem
46 except: 47 except Exception:
47 msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/" 48 msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"
48 print(msg) 49 print(msg)
49 raise Exception(msg) 50 raise Exception(msg)
51
50 52
51 def main(params=None): 53 def main(params=None):
52 """The main definition run when you call the script from the commandline. 54 """The main definition run when you call the script from the commandline.
53 55
54 :param params: The parameters to use. Entirely optional. If absent, 56 :param params: The parameters to use. Entirely optional. If absent,
82 if "output_file" in args and args["output_file"] is not None: 84 if "output_file" in args and args["output_file"] is not None:
83 # An output file was specified, so write to that. 85 # An output file was specified, so write to that.
84 with open(args["output_file"], "w") as file: 86 with open(args["output_file"], "w") as file:
85 for protonated_smi in Protonate(args): 87 for protonated_smi in Protonate(args):
86 file.write(protonated_smi + "\n") 88 file.write(protonated_smi + "\n")
87 elif "return_as_list" in args and args["return_as_list"] == True: 89 elif "return_as_list" in args and args["return_as_list"]:
88 return list(Protonate(args)) 90 return list(Protonate(args))
89 else: 91 else:
90 # No output file specified. Just print it to the screen. 92 # No output file specified. Just print it to the screen.
91 for protonated_smi in Protonate(args): 93 for protonated_smi in Protonate(args):
92 print(protonated_smi) 94 print(protonated_smi)
93 95
96
94 class MyParser(argparse.ArgumentParser): 97 class MyParser(argparse.ArgumentParser):
95 """Overwrite default parse so it displays help file on error. See 98 """Overwrite default parse so it displays help file on error. See
96 https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu""" 99 https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""
97 100
98 def error(self, message): 101 def error(self, message):
115 print("") 118 print("")
116 119
117 if file is None: 120 if file is None:
118 file = sys.stdout 121 file = sys.stdout
119 self._print_message(self.format_help(), file) 122 self._print_message(self.format_help(), file)
120 print(""" 123 print(
124 """
121 examples: 125 examples:
122 python dimorphite_dl.py --smiles_file sample_molecules.smi 126 python dimorphite_dl.py --smiles_file sample_molecules.smi
123 python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0 127 python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0
124 python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi 128 python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi
125 python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states 129 python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states
126 python dimorphite_dl.py --test""") 130 python dimorphite_dl.py --test"""
131 )
127 print("") 132 print("")
133
128 134
129 class ArgParseFuncs: 135 class ArgParseFuncs:
130 """A namespace for storing functions that are useful for processing 136 """A namespace for storing functions that are useful for processing
131 command-line arguments. To keep things organized.""" 137 command-line arguments. To keep things organized."""
132 138
135 """Gets the arguments from the command line. 141 """Gets the arguments from the command line.
136 142
137 :return: A parser object. 143 :return: A parser object.
138 """ 144 """
139 145
140 parser = MyParser(description="Dimorphite 1.2: Creates models of " + 146 parser = MyParser(
141 "appropriately protonated small moleucles. " + 147 description="Dimorphite 1.2: Creates models of "
142 "Apache 2.0 License. Copyright 2018 Jacob D. " + 148 + "appropriately protonated small moleucles. "
143 "Durrant.") 149 + "Apache 2.0 License. Copyright 2018 Jacob D. "
144 parser.add_argument('--min_ph', metavar='MIN', type=float, default=6.4, 150 + "Durrant."
145 help='minimum pH to consider (default: 6.4)') 151 )
146 parser.add_argument('--max_ph', metavar='MAX', type=float, default=8.4, 152 parser.add_argument(
147 help='maximum pH to consider (default: 8.4)') 153 "--min_ph",
148 parser.add_argument('--pka_precision', metavar='PRE', type=float, default=1.0, 154 metavar="MIN",
149 help='pKa precision factor (number of standard devations, default: 1.0)') 155 type=float,
150 parser.add_argument('--smiles', metavar='SMI', type=str, 156 default=6.4,
151 help='SMILES string to protonate') 157 help="minimum pH to consider (default: 6.4)",
152 parser.add_argument('--smiles_file', metavar="FILE", type=str, 158 )
153 help='file that contains SMILES strings to protonate') 159 parser.add_argument(
154 parser.add_argument('--output_file', metavar="FILE", type=str, 160 "--max_ph",
155 help='output file to write protonated SMILES (optional)') 161 metavar="MAX",
156 parser.add_argument('--label_states', action="store_true", 162 type=float,
157 help='label protonated SMILES with target state ' + \ 163 default=8.4,
158 '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").') 164 help="maximum pH to consider (default: 8.4)",
159 parser.add_argument('--test', action="store_true", 165 )
160 help='run unit tests (for debugging)') 166 parser.add_argument(
167 "--pka_precision",
168 metavar="PRE",
169 type=float,
170 default=1.0,
171 help="pKa precision factor (number of standard devations, default: 1.0)",
172 )
173 parser.add_argument(
174 "--smiles", metavar="SMI", type=str, help="SMILES string to protonate"
175 )
176 parser.add_argument(
177 "--smiles_file",
178 metavar="FILE",
179 type=str,
180 help="file that contains SMILES strings to protonate",
181 )
182 parser.add_argument(
183 "--output_file",
184 metavar="FILE",
185 type=str,
186 help="output file to write protonated SMILES (optional)",
187 )
188 parser.add_argument(
189 "--label_states",
190 action="store_true",
191 help="label protonated SMILES with target state "
192 + '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").',
193 )
194 parser.add_argument(
195 "--test", action="store_true", help="run unit tests (for debugging)"
196 )
161 197
162 return parser 198 return parser
163 199
164 @staticmethod 200 @staticmethod
165 def clean_args(args): 201 def clean_args(args):
168 :param args: A dictionary containing the arguments. 204 :param args: A dictionary containing the arguments.
169 :type args: dict 205 :type args: dict
170 :raises Exception: No SMILES in params. 206 :raises Exception: No SMILES in params.
171 """ 207 """
172 208
173 defaults = {'min_ph' : 6.4, 209 defaults = {
174 'max_ph' : 8.4, 210 "min_ph": 6.4,
175 'pka_precision' : 1.0, 211 "max_ph": 8.4,
176 'label_states' : False, 212 "pka_precision": 1.0,
177 'test' : False} 213 "label_states": False,
214 "test": False,
215 }
178 216
179 for key in defaults: 217 for key in defaults:
180 if key not in args: 218 if key not in args:
181 args[key] = defaults[key] 219 args[key] = defaults[key]
182 220
192 230
193 # If the user provides a smiles string, turn it into a file-like StringIO 231 # If the user provides a smiles string, turn it into a file-like StringIO
194 # object. 232 # object.
195 if "smiles" in args: 233 if "smiles" in args:
196 if isinstance(args["smiles"], str): 234 if isinstance(args["smiles"], str):
197 args["smiles_file"] = StringIO(args["smiles"]) 235 args["smiles_file"] = StringIO(args["smiles"])
198 236
199 args["smiles_and_data"] = LoadSMIFile(args["smiles_file"]) 237 args["smiles_and_data"] = LoadSMIFile(args["smiles_file"])
200 238
201 return args 239 return args
240
202 241
203 class UtilFuncs: 242 class UtilFuncs:
204 """A namespace to store functions for manipulating mol objects. To keep 243 """A namespace to store functions for manipulating mol objects. To keep
205 things organized.""" 244 things organized."""
206 245
213 :return: The neutralized Mol object. 252 :return: The neutralized Mol object.
214 """ 253 """
215 254
216 # Get the reaction data 255 # Get the reaction data
217 rxn_data = [ 256 rxn_data = [
218 ['[Ov1-1:1]', '[Ov2+0:1]-[H]'], # To handle O- bonded to only one atom (add hydrogen). 257 [
219 ['[#7v4+1:1]-[H]', '[#7v3+0:1]'], # To handle N+ bonded to a hydrogen (remove hydrogen). 258 "[Ov1-1:1]",
220 ['[Ov2-:1]', '[Ov2+0:1]'], # To handle O- bonded to two atoms. Should not be Negative. 259 "[Ov2+0:1]-[H]",
221 ['[#7v3+1:1]', '[#7v3+0:1]'], # To handle N+ bonded to three atoms. Should not be positive. 260 ], # To handle O- bonded to only one atom (add hydrogen).
222 ['[#7v2-1:1]', '[#7+0:1]-[H]'], # To handle N- Bonded to two atoms. Add hydrogen. 261 [
262 "[#7v4+1:1]-[H]",
263 "[#7v3+0:1]",
264 ], # To handle N+ bonded to a hydrogen (remove hydrogen).
265 [
266 "[Ov2-:1]",
267 "[Ov2+0:1]",
268 ], # To handle O- bonded to two atoms. Should not be Negative.
269 [
270 "[#7v3+1:1]",
271 "[#7v3+0:1]",
272 ], # To handle N+ bonded to three atoms. Should not be positive.
273 [
274 "[#7v2-1:1]",
275 "[#7+0:1]-[H]",
276 ], # To handle N- Bonded to two atoms. Add hydrogen.
223 # ['[N:1]=[N+0:2]=[N:3]-[H]', '[N:1]=[N+1:2]=[N+0:3]-[H]'], # To 277 # ['[N:1]=[N+0:2]=[N:3]-[H]', '[N:1]=[N+1:2]=[N+0:3]-[H]'], # To
224 # handle bad azide. Must be protonated. (Now handled elsewhere, before 278 # handle bad azide. Must be protonated. (Now handled elsewhere, before
225 # SMILES converted to Mol object.) 279 # SMILES converted to Mol object.)
226 ['[H]-[N:1]-[N:2]#[N:3]', '[N:1]=[N+1:2]=[N:3]-[H]'] # To handle bad azide. R-N-N#N should be R-N=[N+]=N 280 [
281 "[H]-[N:1]-[N:2]#[N:3]",
282 "[N:1]=[N+1:2]=[N:3]-[H]",
283 ], # To handle bad azide. R-N-N#N should be R-N=[N+]=N
227 ] 284 ]
228 285
229 # Add substructures and reactions (initially none) 286 # Add substructures and reactions (initially none)
230 for i, rxn_datum in enumerate(rxn_data): 287 for i, rxn_datum in enumerate(rxn_data):
231 rxn_data[i].append(Chem.MolFromSmarts(rxn_datum[0])) 288 rxn_data[i].append(Chem.MolFromSmarts(rxn_datum[0]))
239 while True: # Keep going until all these issues have been resolved. 296 while True: # Keep going until all these issues have been resolved.
240 current_rxn = None # The reaction to perform. 297 current_rxn = None # The reaction to perform.
241 current_rxn_str = None 298 current_rxn_str = None
242 299
243 for i, rxn_datum in enumerate(rxn_data): 300 for i, rxn_datum in enumerate(rxn_data):
244 reactant_smarts, product_smarts, substruct_match_mol, rxn_placeholder = rxn_datum 301 (
302 reactant_smarts,
303 product_smarts,
304 substruct_match_mol,
305 rxn_placeholder,
306 ) = rxn_datum
245 if mol.HasSubstructMatch(substruct_match_mol): 307 if mol.HasSubstructMatch(substruct_match_mol):
246 if rxn_placeholder is None: 308 if rxn_placeholder is None:
247 current_rxn_str = reactant_smarts + '>>' + product_smarts 309 current_rxn_str = reactant_smarts + ">>" + product_smarts
248 current_rxn = AllChem.ReactionFromSmarts(current_rxn_str) 310 current_rxn = AllChem.ReactionFromSmarts(current_rxn_str)
249 rxn_data[i][3] = current_rxn # Update the placeholder. 311 rxn_data[i][3] = current_rxn # Update the placeholder.
250 else: 312 else:
251 current_rxn = rxn_data[i][3] 313 current_rxn = rxn_data[i][3]
252 break 314 break
260 322
261 # The mols have been altered from the reactions described above, we need 323 # The mols have been altered from the reactions described above, we need
262 # to resanitize them. Make sure aromatic rings are shown as such This 324 # to resanitize them. Make sure aromatic rings are shown as such This
263 # catches all RDKit Errors. without the catchError and sanitizeOps the 325 # catches all RDKit Errors. without the catchError and sanitizeOps the
264 # Chem.SanitizeMol can crash the program. 326 # Chem.SanitizeMol can crash the program.
265 sanitize_string = Chem.SanitizeMol( 327 sanitize_string = Chem.SanitizeMol(
266 mol, 328 mol,
267 sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, 329 sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
268 catchErrors = True 330 catchErrors=True,
269 ) 331 )
270 332
271 return mol if sanitize_string.name == "SANITIZE_NONE" else None 333 return mol if sanitize_string.name == "SANITIZE_NONE" else None
272 334
273 @staticmethod 335 @staticmethod
319 """Error messages should be printed to STDERR. See 381 """Error messages should be printed to STDERR. See
320 https://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python""" 382 https://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python"""
321 383
322 print(*args, file=sys.stderr, **kwargs) 384 print(*args, file=sys.stderr, **kwargs)
323 385
386
324 class LoadSMIFile(object): 387 class LoadSMIFile(object):
325 """A generator class for loading in the SMILES strings from a file, one at 388 """A generator class for loading in the SMILES strings from a file, one at
326 a time.""" 389 a time."""
327 390
328 def __init__(self, filename): 391 def __init__(self, filename):
386 # Convert from SMILES string to RDKIT Mol. This series of tests is 449 # Convert from SMILES string to RDKIT Mol. This series of tests is
387 # to make sure the SMILES string is properly formed and to get it 450 # to make sure the SMILES string is properly formed and to get it
388 # into a canonical form. Filter if failed. 451 # into a canonical form. Filter if failed.
389 mol = UtilFuncs.convert_smiles_str_to_mol(smiles_str) 452 mol = UtilFuncs.convert_smiles_str_to_mol(smiles_str)
390 if mol is None: 453 if mol is None:
391 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) 454 UtilFuncs.eprint(
455 "WARNING: Skipping poorly formed SMILES string: " + line
456 )
392 return self.next() 457 return self.next()
393 458
394 # Handle nuetralizing the molecules. Filter if failed. 459 # Handle nuetralizing the molecules. Filter if failed.
395 mol = UtilFuncs.neutralize_mol(mol) 460 mol = UtilFuncs.neutralize_mol(mol)
396 if mol is None: 461 if mol is None:
397 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) 462 UtilFuncs.eprint(
463 "WARNING: Skipping poorly formed SMILES string: " + line
464 )
398 return self.next() 465 return self.next()
399 466
400 # Remove the hydrogens. 467 # Remove the hydrogens.
401 try: 468 try:
402 mol = Chem.RemoveHs(mol) 469 mol = Chem.RemoveHs(mol)
403 except: 470 except Exception:
404 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) 471 UtilFuncs.eprint(
472 "WARNING: Skipping poorly formed SMILES string: " + line
473 )
405 return self.next() 474 return self.next()
406 475
407 if mol is None: 476 if mol is None:
408 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) 477 UtilFuncs.eprint(
478 "WARNING: Skipping poorly formed SMILES string: " + line
479 )
409 return self.next() 480 return self.next()
410 481
411 # Regenerate the smiles string (to standardize). 482 # Regenerate the smiles string (to standardize).
412 new_mol_string = Chem.MolToSmiles(mol, isomericSmiles=True) 483 new_mol_string = Chem.MolToSmiles(mol, isomericSmiles=True)
413 484
414 return { 485 return {"smiles": new_mol_string, "data": splits[1:]}
415 "smiles": new_mol_string,
416 "data": splits[1:]
417 }
418 else: 486 else:
419 # Blank line? Go to next one. 487 # Blank line? Go to next one.
420 return self.next() 488 return self.next()
489
421 490
422 class Protonate(object): 491 class Protonate(object):
423 """A generator class for protonating SMILES strings, one at a time.""" 492 """A generator class for protonating SMILES strings, one at a time."""
424 493
425 def __init__(self, args): 494 def __init__(self, args):
489 # There are no more input smiles strings... 558 # There are no more input smiles strings...
490 raise StopIteration() 559 raise StopIteration()
491 560
492 smi = smile_and_datum["smiles"] 561 smi = smile_and_datum["smiles"]
493 data = smile_and_datum["data"] # Everything on SMILES line but the 562 data = smile_and_datum["data"] # Everything on SMILES line but the
494 # SMILES string itself (e.g., the 563 # SMILES string itself (e.g., the
495 # molecule name). 564 # molecule name).
496 565
497 # Collect the data associated with this smiles (e.g., the molecule 566 # Collect the data associated with this smiles (e.g., the molecule
498 # name). 567 # name).
499 tag = " ".join(data) 568 tag = " ".join(data)
500 569
514 new_smis = ProtSubstructFuncs.protonate_site(new_smis, site) 583 new_smis = ProtSubstructFuncs.protonate_site(new_smis, site)
515 # print(site, new_smis) # Good for debugging. 584 # print(site, new_smis) # Good for debugging.
516 585
517 # Only add new smiles if not already in the list. 586 # Only add new smiles if not already in the list.
518 # for s in new_smis_to_perhaps_add: 587 # for s in new_smis_to_perhaps_add:
519 # if not s in new_smis: 588 # if not s in new_smis:
520 # new_smis.append(s) 589 # new_smis.append(s)
521 590
522 # In some cases, the script might generate redundant molecules. 591 # In some cases, the script might generate redundant molecules.
523 # Phosphonates, when the pH is between the two pKa values and the 592 # Phosphonates, when the pH is between the two pKa values and the
524 # stdev value is big enough, for example, will generate two identical 593 # stdev value is big enough, for example, will generate two identical
525 # BOTH states. Let's remove this redundancy. 594 # BOTH states. Let's remove this redundancy.
530 new_smis = [s.replace("[nH-]", "[n-]") for s in new_smis] 599 new_smis = [s.replace("[nH-]", "[n-]") for s in new_smis]
531 600
532 # Sometimes Dimorphite-DL generates molecules that aren't actually 601 # Sometimes Dimorphite-DL generates molecules that aren't actually
533 # possible. Simply convert these to mol objects to eliminate the bad 602 # possible. Simply convert these to mol objects to eliminate the bad
534 # ones (that are None). 603 # ones (that are None).
535 new_smis = [s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None] 604 new_smis = [
605 s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None
606 ]
536 607
537 # If there are no smi left, return the input one at the very least. 608 # If there are no smi left, return the input one at the very least.
538 # All generated forms have apparently been judged 609 # All generated forms have apparently been judged
539 # inappropriate/mal-formed. 610 # inappropriate/mal-formed.
540 if len(new_smis) == 0: 611 if len(new_smis) == 0:
541 new_smis = [smi] 612 new_smis = [smi]
542 613
543 # If the user wants to see the target states, add those 614 # If the user wants to see the target states, add those
544 # to the ends of each line. 615 # to the ends of each line.
545 if self.args["label_states"]: 616 if self.args["label_states"]:
546 states = '\t'.join([x[1] for x in sites]) 617 states = "\t".join([x[1] for x in sites])
547 new_lines = [x + "\t" + tag + "\t" + states for x in new_smis] 618 new_lines = [x + "\t" + tag + "\t" + states for x in new_smis]
548 else: 619 else:
549 new_lines = [x + "\t" + tag for x in new_smis] 620 new_lines = [x + "\t" + tag for x in new_smis]
550 621
551 self.cur_prot_SMI = new_lines 622 self.cur_prot_SMI = new_lines
552 623
553 return self.next() 624 return self.next()
554 625
626
555 class ProtSubstructFuncs: 627 class ProtSubstructFuncs:
556 """A namespace to store functions for loading the substructures that can 628 """A namespace to store functions for loading the substructures that can
557 be protonated. To keep things organized.""" 629 be protonated. To keep things organized."""
558 630
559 @staticmethod 631 @staticmethod
560 def load_protonation_substructs_calc_state_for_ph(min_ph=6.4, max_ph=8.4, pka_std_range=1): 632 def load_protonation_substructs_calc_state_for_ph(
633 min_ph=6.4, max_ph=8.4, pka_std_range=1
634 ):
561 """A pre-calculated list of R-groups with protonation sites, with their 635 """A pre-calculated list of R-groups with protonation sites, with their
562 likely pKa bins. 636 likely pKa bins.
563 637
564 :param float min_ph: The lower bound on the pH range, defaults to 6.4. 638 :param float min_ph: The lower bound on the pH range, defaults to 6.4.
565 :param float max_ph: The upper bound on the pH range, defaults to 8.4. 639 :param float max_ph: The upper bound on the pH range, defaults to 8.4.
571 645
572 subs = [] 646 subs = []
573 pwd = os.path.dirname(os.path.realpath(__file__)) 647 pwd = os.path.dirname(os.path.realpath(__file__))
574 648
575 site_structures_file = "{}/{}".format(pwd, "site_substructures.smarts") 649 site_structures_file = "{}/{}".format(pwd, "site_substructures.smarts")
576 with open(site_structures_file, 'r') as substruct: 650 with open(site_structures_file, "r") as substruct:
577 for line in substruct: 651 for line in substruct:
578 line = line.strip() 652 line = line.strip()
579 sub = {} 653 sub = {}
580 if line is not "": 654 if line is not "":
581 splits = line.split() 655 splits = line.split()
582 sub["name"] = splits[0] 656 sub["name"] = splits[0]
583 sub["smart"] = splits[1] 657 sub["smart"] = splits[1]
584 sub["mol"] = Chem.MolFromSmarts(sub["smart"]) 658 sub["mol"] = Chem.MolFromSmarts(sub["smart"])
585 659
586 # NEED TO DIVIDE THIS BY 3s 660 # NEED TO DIVIDE THIS BY 3s
587 pka_ranges = [splits[i:i+3] for i in range(2, len(splits)-1, 3)] 661 pka_ranges = [
662 splits[i : i + 3] for i in range(2, len(splits) - 1, 3)
663 ]
588 664
589 prot = [] 665 prot = []
590 for pka_range in pka_ranges: 666 for pka_range in pka_ranges:
591 site = pka_range[0] 667 site = pka_range[0]
592 std = float(pka_range[2]) * pka_std_range 668 std = float(pka_range[2]) * pka_std_range
618 max_pka = mean + std 694 max_pka = mean + std
619 695
620 # This needs to be reassigned, and 'ERROR' should never make it past the 696 # This needs to be reassigned, and 'ERROR' should never make it past the
621 # next set of checks. 697 # next set of checks.
622 if min_pka <= max_ph and min_ph <= max_pka: 698 if min_pka <= max_ph and min_ph <= max_pka:
623 protonation_state = 'BOTH' 699 protonation_state = "BOTH"
624 elif mean > max_ph: 700 elif mean > max_ph:
625 protonation_state = 'PROTONATED' 701 protonation_state = "PROTONATED"
626 else: 702 else:
627 protonation_state = 'DEPROTONATED' 703 protonation_state = "DEPROTONATED"
628 704
629 return protonation_state 705 return protonation_state
630 706
631 @staticmethod 707 @staticmethod
632 def get_prot_sites_and_target_states(smi, subs): 708 def get_prot_sites_and_target_states(smi, subs):
648 UtilFuncs.eprint("ERROR: ", smi) 724 UtilFuncs.eprint("ERROR: ", smi)
649 return [] 725 return []
650 726
651 # Try to Add hydrogens. if failed return [] 727 # Try to Add hydrogens. if failed return []
652 try: 728 try:
653 mol = Chem.AddHs(mol) 729 mol = Chem.AddHs(mol)
654 except: 730 except Exception:
655 UtilFuncs.eprint("ERROR: ", smi) 731 UtilFuncs.eprint("ERROR: ", smi)
656 return [] 732 return []
657 733
658 # Check adding Hs worked 734 # Check adding Hs worked
659 if mol is None: 735 if mol is None:
699 idx, target_prot_state, prot_site_name = site 775 idx, target_prot_state, prot_site_name = site
700 776
701 # Initialize the output list 777 # Initialize the output list
702 output_smis = [] 778 output_smis = []
703 779
704 state_to_charge = {"DEPROTONATED": [-1], 780 state_to_charge = {"DEPROTONATED": [-1], "PROTONATED": [0], "BOTH": [-1, 0]}
705 "PROTONATED": [0],
706 "BOTH": [-1, 0]}
707 781
708 charges = state_to_charge[target_prot_state] 782 charges = state_to_charge[target_prot_state]
709 783
710 # Now make the actual smiles match the target protonation state. 784 # Now make the actual smiles match the target protonation state.
711 output_smis = ProtSubstructFuncs.set_protonation_charge(smis, idx, charges, prot_site_name) 785 output_smis = ProtSubstructFuncs.set_protonation_charge(
786 smis, idx, charges, prot_site_name
787 )
712 788
713 return output_smis 789 return output_smis
714 790
715 @staticmethod 791 @staticmethod
716 def set_protonation_charge(smis, idx, charges, prot_site_name): 792 def set_protonation_charge(smis, idx, charges, prot_site_name):
757 atom.SetFormalCharge(nitro_charge) 833 atom.SetFormalCharge(nitro_charge)
758 else: 834 else:
759 atom.SetFormalCharge(charge) 835 atom.SetFormalCharge(charge)
760 836
761 # Convert back to SMILE and add to output 837 # Convert back to SMILE and add to output
762 out_smile = Chem.MolToSmiles(mol, isomericSmiles=True,canonical=True) 838 out_smile = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
763 output.append(out_smile) 839 output.append(out_smile)
764 840
765 return output 841 return output
842
766 843
767 class ProtectUnprotectFuncs: 844 class ProtectUnprotectFuncs:
768 """A namespace for storing functions that are useful for protecting and 845 """A namespace for storing functions that are useful for protecting and
769 unprotecting molecules. To keep things organized. We need to identify and 846 unprotecting molecules. To keep things organized. We need to identify and
770 mark groups that have been matched with a substructure.""" 847 mark groups that have been matched with a substructure."""
777 :param rdkit.Chem.rdchem.Mol mol: The rdkit Mol object. 854 :param rdkit.Chem.rdchem.Mol mol: The rdkit Mol object.
778 :type mol: The rdkit Mol object with atoms unprotected. 855 :type mol: The rdkit Mol object with atoms unprotected.
779 """ 856 """
780 857
781 for atom in mol.GetAtoms(): 858 for atom in mol.GetAtoms():
782 atom.SetProp('_protected', '0') 859 atom.SetProp("_protected", "0")
783 860
784 @staticmethod 861 @staticmethod
785 def protect_molecule(mol, match): 862 def protect_molecule(mol, match):
786 """Given a 'match', a list of molecules idx's, we set the protected status 863 """Given a 'match', a list of molecules idx's, we set the protected status
787 of each atom to 1. This will prevent any matches using that atom in the 864 of each atom to 1. This will prevent any matches using that atom in the
791 :param list match: A list of molecule idx's. 868 :param list match: A list of molecule idx's.
792 """ 869 """
793 870
794 for idx in match: 871 for idx in match:
795 atom = mol.GetAtomWithIdx(idx) 872 atom = mol.GetAtomWithIdx(idx)
796 atom.SetProp('_protected', '1') 873 atom.SetProp("_protected", "1")
797 874
798 @staticmethod 875 @staticmethod
799 def get_unprotected_matches(mol, substruct): 876 def get_unprotected_matches(mol, substruct):
800 """Finds substructure matches with atoms that have not been protected. 877 """Finds substructure matches with atoms that have not been protected.
801 Returns list of matches, each match a list of atom idxs. 878 Returns list of matches, each match a list of atom idxs.
827 protected = atom.GetProp("_protected") 904 protected = atom.GetProp("_protected")
828 if protected == "1": 905 if protected == "1":
829 return False 906 return False
830 return True 907 return True
831 908
909
832 class TestFuncs: 910 class TestFuncs:
833 """A namespace for storing functions that perform tests on the code. To 911 """A namespace for storing functions that perform tests on the code. To
834 keep things organized.""" 912 keep things organized."""
835 913
836 @staticmethod 914 @staticmethod
837 def test(): 915 def test():
838 """Tests all the 38 groups.""" 916 """Tests all the 38 groups."""
839 917
840 smis = [ 918 smis = [
841 # [input smiles, pka, protonated, deprotonated, category] 919 # [input smiles, pka, protonated, deprotonated, category]
842 ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"], 920 ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"],
843 ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"], 921 ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"],
844 ["CC(=O)NOC(C)=O", "CC(=O)NOC(C)=O", "CC(=O)[N-]OC(C)=O", "Amide_electronegative"], 922 [
845 ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"], 923 "CC(=O)NOC(C)=O",
846 ["Brc1ccc(C2NCCS2)cc1", "Brc1ccc(C2[NH2+]CCS2)cc1", "Brc1ccc(C2NCCS2)cc1", "Amines_primary_secondary_tertiary"], 924 "CC(=O)NOC(C)=O",
847 ["CC(=O)[n+]1ccc(N)cc1", "CC(=O)[n+]1ccc([NH3+])cc1", "CC(=O)[n+]1ccc(N)cc1", "Anilines_primary"], 925 "CC(=O)[N-]OC(C)=O",
848 ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"], 926 "Amide_electronegative",
849 ["Cc1ccccc1N(C)C", "Cc1ccccc1[NH+](C)C", "Cc1ccccc1N(C)C", "Anilines_tertiary"], 927 ],
850 ["BrC1=CC2=C(C=C1)NC=C2", "Brc1ccc2[nH]ccc2c1", "Brc1ccc2[n-]ccc2c1", "Indole_pyrrole"], 928 ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"],
851 ["O=c1cc[nH]cc1", "O=c1cc[nH]cc1", "O=c1cc[n-]cc1", "Aromatic_nitrogen_protonated"], 929 [
852 ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"], 930 "Brc1ccc(C2NCCS2)cc1",
853 ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"], 931 "Brc1ccc(C2[NH2+]CCS2)cc1",
854 ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"], 932 "Brc1ccc(C2NCCS2)cc1",
855 ["C(F)(F)(F)C(=O)NC(=O)C", "CC(=O)NC(=O)C(F)(F)F", "CC(=O)[N-]C(=O)C(F)(F)F", "Imide"], 933 "Amines_primary_secondary_tertiary",
856 ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"], 934 ],
857 ["CC(C)(C)C(N(C)O)=O", "CN(O)C(=O)C(C)(C)C", "CN([O-])C(=O)C(C)(C)C", "N-hydroxyamide"], 935 [
858 ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"], 936 "CC(=O)[n+]1ccc(N)cc1",
859 ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"], 937 "CC(=O)[n+]1ccc([NH3+])cc1",
860 ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"], 938 "CC(=O)[n+]1ccc(N)cc1",
861 ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"], 939 "Anilines_primary",
862 ["Brc1cc(O)cc(Br)c1", "Oc1cc(Br)cc(Br)c1", "[O-]c1cc(Br)cc(Br)c1", "Phenol"], 940 ],
863 ["CC(=O)c1ccc(S)cc1", "CC(=O)c1ccc(S)cc1", "CC(=O)c1ccc([S-])cc1", "Phenyl_Thiol"], 941 ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"],
864 ["C=CCOc1ccc(C(=O)O)cc1", "C=CCOc1ccc(C(=O)O)cc1", "C=CCOc1ccc(C(=O)[O-])cc1", "Phenyl_carboxyl"], 942 [
865 ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"], 943 "Cc1ccccc1N(C)C",
866 ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"], 944 "Cc1ccccc1[NH+](C)C",
867 ["CC(C)OP(C)(=O)O", "CC(C)OP(C)(=O)O", "CC(C)OP(C)(=O)[O-]", "Phosphonate_ester"], 945 "Cc1ccccc1N(C)C",
868 ["CC1(C)OC(=O)NC1=O", "CC1(C)OC(=O)NC1=O", "CC1(C)OC(=O)[N-]C1=O", "Ringed_imide1"], 946 "Anilines_tertiary",
869 ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"], 947 ],
870 ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"], 948 [
871 ["COc1ccc(S(=O)O)cc1", "COc1ccc(S(=O)O)cc1", "COc1ccc(S(=O)[O-])cc1", "Sulfinic_acid"], 949 "BrC1=CC2=C(C=C1)NC=C2",
872 ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"], 950 "Brc1ccc2[nH]ccc2c1",
873 ["CC(=O)CSCCS(O)(=O)=O", "CC(=O)CSCCS(=O)(=O)O", "CC(=O)CSCCS(=O)(=O)[O-]", "Sulfonate"], 951 "Brc1ccc2[n-]ccc2c1",
874 ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"], 952 "Indole_pyrrole",
875 ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"], 953 ],
876 ["Brc1cc[nH+]cc1", "Brc1cc[nH+]cc1", "Brc1ccncc1", "Aromatic_nitrogen_unprotonated"], 954 [
877 ["C=C(O)c1c(C)cc(C)cc1C", "C=C(O)c1c(C)cc(C)cc1C", "C=C([O-])c1c(C)cc(C)cc1C", "Vinyl_alcohol"], 955 "O=c1cc[nH]cc1",
878 ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"] 956 "O=c1cc[nH]cc1",
957 "O=c1cc[n-]cc1",
958 "Aromatic_nitrogen_protonated",
959 ],
960 ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"],
961 ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"],
962 ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"],
963 [
964 "C(F)(F)(F)C(=O)NC(=O)C",
965 "CC(=O)NC(=O)C(F)(F)F",
966 "CC(=O)[N-]C(=O)C(F)(F)F",
967 "Imide",
968 ],
969 ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"],
970 [
971 "CC(C)(C)C(N(C)O)=O",
972 "CN(O)C(=O)C(C)(C)C",
973 "CN([O-])C(=O)C(C)(C)C",
974 "N-hydroxyamide",
975 ],
976 ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"],
977 ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"],
978 ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"],
979 ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"],
980 [
981 "Brc1cc(O)cc(Br)c1",
982 "Oc1cc(Br)cc(Br)c1",
983 "[O-]c1cc(Br)cc(Br)c1",
984 "Phenol",
985 ],
986 [
987 "CC(=O)c1ccc(S)cc1",
988 "CC(=O)c1ccc(S)cc1",
989 "CC(=O)c1ccc([S-])cc1",
990 "Phenyl_Thiol",
991 ],
992 [
993 "C=CCOc1ccc(C(=O)O)cc1",
994 "C=CCOc1ccc(C(=O)O)cc1",
995 "C=CCOc1ccc(C(=O)[O-])cc1",
996 "Phenyl_carboxyl",
997 ],
998 ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"],
999 ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"],
1000 [
1001 "CC(C)OP(C)(=O)O",
1002 "CC(C)OP(C)(=O)O",
1003 "CC(C)OP(C)(=O)[O-]",
1004 "Phosphonate_ester",
1005 ],
1006 [
1007 "CC1(C)OC(=O)NC1=O",
1008 "CC1(C)OC(=O)NC1=O",
1009 "CC1(C)OC(=O)[N-]C1=O",
1010 "Ringed_imide1",
1011 ],
1012 ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"],
1013 ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"],
1014 [
1015 "COc1ccc(S(=O)O)cc1",
1016 "COc1ccc(S(=O)O)cc1",
1017 "COc1ccc(S(=O)[O-])cc1",
1018 "Sulfinic_acid",
1019 ],
1020 ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"],
1021 [
1022 "CC(=O)CSCCS(O)(=O)=O",
1023 "CC(=O)CSCCS(=O)(=O)O",
1024 "CC(=O)CSCCS(=O)(=O)[O-]",
1025 "Sulfonate",
1026 ],
1027 ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"],
1028 ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"],
1029 [
1030 "Brc1cc[nH+]cc1",
1031 "Brc1cc[nH+]cc1",
1032 "Brc1ccncc1",
1033 "Aromatic_nitrogen_unprotonated",
1034 ],
1035 [
1036 "C=C(O)c1c(C)cc(C)cc1C",
1037 "C=C(O)c1c(C)cc(C)cc1C",
1038 "C=C([O-])c1c(C)cc(C)cc1C",
1039 "Vinyl_alcohol",
1040 ],
1041 ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"],
879 ] 1042 ]
880 1043
881 smis_phos = [ 1044 smis_phos = [
882 ["O=P(O)(O)OCCCC", "CCCCOP(=O)(O)O", "CCCCOP(=O)([O-])O", "CCCCOP(=O)([O-])[O-]", "Phosphate"], 1045 [
883 ["CC(P(O)(O)=O)C", "CC(C)P(=O)(O)O", "CC(C)P(=O)([O-])O", "CC(C)P(=O)([O-])[O-]", "Phosphonate"] 1046 "O=P(O)(O)OCCCC",
1047 "CCCCOP(=O)(O)O",
1048 "CCCCOP(=O)([O-])O",
1049 "CCCCOP(=O)([O-])[O-]",
1050 "Phosphate",
1051 ],
1052 [
1053 "CC(P(O)(O)=O)C",
1054 "CC(C)P(=O)(O)O",
1055 "CC(C)P(=O)([O-])O",
1056 "CC(C)P(=O)([O-])[O-]",
1057 "Phosphonate",
1058 ],
884 ] 1059 ]
885 1060
886 # Load the average pKa values. 1061 # Load the average pKa values.
887 average_pkas = {l.split()[0].replace("*", ""):float(l.split()[3]) for l in open("site_substructures.smarts") if l.split()[0] not in ["Phosphate", "Phosphonate"]} 1062 average_pkas = {
888 average_pkas_phos = {l.split()[0].replace("*", ""):[float(l.split()[3]), float(l.split()[6])] for l in open("site_substructures.smarts") if l.split()[0] in ["Phosphate", "Phosphonate"]} 1063 l.split()[0].replace("*", ""): float(l.split()[3])
1064 for l in open("site_substructures.smarts")
1065 if l.split()[0] not in ["Phosphate", "Phosphonate"]
1066 }
1067 average_pkas_phos = {
1068 l.split()[0].replace("*", ""): [float(l.split()[3]), float(l.split()[6])]
1069 for l in open("site_substructures.smarts")
1070 if l.split()[0] in ["Phosphate", "Phosphonate"]
1071 }
889 1072
890 print("Running Tests") 1073 print("Running Tests")
891 print("=============") 1074 print("=============")
892 print("") 1075 print("")
893 1076
898 args = { 1081 args = {
899 "min_ph": -10000000, 1082 "min_ph": -10000000,
900 "max_ph": -10000000, 1083 "max_ph": -10000000,
901 "pka_precision": 0.5, 1084 "pka_precision": 0.5,
902 "smiles": "", 1085 "smiles": "",
903 "label_states": True 1086 "label_states": True,
904 } 1087 }
905 1088
906 for smi, protonated, deprotonated, category in smis: 1089 for smi, protonated, deprotonated, category in smis:
907 args["smiles"] = smi 1090 args["smiles"] = smi
908 TestFuncs.test_check(args, [protonated], ["PROTONATED"]) 1091 TestFuncs.test_check(args, [protonated], ["PROTONATED"])
952 1135
953 avg_pka = average_pkas_phos[category][1] 1136 avg_pka = average_pkas_phos[category][1]
954 args["min_ph"] = avg_pka 1137 args["min_ph"] = avg_pka
955 args["max_ph"] = avg_pka 1138 args["max_ph"] = avg_pka
956 1139
957 TestFuncs.test_check(args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"]) 1140 TestFuncs.test_check(
958 1141 args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"]
959 avg_pka = 0.5 * (average_pkas_phos[category][0] + average_pkas_phos[category][1]) 1142 )
1143
1144 avg_pka = 0.5 * (
1145 average_pkas_phos[category][0] + average_pkas_phos[category][1]
1146 )
960 args["min_ph"] = avg_pka 1147 args["min_ph"] = avg_pka
961 args["max_ph"] = avg_pka 1148 args["max_ph"] = avg_pka
962 args["pka_precision"] = 5 # Should give all three 1149 args["pka_precision"] = 5 # Should give all three
963 1150
964 TestFuncs.test_check(args, [mix, deprotonated, protonated], ["BOTH", "BOTH"]) 1151 TestFuncs.test_check(
1152 args, [mix, deprotonated, protonated], ["BOTH", "BOTH"]
1153 )
965 1154
966 @staticmethod 1155 @staticmethod
967 def test_check(args, expected_output, labels): 1156 def test_check(args, expected_output, labels):
968 """Tests most ionizable groups. The ones that can only loose or gain a single proton. 1157 """Tests most ionizable groups. The ones that can only loose or gain a single proton.
969 1158
979 output = list(Protonate(args)) 1168 output = list(Protonate(args))
980 output = [o.split() for o in output] 1169 output = [o.split() for o in output]
981 1170
982 num_states = len(expected_output) 1171 num_states = len(expected_output)
983 1172
984 if (len(output) != num_states): 1173 if len(output) != num_states:
985 msg = args["smiles"] + " should have " + str(num_states) + \ 1174 msg = (
986 " states at at pH " + str(args["min_ph"]) + ": " + str(output) 1175 args["smiles"]
1176 + " should have "
1177 + str(num_states)
1178 + " states at at pH "
1179 + str(args["min_ph"])
1180 + ": "
1181 + str(output)
1182 )
987 print(msg) 1183 print(msg)
988 raise Exception(msg) 1184 raise Exception(msg)
989 1185
990 if (len(set([l[0] for l in output]) - set(expected_output)) != 0): 1186 if len(set([l[0] for l in output]) - set(expected_output)) != 0:
991 msg = args["smiles"] + " is not " + " AND ".join(expected_output) + \ 1187 msg = (
992 " at pH " + str(args["min_ph"]) + " - " + str(args["max_ph"]) + \ 1188 args["smiles"]
993 "; it is " + " AND ".join([l[0] for l in output]) 1189 + " is not "
1190 + " AND ".join(expected_output)
1191 + " at pH "
1192 + str(args["min_ph"])
1193 + " - "
1194 + str(args["max_ph"])
1195 + "; it is "
1196 + " AND ".join([l[0] for l in output])
1197 )
994 print(msg) 1198 print(msg)
995 raise Exception(msg) 1199 raise Exception(msg)
996 1200
997 if (len(set([l[1] for l in output]) - set(labels)) != 0): 1201 if len(set([l[1] for l in output]) - set(labels)) != 0:
998 msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \ 1202 msg = (
999 "; it is " + " AND ".join([l[1] for l in output]) 1203 args["smiles"]
1204 + " not labeled as "
1205 + " AND ".join(labels)
1206 + "; it is "
1207 + " AND ".join([l[1] for l in output])
1208 )
1000 print(msg) 1209 print(msg)
1001 raise Exception(msg) 1210 raise Exception(msg)
1002 1211
1003 ph_range = sorted(list(set([args["min_ph"], args["max_ph"]]))) 1212 ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))
1004 ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")" 1213 ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"
1005 print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output])) 1214 print(
1215 "(CORRECT) "
1216 + ph_range_str.ljust(10)
1217 + " "
1218 + args["smiles"]
1219 + " => "
1220 + " AND ".join([l[0] for l in output])
1221 )
1222
1006 1223
1007 def run(**kwargs): 1224 def run(**kwargs):
1008 """A helpful, importable function for those who want to call Dimorphite-DL 1225 """A helpful, importable function for those who want to call Dimorphite-DL
1009 from another Python script rather than the command line. Note that this 1226 from another Python script rather than the command line. Note that this
1010 function accepts keyword arguments that match the command-line parameters 1227 function accepts keyword arguments that match the command-line parameters
1016 :type kwargs: dict 1233 :type kwargs: dict
1017 """ 1234 """
1018 1235
1019 # Run the main function with the specified arguments. 1236 # Run the main function with the specified arguments.
1020 main(kwargs) 1237 main(kwargs)
1238
1021 1239
1022 def run_with_mol_list(mol_lst, **kwargs): 1240 def run_with_mol_list(mol_lst, **kwargs):
1023 """A helpful, importable function for those who want to call Dimorphite-DL 1241 """A helpful, importable function for those who want to call Dimorphite-DL
1024 from another Python script rather than the command line. Note that this 1242 from another Python script rather than the command line. Note that this
1025 function is for passing Dimorphite-DL a list of RDKit Mol objects, together 1243 function is for passing Dimorphite-DL a list of RDKit Mol objects, together
1035 """ 1253 """
1036 1254
1037 # Do a quick check to make sure the user input makes sense. 1255 # Do a quick check to make sure the user input makes sense.
1038 for bad_arg in ["smiles", "smiles_file", "output_file", "test"]: 1256 for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:
1039 if bad_arg in kwargs: 1257 if bad_arg in kwargs:
1040 msg = "You're using Dimorphite-DL's run_with_mol_list(mol_lst, " + \ 1258 msg = (
1041 "**kwargs) function, but you also passed the \"" + \ 1259 "You're using Dimorphite-DL's run_with_mol_list(mol_lst, "
1042 bad_arg + "\" argument. Did you mean to use the " + \ 1260 + '**kwargs) function, but you also passed the "'
1043 "run(**kwargs) function instead?" 1261 + bad_arg
1262 + '" argument. Did you mean to use the '
1263 + "run(**kwargs) function instead?"
1264 )
1044 print(msg) 1265 print(msg)
1045 raise Exception(msg) 1266 raise Exception(msg)
1046 1267
1047 # Set the return_as_list flag so main() will return the protonated smiles 1268 # Set the return_as_list flag so main() will return the protonated smiles
1048 # as a list. 1269 # as a list.
1074 m.SetBoolProp(prop, val) 1295 m.SetBoolProp(prop, val)
1075 else: 1296 else:
1076 m.SetProp(prop, str(val)) 1297 m.SetProp(prop, str(val))
1077 mols.append(m) 1298 mols.append(m)
1078 else: 1299 else:
1079 UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props)) 1300 UtilFuncs.eprint(
1301 "WARNING: Could not process molecule with SMILES string "
1302 + s
1303 + " and properties "
1304 + str(props)
1305 )
1080 1306
1081 return mols 1307 return mols
1308
1082 1309
1083 if __name__ == "__main__": 1310 if __name__ == "__main__":
1084 main() 1311 main()