# HG changeset patch
# User bgruening
# Date 1638636023 0
# Node ID 67ee76f0e497c7c51d680cf673e4a9657367a0f9
# Parent bbbf5fb356dd0b27622da91f168f1107bab7a4f8
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
diff -r bbbf5fb356dd -r 67ee76f0e497 dimorphite_dl.py
--- a/dimorphite_dl.py Wed Feb 17 12:59:24 2021 +0000
+++ b/dimorphite_dl.py Sat Dec 04 16:40:23 2021 +0000
@@ -19,8 +19,9 @@
"""
from __future__ import print_function
+
+import argparse
import os
-import argparse
import sys
try:
@@ -43,11 +44,12 @@
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
-except:
+except Exception:
msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"
print(msg)
raise Exception(msg)
+
def main(params=None):
"""The main definition run when you call the script from the commandline.
@@ -84,13 +86,14 @@
with open(args["output_file"], "w") as file:
for protonated_smi in Protonate(args):
file.write(protonated_smi + "\n")
- elif "return_as_list" in args and args["return_as_list"] == True:
+ elif "return_as_list" in args and args["return_as_list"]:
return list(Protonate(args))
else:
# No output file specified. Just print it to the screen.
for protonated_smi in Protonate(args):
print(protonated_smi)
+
class MyParser(argparse.ArgumentParser):
"""Overwrite default parse so it displays help file on error. See
https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""
@@ -117,15 +120,18 @@
if file is None:
file = sys.stdout
self._print_message(self.format_help(), file)
- print("""
+ print(
+ """
examples:
python dimorphite_dl.py --smiles_file sample_molecules.smi
python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0
python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi
python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states
- python dimorphite_dl.py --test""")
+ python dimorphite_dl.py --test"""
+ )
print("")
+
class ArgParseFuncs:
"""A namespace for storing functions that are useful for processing
command-line arguments. To keep things organized."""
@@ -137,27 +143,57 @@
:return: A parser object.
"""
- parser = MyParser(description="Dimorphite 1.2: Creates models of " +
- "appropriately protonated small moleucles. " +
- "Apache 2.0 License. Copyright 2018 Jacob D. " +
- "Durrant.")
- parser.add_argument('--min_ph', metavar='MIN', type=float, default=6.4,
- help='minimum pH to consider (default: 6.4)')
- parser.add_argument('--max_ph', metavar='MAX', type=float, default=8.4,
- help='maximum pH to consider (default: 8.4)')
- parser.add_argument('--pka_precision', metavar='PRE', type=float, default=1.0,
- help='pKa precision factor (number of standard devations, default: 1.0)')
- parser.add_argument('--smiles', metavar='SMI', type=str,
- help='SMILES string to protonate')
- parser.add_argument('--smiles_file', metavar="FILE", type=str,
- help='file that contains SMILES strings to protonate')
- parser.add_argument('--output_file', metavar="FILE", type=str,
- help='output file to write protonated SMILES (optional)')
- parser.add_argument('--label_states', action="store_true",
- help='label protonated SMILES with target state ' + \
- '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").')
- parser.add_argument('--test', action="store_true",
- help='run unit tests (for debugging)')
+ parser = MyParser(
+ description="Dimorphite 1.2: Creates models of "
+ + "appropriately protonated small moleucles. "
+ + "Apache 2.0 License. Copyright 2018 Jacob D. "
+ + "Durrant."
+ )
+ parser.add_argument(
+ "--min_ph",
+ metavar="MIN",
+ type=float,
+ default=6.4,
+ help="minimum pH to consider (default: 6.4)",
+ )
+ parser.add_argument(
+ "--max_ph",
+ metavar="MAX",
+ type=float,
+ default=8.4,
+ help="maximum pH to consider (default: 8.4)",
+ )
+ parser.add_argument(
+ "--pka_precision",
+ metavar="PRE",
+ type=float,
+ default=1.0,
+ help="pKa precision factor (number of standard devations, default: 1.0)",
+ )
+ parser.add_argument(
+ "--smiles", metavar="SMI", type=str, help="SMILES string to protonate"
+ )
+ parser.add_argument(
+ "--smiles_file",
+ metavar="FILE",
+ type=str,
+ help="file that contains SMILES strings to protonate",
+ )
+ parser.add_argument(
+ "--output_file",
+ metavar="FILE",
+ type=str,
+ help="output file to write protonated SMILES (optional)",
+ )
+ parser.add_argument(
+ "--label_states",
+ action="store_true",
+ help="label protonated SMILES with target state "
+ + '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").',
+ )
+ parser.add_argument(
+ "--test", action="store_true", help="run unit tests (for debugging)"
+ )
return parser
@@ -170,11 +206,13 @@
:raises Exception: No SMILES in params.
"""
- defaults = {'min_ph' : 6.4,
- 'max_ph' : 8.4,
- 'pka_precision' : 1.0,
- 'label_states' : False,
- 'test' : False}
+ defaults = {
+ "min_ph": 6.4,
+ "max_ph": 8.4,
+ "pka_precision": 1.0,
+ "label_states": False,
+ "test": False,
+ }
for key in defaults:
if key not in args:
@@ -194,12 +232,13 @@
# object.
if "smiles" in args:
if isinstance(args["smiles"], str):
- args["smiles_file"] = StringIO(args["smiles"])
+ args["smiles_file"] = StringIO(args["smiles"])
args["smiles_and_data"] = LoadSMIFile(args["smiles_file"])
return args
+
class UtilFuncs:
"""A namespace to store functions for manipulating mol objects. To keep
things organized."""
@@ -215,15 +254,33 @@
# Get the reaction data
rxn_data = [
- ['[Ov1-1:1]', '[Ov2+0:1]-[H]'], # To handle O- bonded to only one atom (add hydrogen).
- ['[#7v4+1:1]-[H]', '[#7v3+0:1]'], # To handle N+ bonded to a hydrogen (remove hydrogen).
- ['[Ov2-:1]', '[Ov2+0:1]'], # To handle O- bonded to two atoms. Should not be Negative.
- ['[#7v3+1:1]', '[#7v3+0:1]'], # To handle N+ bonded to three atoms. Should not be positive.
- ['[#7v2-1:1]', '[#7+0:1]-[H]'], # To handle N- Bonded to two atoms. Add hydrogen.
+ [
+ "[Ov1-1:1]",
+ "[Ov2+0:1]-[H]",
+ ], # To handle O- bonded to only one atom (add hydrogen).
+ [
+ "[#7v4+1:1]-[H]",
+ "[#7v3+0:1]",
+ ], # To handle N+ bonded to a hydrogen (remove hydrogen).
+ [
+ "[Ov2-:1]",
+ "[Ov2+0:1]",
+ ], # To handle O- bonded to two atoms. Should not be Negative.
+ [
+ "[#7v3+1:1]",
+ "[#7v3+0:1]",
+ ], # To handle N+ bonded to three atoms. Should not be positive.
+ [
+ "[#7v2-1:1]",
+ "[#7+0:1]-[H]",
+ ], # To handle N- Bonded to two atoms. Add hydrogen.
# ['[N:1]=[N+0:2]=[N:3]-[H]', '[N:1]=[N+1:2]=[N+0:3]-[H]'], # To
# handle bad azide. Must be protonated. (Now handled elsewhere, before
# SMILES converted to Mol object.)
- ['[H]-[N:1]-[N:2]#[N:3]', '[N:1]=[N+1:2]=[N:3]-[H]'] # To handle bad azide. R-N-N#N should be R-N=[N+]=N
+ [
+ "[H]-[N:1]-[N:2]#[N:3]",
+ "[N:1]=[N+1:2]=[N:3]-[H]",
+ ], # To handle bad azide. R-N-N#N should be R-N=[N+]=N
]
# Add substructures and reactions (initially none)
@@ -241,10 +298,15 @@
current_rxn_str = None
for i, rxn_datum in enumerate(rxn_data):
- reactant_smarts, product_smarts, substruct_match_mol, rxn_placeholder = rxn_datum
+ (
+ reactant_smarts,
+ product_smarts,
+ substruct_match_mol,
+ rxn_placeholder,
+ ) = rxn_datum
if mol.HasSubstructMatch(substruct_match_mol):
if rxn_placeholder is None:
- current_rxn_str = reactant_smarts + '>>' + product_smarts
+ current_rxn_str = reactant_smarts + ">>" + product_smarts
current_rxn = AllChem.ReactionFromSmarts(current_rxn_str)
rxn_data[i][3] = current_rxn # Update the placeholder.
else:
@@ -262,10 +324,10 @@
# to resanitize them. Make sure aromatic rings are shown as such This
# catches all RDKit Errors. without the catchError and sanitizeOps the
# Chem.SanitizeMol can crash the program.
- sanitize_string = Chem.SanitizeMol(
+ sanitize_string = Chem.SanitizeMol(
mol,
sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
- catchErrors = True
+ catchErrors=True,
)
return mol if sanitize_string.name == "SANITIZE_NONE" else None
@@ -321,6 +383,7 @@
print(*args, file=sys.stderr, **kwargs)
+
class LoadSMIFile(object):
"""A generator class for loading in the SMILES strings from a file, one at
a time."""
@@ -388,37 +451,43 @@
# into a canonical form. Filter if failed.
mol = UtilFuncs.convert_smiles_str_to_mol(smiles_str)
if mol is None:
- UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+ UtilFuncs.eprint(
+ "WARNING: Skipping poorly formed SMILES string: " + line
+ )
return self.next()
# Handle nuetralizing the molecules. Filter if failed.
mol = UtilFuncs.neutralize_mol(mol)
if mol is None:
- UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+ UtilFuncs.eprint(
+ "WARNING: Skipping poorly formed SMILES string: " + line
+ )
return self.next()
# Remove the hydrogens.
try:
mol = Chem.RemoveHs(mol)
- except:
- UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+ except Exception:
+ UtilFuncs.eprint(
+ "WARNING: Skipping poorly formed SMILES string: " + line
+ )
return self.next()
if mol is None:
- UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+ UtilFuncs.eprint(
+ "WARNING: Skipping poorly formed SMILES string: " + line
+ )
return self.next()
# Regenerate the smiles string (to standardize).
new_mol_string = Chem.MolToSmiles(mol, isomericSmiles=True)
- return {
- "smiles": new_mol_string,
- "data": splits[1:]
- }
+ return {"smiles": new_mol_string, "data": splits[1:]}
else:
# Blank line? Go to next one.
return self.next()
+
class Protonate(object):
"""A generator class for protonating SMILES strings, one at a time."""
@@ -491,8 +560,8 @@
smi = smile_and_datum["smiles"]
data = smile_and_datum["data"] # Everything on SMILES line but the
- # SMILES string itself (e.g., the
- # molecule name).
+ # SMILES string itself (e.g., the
+ # molecule name).
# Collect the data associated with this smiles (e.g., the molecule
# name).
@@ -516,8 +585,8 @@
# Only add new smiles if not already in the list.
# for s in new_smis_to_perhaps_add:
- # if not s in new_smis:
- # new_smis.append(s)
+ # if not s in new_smis:
+ # new_smis.append(s)
# In some cases, the script might generate redundant molecules.
# Phosphonates, when the pH is between the two pKa values and the
@@ -532,7 +601,9 @@
# Sometimes Dimorphite-DL generates molecules that aren't actually
# possible. Simply convert these to mol objects to eliminate the bad
# ones (that are None).
- new_smis = [s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None]
+ new_smis = [
+ s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None
+ ]
# If there are no smi left, return the input one at the very least.
# All generated forms have apparently been judged
@@ -543,7 +614,7 @@
# If the user wants to see the target states, add those
# to the ends of each line.
if self.args["label_states"]:
- states = '\t'.join([x[1] for x in sites])
+ states = "\t".join([x[1] for x in sites])
new_lines = [x + "\t" + tag + "\t" + states for x in new_smis]
else:
new_lines = [x + "\t" + tag for x in new_smis]
@@ -552,12 +623,15 @@
return self.next()
+
class ProtSubstructFuncs:
"""A namespace to store functions for loading the substructures that can
be protonated. To keep things organized."""
@staticmethod
- def load_protonation_substructs_calc_state_for_ph(min_ph=6.4, max_ph=8.4, pka_std_range=1):
+ def load_protonation_substructs_calc_state_for_ph(
+ min_ph=6.4, max_ph=8.4, pka_std_range=1
+ ):
"""A pre-calculated list of R-groups with protonation sites, with their
likely pKa bins.
@@ -573,7 +647,7 @@
pwd = os.path.dirname(os.path.realpath(__file__))
site_structures_file = "{}/{}".format(pwd, "site_substructures.smarts")
- with open(site_structures_file, 'r') as substruct:
+ with open(site_structures_file, "r") as substruct:
for line in substruct:
line = line.strip()
sub = {}
@@ -584,7 +658,9 @@
sub["mol"] = Chem.MolFromSmarts(sub["smart"])
# NEED TO DIVIDE THIS BY 3s
- pka_ranges = [splits[i:i+3] for i in range(2, len(splits)-1, 3)]
+ pka_ranges = [
+ splits[i : i + 3] for i in range(2, len(splits) - 1, 3)
+ ]
prot = []
for pka_range in pka_ranges:
@@ -620,11 +696,11 @@
# This needs to be reassigned, and 'ERROR' should never make it past the
# next set of checks.
if min_pka <= max_ph and min_ph <= max_pka:
- protonation_state = 'BOTH'
+ protonation_state = "BOTH"
elif mean > max_ph:
- protonation_state = 'PROTONATED'
+ protonation_state = "PROTONATED"
else:
- protonation_state = 'DEPROTONATED'
+ protonation_state = "DEPROTONATED"
return protonation_state
@@ -650,8 +726,8 @@
# Try to Add hydrogens. if failed return []
try:
- mol = Chem.AddHs(mol)
- except:
+ mol = Chem.AddHs(mol)
+ except Exception:
UtilFuncs.eprint("ERROR: ", smi)
return []
@@ -701,14 +777,14 @@
# Initialize the output list
output_smis = []
- state_to_charge = {"DEPROTONATED": [-1],
- "PROTONATED": [0],
- "BOTH": [-1, 0]}
+ state_to_charge = {"DEPROTONATED": [-1], "PROTONATED": [0], "BOTH": [-1, 0]}
charges = state_to_charge[target_prot_state]
# Now make the actual smiles match the target protonation state.
- output_smis = ProtSubstructFuncs.set_protonation_charge(smis, idx, charges, prot_site_name)
+ output_smis = ProtSubstructFuncs.set_protonation_charge(
+ smis, idx, charges, prot_site_name
+ )
return output_smis
@@ -759,11 +835,12 @@
atom.SetFormalCharge(charge)
# Convert back to SMILE and add to output
- out_smile = Chem.MolToSmiles(mol, isomericSmiles=True,canonical=True)
+ out_smile = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
output.append(out_smile)
return output
+
class ProtectUnprotectFuncs:
"""A namespace for storing functions that are useful for protecting and
unprotecting molecules. To keep things organized. We need to identify and
@@ -779,7 +856,7 @@
"""
for atom in mol.GetAtoms():
- atom.SetProp('_protected', '0')
+ atom.SetProp("_protected", "0")
@staticmethod
def protect_molecule(mol, match):
@@ -793,7 +870,7 @@
for idx in match:
atom = mol.GetAtomWithIdx(idx)
- atom.SetProp('_protected', '1')
+ atom.SetProp("_protected", "1")
@staticmethod
def get_unprotected_matches(mol, substruct):
@@ -829,6 +906,7 @@
return False
return True
+
class TestFuncs:
"""A namespace for storing functions that perform tests on the code. To
keep things organized."""
@@ -839,53 +917,158 @@
smis = [
# [input smiles, pka, protonated, deprotonated, category]
- ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"],
- ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"],
- ["CC(=O)NOC(C)=O", "CC(=O)NOC(C)=O", "CC(=O)[N-]OC(C)=O", "Amide_electronegative"],
- ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"],
- ["Brc1ccc(C2NCCS2)cc1", "Brc1ccc(C2[NH2+]CCS2)cc1", "Brc1ccc(C2NCCS2)cc1", "Amines_primary_secondary_tertiary"],
- ["CC(=O)[n+]1ccc(N)cc1", "CC(=O)[n+]1ccc([NH3+])cc1", "CC(=O)[n+]1ccc(N)cc1", "Anilines_primary"],
- ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"],
- ["Cc1ccccc1N(C)C", "Cc1ccccc1[NH+](C)C", "Cc1ccccc1N(C)C", "Anilines_tertiary"],
- ["BrC1=CC2=C(C=C1)NC=C2", "Brc1ccc2[nH]ccc2c1", "Brc1ccc2[n-]ccc2c1", "Indole_pyrrole"],
- ["O=c1cc[nH]cc1", "O=c1cc[nH]cc1", "O=c1cc[n-]cc1", "Aromatic_nitrogen_protonated"],
- ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"],
- ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"],
- ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"],
- ["C(F)(F)(F)C(=O)NC(=O)C", "CC(=O)NC(=O)C(F)(F)F", "CC(=O)[N-]C(=O)C(F)(F)F", "Imide"],
- ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"],
- ["CC(C)(C)C(N(C)O)=O", "CN(O)C(=O)C(C)(C)C", "CN([O-])C(=O)C(C)(C)C", "N-hydroxyamide"],
- ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"],
- ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"],
- ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"],
- ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"],
- ["Brc1cc(O)cc(Br)c1", "Oc1cc(Br)cc(Br)c1", "[O-]c1cc(Br)cc(Br)c1", "Phenol"],
- ["CC(=O)c1ccc(S)cc1", "CC(=O)c1ccc(S)cc1", "CC(=O)c1ccc([S-])cc1", "Phenyl_Thiol"],
- ["C=CCOc1ccc(C(=O)O)cc1", "C=CCOc1ccc(C(=O)O)cc1", "C=CCOc1ccc(C(=O)[O-])cc1", "Phenyl_carboxyl"],
- ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"],
- ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"],
- ["CC(C)OP(C)(=O)O", "CC(C)OP(C)(=O)O", "CC(C)OP(C)(=O)[O-]", "Phosphonate_ester"],
- ["CC1(C)OC(=O)NC1=O", "CC1(C)OC(=O)NC1=O", "CC1(C)OC(=O)[N-]C1=O", "Ringed_imide1"],
- ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"],
- ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"],
- ["COc1ccc(S(=O)O)cc1", "COc1ccc(S(=O)O)cc1", "COc1ccc(S(=O)[O-])cc1", "Sulfinic_acid"],
- ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"],
- ["CC(=O)CSCCS(O)(=O)=O", "CC(=O)CSCCS(=O)(=O)O", "CC(=O)CSCCS(=O)(=O)[O-]", "Sulfonate"],
- ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"],
- ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"],
- ["Brc1cc[nH+]cc1", "Brc1cc[nH+]cc1", "Brc1ccncc1", "Aromatic_nitrogen_unprotonated"],
- ["C=C(O)c1c(C)cc(C)cc1C", "C=C(O)c1c(C)cc(C)cc1C", "C=C([O-])c1c(C)cc(C)cc1C", "Vinyl_alcohol"],
- ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"]
+ ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"],
+ ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"],
+ [
+ "CC(=O)NOC(C)=O",
+ "CC(=O)NOC(C)=O",
+ "CC(=O)[N-]OC(C)=O",
+ "Amide_electronegative",
+ ],
+ ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"],
+ [
+ "Brc1ccc(C2NCCS2)cc1",
+ "Brc1ccc(C2[NH2+]CCS2)cc1",
+ "Brc1ccc(C2NCCS2)cc1",
+ "Amines_primary_secondary_tertiary",
+ ],
+ [
+ "CC(=O)[n+]1ccc(N)cc1",
+ "CC(=O)[n+]1ccc([NH3+])cc1",
+ "CC(=O)[n+]1ccc(N)cc1",
+ "Anilines_primary",
+ ],
+ ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"],
+ [
+ "Cc1ccccc1N(C)C",
+ "Cc1ccccc1[NH+](C)C",
+ "Cc1ccccc1N(C)C",
+ "Anilines_tertiary",
+ ],
+ [
+ "BrC1=CC2=C(C=C1)NC=C2",
+ "Brc1ccc2[nH]ccc2c1",
+ "Brc1ccc2[n-]ccc2c1",
+ "Indole_pyrrole",
+ ],
+ [
+ "O=c1cc[nH]cc1",
+ "O=c1cc[nH]cc1",
+ "O=c1cc[n-]cc1",
+ "Aromatic_nitrogen_protonated",
+ ],
+ ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"],
+ ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"],
+ ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"],
+ [
+ "C(F)(F)(F)C(=O)NC(=O)C",
+ "CC(=O)NC(=O)C(F)(F)F",
+ "CC(=O)[N-]C(=O)C(F)(F)F",
+ "Imide",
+ ],
+ ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"],
+ [
+ "CC(C)(C)C(N(C)O)=O",
+ "CN(O)C(=O)C(C)(C)C",
+ "CN([O-])C(=O)C(C)(C)C",
+ "N-hydroxyamide",
+ ],
+ ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"],
+ ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"],
+ ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"],
+ ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"],
+ [
+ "Brc1cc(O)cc(Br)c1",
+ "Oc1cc(Br)cc(Br)c1",
+ "[O-]c1cc(Br)cc(Br)c1",
+ "Phenol",
+ ],
+ [
+ "CC(=O)c1ccc(S)cc1",
+ "CC(=O)c1ccc(S)cc1",
+ "CC(=O)c1ccc([S-])cc1",
+ "Phenyl_Thiol",
+ ],
+ [
+ "C=CCOc1ccc(C(=O)O)cc1",
+ "C=CCOc1ccc(C(=O)O)cc1",
+ "C=CCOc1ccc(C(=O)[O-])cc1",
+ "Phenyl_carboxyl",
+ ],
+ ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"],
+ ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"],
+ [
+ "CC(C)OP(C)(=O)O",
+ "CC(C)OP(C)(=O)O",
+ "CC(C)OP(C)(=O)[O-]",
+ "Phosphonate_ester",
+ ],
+ [
+ "CC1(C)OC(=O)NC1=O",
+ "CC1(C)OC(=O)NC1=O",
+ "CC1(C)OC(=O)[N-]C1=O",
+ "Ringed_imide1",
+ ],
+ ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"],
+ ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"],
+ [
+ "COc1ccc(S(=O)O)cc1",
+ "COc1ccc(S(=O)O)cc1",
+ "COc1ccc(S(=O)[O-])cc1",
+ "Sulfinic_acid",
+ ],
+ ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"],
+ [
+ "CC(=O)CSCCS(O)(=O)=O",
+ "CC(=O)CSCCS(=O)(=O)O",
+ "CC(=O)CSCCS(=O)(=O)[O-]",
+ "Sulfonate",
+ ],
+ ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"],
+ ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"],
+ [
+ "Brc1cc[nH+]cc1",
+ "Brc1cc[nH+]cc1",
+ "Brc1ccncc1",
+ "Aromatic_nitrogen_unprotonated",
+ ],
+ [
+ "C=C(O)c1c(C)cc(C)cc1C",
+ "C=C(O)c1c(C)cc(C)cc1C",
+ "C=C([O-])c1c(C)cc(C)cc1C",
+ "Vinyl_alcohol",
+ ],
+ ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"],
]
smis_phos = [
- ["O=P(O)(O)OCCCC", "CCCCOP(=O)(O)O", "CCCCOP(=O)([O-])O", "CCCCOP(=O)([O-])[O-]", "Phosphate"],
- ["CC(P(O)(O)=O)C", "CC(C)P(=O)(O)O", "CC(C)P(=O)([O-])O", "CC(C)P(=O)([O-])[O-]", "Phosphonate"]
+ [
+ "O=P(O)(O)OCCCC",
+ "CCCCOP(=O)(O)O",
+ "CCCCOP(=O)([O-])O",
+ "CCCCOP(=O)([O-])[O-]",
+ "Phosphate",
+ ],
+ [
+ "CC(P(O)(O)=O)C",
+ "CC(C)P(=O)(O)O",
+ "CC(C)P(=O)([O-])O",
+ "CC(C)P(=O)([O-])[O-]",
+ "Phosphonate",
+ ],
]
# Load the average pKa values.
- average_pkas = {l.split()[0].replace("*", ""):float(l.split()[3]) for l in open("site_substructures.smarts") if l.split()[0] not in ["Phosphate", "Phosphonate"]}
- average_pkas_phos = {l.split()[0].replace("*", ""):[float(l.split()[3]), float(l.split()[6])] for l in open("site_substructures.smarts") if l.split()[0] in ["Phosphate", "Phosphonate"]}
+ average_pkas = {
+ l.split()[0].replace("*", ""): float(l.split()[3])
+ for l in open("site_substructures.smarts")
+ if l.split()[0] not in ["Phosphate", "Phosphonate"]
+ }
+ average_pkas_phos = {
+ l.split()[0].replace("*", ""): [float(l.split()[3]), float(l.split()[6])]
+ for l in open("site_substructures.smarts")
+ if l.split()[0] in ["Phosphate", "Phosphonate"]
+ }
print("Running Tests")
print("=============")
@@ -900,7 +1083,7 @@
"max_ph": -10000000,
"pka_precision": 0.5,
"smiles": "",
- "label_states": True
+ "label_states": True,
}
for smi, protonated, deprotonated, category in smis:
@@ -954,14 +1137,20 @@
args["min_ph"] = avg_pka
args["max_ph"] = avg_pka
- TestFuncs.test_check(args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"])
+ TestFuncs.test_check(
+ args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"]
+ )
- avg_pka = 0.5 * (average_pkas_phos[category][0] + average_pkas_phos[category][1])
+ avg_pka = 0.5 * (
+ average_pkas_phos[category][0] + average_pkas_phos[category][1]
+ )
args["min_ph"] = avg_pka
args["max_ph"] = avg_pka
args["pka_precision"] = 5 # Should give all three
- TestFuncs.test_check(args, [mix, deprotonated, protonated], ["BOTH", "BOTH"])
+ TestFuncs.test_check(
+ args, [mix, deprotonated, protonated], ["BOTH", "BOTH"]
+ )
@staticmethod
def test_check(args, expected_output, labels):
@@ -981,28 +1170,56 @@
num_states = len(expected_output)
- if (len(output) != num_states):
- msg = args["smiles"] + " should have " + str(num_states) + \
- " states at at pH " + str(args["min_ph"]) + ": " + str(output)
+ if len(output) != num_states:
+ msg = (
+ args["smiles"]
+ + " should have "
+ + str(num_states)
+ + " states at at pH "
+ + str(args["min_ph"])
+ + ": "
+ + str(output)
+ )
print(msg)
raise Exception(msg)
- if (len(set([l[0] for l in output]) - set(expected_output)) != 0):
- msg = args["smiles"] + " is not " + " AND ".join(expected_output) + \
- " at pH " + str(args["min_ph"]) + " - " + str(args["max_ph"]) + \
- "; it is " + " AND ".join([l[0] for l in output])
+ if len(set([l[0] for l in output]) - set(expected_output)) != 0:
+ msg = (
+ args["smiles"]
+ + " is not "
+ + " AND ".join(expected_output)
+ + " at pH "
+ + str(args["min_ph"])
+ + " - "
+ + str(args["max_ph"])
+ + "; it is "
+ + " AND ".join([l[0] for l in output])
+ )
print(msg)
raise Exception(msg)
- if (len(set([l[1] for l in output]) - set(labels)) != 0):
- msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \
- "; it is " + " AND ".join([l[1] for l in output])
+ if len(set([l[1] for l in output]) - set(labels)) != 0:
+ msg = (
+ args["smiles"]
+ + " not labeled as "
+ + " AND ".join(labels)
+ + "; it is "
+ + " AND ".join([l[1] for l in output])
+ )
print(msg)
raise Exception(msg)
ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))
ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"
- print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output]))
+ print(
+ "(CORRECT) "
+ + ph_range_str.ljust(10)
+ + " "
+ + args["smiles"]
+ + " => "
+ + " AND ".join([l[0] for l in output])
+ )
+
def run(**kwargs):
"""A helpful, importable function for those who want to call Dimorphite-DL
@@ -1019,6 +1236,7 @@
# Run the main function with the specified arguments.
main(kwargs)
+
def run_with_mol_list(mol_lst, **kwargs):
"""A helpful, importable function for those who want to call Dimorphite-DL
from another Python script rather than the command line. Note that this
@@ -1037,10 +1255,13 @@
# Do a quick check to make sure the user input makes sense.
for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:
if bad_arg in kwargs:
- msg = "You're using Dimorphite-DL's run_with_mol_list(mol_lst, " + \
- "**kwargs) function, but you also passed the \"" + \
- bad_arg + "\" argument. Did you mean to use the " + \
- "run(**kwargs) function instead?"
+ msg = (
+ "You're using Dimorphite-DL's run_with_mol_list(mol_lst, "
+ + '**kwargs) function, but you also passed the "'
+ + bad_arg
+ + '" argument. Did you mean to use the '
+ + "run(**kwargs) function instead?"
+ )
print(msg)
raise Exception(msg)
@@ -1076,9 +1297,15 @@
m.SetProp(prop, str(val))
mols.append(m)
else:
- UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props))
+ UtilFuncs.eprint(
+ "WARNING: Could not process molecule with SMILES string "
+ + s
+ + " and properties "
+ + str(props)
+ )
return mols
+
if __name__ == "__main__":
main()
diff -r bbbf5fb356dd -r 67ee76f0e497 rdconf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rdconf.py Sat Dec 04 16:40:23 2021 +0000
@@ -0,0 +1,229 @@
+#!/usr/bin/python3
+
+import gzip
+import os
+import sys
+from optparse import OptionParser
+
+from rdkit.Chem import AllChem as Chem
+
+"""
+This script was originally written by David Koes, University of Pittsburgh:
+https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py
+It is licensed under the MIT licence.
+
+Given a smiles file, generate 3D conformers in output sdf.
+Energy minimizes and filters conformers to meet energy window and rms constraints.
+
+Some time ago I compared this to alternative conformer generators and
+it was quite competitive (especially after RDKit's UFF implementation
+added OOP terms).
+"""
+
+
+# optimal (symmetry-aware) RMS between conformers c1 and c2 of mol, via RDKit GetBestRMS
+def getRMS(mol, c1, c2):
+ rms = Chem.GetBestRMS(mol, mol, c1, c2)
+ return rms
+
+
+parser = OptionParser(usage="Usage: %prog [options] .smi