Previous changeset 0:06828e0cc8a7 (2019-10-16) Next changeset 2:40ff81f67f5e (2020-03-21) |
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06" |
added:
dimorphite_dl.py site_substructures.smarts test-data/mols.smi |
b |
diff -r 06828e0cc8a7 -r 193389d3a5f0 dimorphite_dl.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dimorphite_dl.py Tue Mar 10 12:57:48 2020 -0400 |
[ |
b'@@ -0,0 +1,1084 @@\n+# Copyright 2018 Jacob D. Durrant\n+#\n+# Licensed under the Apache License, Version 2.0 (the "License");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an "AS IS" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+"""\n+This script identifies and enumerates the possible protonation sites of SMILES\n+strings.\n+"""\n+\n+from __future__ import print_function\n+import copy\n+import os\n+import argparse\n+import sys\n+\n+try:\n+ # Python2\n+ from StringIO import StringIO\n+except ImportError:\n+ # Python3\n+ from io import StringIO\n+\n+# Always let the user know a help file is available.\n+print("\\nFor help, use: python dimorphite_dl.py --help")\n+\n+# And always report citation information.\n+print("\\nIf you use Dimorphite-DL in your research, please cite:")\n+print("Ropp PJ, Kaminsky JC, Yablonski S, Durrant JD (2019) Dimorphite-DL: An")\n+print("open-source program for enumerating the ionization states of drug-like small")\n+print("molecules. J Cheminform 11:14. doi:10.1186/s13321-019-0336-9.\\n")\n+\n+try:\n+ import rdkit\n+ from rdkit import Chem\n+ from rdkit.Chem import AllChem\n+except:\n+ msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"\n+ print(msg)\n+ raise Exception(msg)\n+\n+def main(params=None):\n+ """The main definition run when you call the script from the commandline.\n+\n+ :param params: The parameters to use. Entirely optional. 
If absent,\n+ defaults to None, in which case arguments will be taken from\n+ those given at the command line.\n+ :type params: dict, optional\n+ :return: Returns a list of the SMILES strings if the return_as_list parameter is\n+ True. Otherwise, returns None.\n+ """\n+\n+ parser = ArgParseFuncs.get_args()\n+ args = vars(parser.parse_args())\n+\n+ # Add in any parameters in params.\n+ if params is not None:\n+ for k, v in params.items():\n+ args[k] = v\n+\n+ # If being run from the command line, print out all parameters.\n+ if __name__ == "__main__":\n+ print("\\nPARAMETERS:\\n")\n+ for k in sorted(args.keys()):\n+ print(k.rjust(13) + ": " + str(args[k]))\n+ print("")\n+\n+ if args["test"]:\n+ # Run tests.\n+ TestFuncs.test()\n+ else:\n+ # Run protonation\n+ if "output_file" in args and args["output_file"] is not None:\n+ # An output file was specified, so write to that.\n+ with open(args["output_file"], "w") as file:\n+ for protonated_smi in Protonate(args):\n+ file.write(protonated_smi + "\\n")\n+ elif "return_as_list" in args and args["return_as_list"] == True:\n+ return list(Protonate(args))\n+ else:\n+ # No output file specified. Just print it to the screen.\n+ for protonated_smi in Protonate(args):\n+ print(protonated_smi)\n+\n+class MyParser(argparse.ArgumentParser):\n+ """Overwrite default parse so it displays help file on error. 
See\n+ https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""\n+\n+ def error(self, message):\n+ """Overwrites the default error message.\n+\n+ :param message: The default error message.\n+ """\n+\n+ self.print_help()\n+ msg = "ERROR: %s\\n\\n" % message\n+ print(msg)\n+ raise Exception(msg)\n+\n+ def print_help(self, file=None):\n+ """Overwrite the default print_help function\n+\n+ :param file: Output file, defaults to None\n+ """\n+\n+ print("")\n+\n+ if file is None:\n+ '..b' raise Exception(msg)\n+\n+ if (len(set([l[1] for l in output]) - set(labels)) != 0):\n+ msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \\\n+ "; it is " + " AND ".join([l[1] for l in output])\n+ print(msg)\n+ raise Exception(msg)\n+\n+ ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))\n+ ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"\n+ print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output]))\n+\n+def run(**kwargs):\n+ """A helpful, importable function for those who want to call Dimorphite-DL\n+ from another Python script rather than the command line. Note that this\n+ function accepts keyword arguments that match the command-line parameters\n+ exactly. If you want to pass and return a list of RDKit Mol objects, import\n+ run_with_mol_list() instead.\n+\n+ :param **kwargs: For a complete description, run dimorphite_dl.py from the\n+ command line with the -h option.\n+ :type kwargs: dict\n+ """\n+\n+ # Run the main function with the specified arguments.\n+ main(kwargs)\n+\n+def run_with_mol_list(mol_lst, **kwargs):\n+ """A helpful, importable function for those who want to call Dimorphite-DL\n+ from another Python script rather than the command line. Note that this\n+ function is for passing Dimorphite-DL a list of RDKit Mol objects, together\n+ with command-line parameters. 
If you want to use only the same parameters\n+ that you would use from the command line, import run() instead.\n+\n+ :param mol_lst: A list of rdkit.Chem.rdchem.Mol objects.\n+ :type mol_lst: list\n+ :raises Exception: If the **kwargs includes "smiles", "smiles_file",\n+ "output_file", or "test" parameters.\n+ :return: A list of properly protonated rdkit.Chem.rdchem.Mol objects.\n+ :rtype: list\n+ """\n+\n+ # Do a quick check to make sure the user input makes sense.\n+ for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:\n+ if bad_arg in kwargs:\n+ msg = "You\'re using Dimorphite-DL\'s run_with_mol_list(mol_lst, " + \\\n+ "**kwargs) function, but you also passed the \\"" + \\\n+ bad_arg + "\\" argument. Did you mean to use the " + \\\n+ "run(**kwargs) function instead?"\n+ print(msg)\n+ raise Exception(msg)\n+\n+ # Set the return_as_list flag so main() will return the protonated smiles\n+ # as a list.\n+ kwargs["return_as_list"] = True\n+\n+ # Having reviewed the code, it will be very difficult to rewrite it so\n+ # that a list of Mol objects can be used directly. Instead, convert this\n+ # list of mols to smiles and pass that. Not efficient, but it will work.\n+ protonated_smiles_and_props = []\n+ for m in mol_lst:\n+ props = m.GetPropsAsDict()\n+ kwargs["smiles"] = Chem.MolToSmiles(m, isomericSmiles=True)\n+ protonated_smiles_and_props.extend(\n+ [(s.split("\\t")[0], props) for s in main(kwargs)]\n+ )\n+\n+ # Now convert the list of protonated smiles strings back to RDKit Mol\n+ # objects. 
Also, add back in the properties from the original mol objects.\n+ mols = []\n+ for s, props in protonated_smiles_and_props:\n+ m = Chem.MolFromSmiles(s)\n+ if m:\n+ for prop, val in props.items():\n+ if type(val) is int:\n+ m.SetIntProp(prop, val)\n+ elif type(val) is float:\n+ m.SetDoubleProp(prop, val)\n+ elif type(val) is bool:\n+ m.SetBoolProp(prop, val)\n+ else:\n+ m.SetProp(prop, str(val))\n+ mols.append(m)\n+ else:\n+ UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props))\n+\n+ return mols\n+\n+if __name__ == "__main__":\n+ main()\n' |
b |
diff -r 06828e0cc8a7 -r 193389d3a5f0 site_substructures.smarts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/site_substructures.smarts Tue Mar 10 12:57:48 2020 -0400 |
[ |
@@ -0,0 +1,39 @@ +*Azide [N+0:1]=[N+:2]=[N+0:3]-[H] 2 4.65 0.07071067811865513 +Nitro [C,c,N,n,O,o:1]-[NX3:2](=[O:3])-[O:4]-[H] 3 -1000.0 0 +AmidineGuanidine1 [N:1]-[C:2](-[N:3])=[NX2:4]-[H:5] 3 12.025333333333334 1.5941046150769165 +AmidineGuanidine2 [C:1](-[N:2])=[NX2+0:3] 2 10.035538461538462 2.1312826469414716 +Sulfate [SX4:1](=[O:2])(=[O:3])([O:4]-[C,c,N,n:5])-[OX2:6]-[H] 5 -2.36 1.3048043093561141 +Sulfonate [SX4:1](=[O:2])(=[O:3])(-[C,c,N,n:4])-[OX2:5]-[H] 4 -1.8184615384615386 1.4086213481855594 +Sulfinic_acid [SX3:1](=[O:2])-[O:3]-[H] 2 1.7933333333333332 0.4372070447739835 +Phenyl_carboxyl [c,n,o:1]-[C:2](=[O:3])-[O:4]-[H] 3 3.463441968255319 1.2518054407928614 +Carboxyl [C:1](=[O:2])-[O:3]-[H] 2 3.456652971502591 1.2871420886834017 +Thioic_acid [C,c,N,n:1](=[O,S:2])-[SX2,OX2:3]-[H] 2 0.678267 1.497048763660801 +Phenyl_Thiol [c,n:1]-[SX2:2]-[H] 1 4.978235294117647 2.6137000480499806 +Thiol [C,N:1]-[SX2:2]-[H] 1 9.12448275862069 1.3317968158171463 +Phosphate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[O+0:4])-[OX2:5]-[H] 2 2.4182608695652172 1.1091177991945305 5 6.5055 0.9512787792174668 +Phosphonate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[C,c,N,n:4])-[OX2:5]-[H] 2 1.8835714285714287 0.5925999820080644 5 7.247254901960784 0.8511476450801531 +Phenol [c,n,o:1]-[O:2]-[H] 1 7.065359866910526 3.277356122295936 +Peroxide1 [O:1]([$(C=O),$(C[Cl]),$(CF),$(C[Br]),$(CC#N):2])-[O:3]-[H] 2 8.738888888888889 0.7562592839596507 +Peroxide2 [C:1]-[O:2]-[O:3]-[H] 2 11.978235294117647 0.8697645895163075 +O=C-C=C-OH [O:1]=[C;R:2]-[C;R:3]=[C;R:4]-[O:5]-[H] 4 3.554 0.803339458581667 +Vinyl_alcohol [C:1]=[C:2]-[O:3]-[H] 2 8.871850714285713 1.660200255394124 +Alcohol [C:1]-[O:2]-[H] 1 14.780384615384616 2.546464970533435 +N-hydroxyamide [C:1](=[O:2])-[N:3]-[O:4]-[H] 3 9.301904761904762 1.2181897185891002 +*Ringed_imide1 [O,S:1]=[C;R:2]([$([#8]),$([#7]),$([#16]),$([#6][Cl]),$([#6]F),$([#6][Br]):3])-[N;R:4]([C;R:5]=[O,S:6])-[H] 3 6.4525 0.5555627777308341 +*Ringed_imide2 
[O,S:1]=[C;R:2]-[N;R:3]([C;R:4]=[O,S:5])-[H] 2 8.681666666666667 1.8657779975741713 +*Imide [F,Cl,Br,S,s,P,p:1][#6:2][CX3:3](=[O,S:4])-[NX3+0:5]([CX3:6]=[O,S:7])-[H] 4 2.466666666666667 1.4843629385474877 +*Imide2 [O,S:1]=[CX3:2]-[NX3+0:3]([CX3:4]=[O,S:5])-[H] 2 10.23 1.1198214143335534 +*Amide_electronegative [C:1](=[O:2])-[N:3](-[Br,Cl,I,F,S,O,N,P:4])-[H] 2 3.4896 2.688124315081677 +*Amide [C:1](=[O:2])-[N:3]-[H] 2 12.00611111111111 4.512491341218857 +*Sulfonamide [SX4:1](=[O:2])(=[O:3])-[NX3+0:4]-[H] 3 7.9160326086956525 1.9842121316708763 +Anilines_primary [c:1]-[NX3+0:2]([H:3])[H:4] 1 3.899298673194805 2.068768503987161 +Anilines_secondary [c:1]-[NX3+0:2]([H:3])[!H:4] 1 4.335408163265306 2.1768842022330843 +Anilines_tertiary [c:1]-[NX3+0:2]([!H:3])[!H:4] 1 4.16690685045614 2.005865735782679 +Aromatic_nitrogen_unprotonated [n+0&H0:1] 0 4.3535441240733945 2.0714072661859584 +Amines_primary_secondary_tertiary [C:1]-[NX3+0:2] 1 8.159107682388349 2.5183597445318147 +Phosphinic_acid [PX4:1](=[O:2])(-[C,c,N,n,F,Cl,Br,I:3])(-[C,c,N,n,F,Cl,Br,I:4])-[OX2:5]-[H] 4 2.9745 0.6867886750744557 +Phosphate_diester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[O+0:5]-[C,c,N,n,F,Cl,Br,I:4])-[OX2:6]-[H] 6 2.7280434782608696 2.5437448856908316 +Phosphonate_ester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[C,c,N,n,F,Cl,Br,I:5])-[OX2:6]-[H] 5 2.0868 0.4503028610465036 +Primary_hydroxyl_amine [C,c:1]-[O:2]-[NH2:3] 2 4.035714285714286 0.8463816543155368 +*Indole_pyrrole [c;R:1]1[c;R:2][c;R:3][c;R:4][n;R:5]1[H] 4 14.52875 4.06702491591416 +*Aromatic_nitrogen_protonated [n:1]-[H] 0 7.17 2.94602395490212 |
b |
diff -r 06828e0cc8a7 -r 193389d3a5f0 test-data/mols.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mols.smi Tue Mar 10 12:57:48 2020 -0400 |
[ |
@@ -0,0 +1,3 @@ +NCCCCC(N)C(=O)O lysine +O=C(O)C(N)C alanine +N[C@@H](CC1=CC=CC=C1)C(O)=O phenylanaline |