Repository 'ctb_rdkit_descriptors'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/ctb_rdkit_descriptors

Changeset 4:414edd9ea77d (2020-03-10)
Previous changeset 3:617d4555d8d3 (2019-10-16) Next changeset 5:1cf3bab54ddd (2020-03-21)
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
added:
dimorphite_dl.py
site_substructures.smarts
test-data/mols.smi
b
diff -r 617d4555d8d3 -r 414edd9ea77d dimorphite_dl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dimorphite_dl.py Tue Mar 10 12:58:24 2020 -0400
[
b'@@ -0,0 +1,1084 @@\n+# Copyright 2018 Jacob D. Durrant\n+#\n+# Licensed under the Apache License, Version 2.0 (the "License");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+#     http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an "AS IS" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+"""\n+This script identifies and enumerates the possible protonation sites of SMILES\n+strings.\n+"""\n+\n+from __future__ import print_function\n+import copy\n+import os\n+import argparse\n+import sys\n+\n+try:\n+    # Python2\n+    from StringIO import StringIO\n+except ImportError:\n+    # Python3\n+    from io import StringIO\n+\n+# Always let the user know a help file is available.\n+print("\\nFor help, use: python dimorphite_dl.py --help")\n+\n+# And always report citation information.\n+print("\\nIf you use Dimorphite-DL in your research, please cite:")\n+print("Ropp PJ, Kaminsky JC, Yablonski S, Durrant JD (2019) Dimorphite-DL: An")\n+print("open-source program for enumerating the ionization states of drug-like small")\n+print("molecules. J Cheminform 11:14. doi:10.1186/s13321-019-0336-9.\\n")\n+\n+try:\n+    import rdkit\n+    from rdkit import Chem\n+    from rdkit.Chem import AllChem\n+except:\n+    msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"\n+    print(msg)\n+    raise Exception(msg)\n+\n+def main(params=None):\n+    """The main definition run when you call the script from the commandline.\n+\n+    :param params: The parameters to use. Entirely optional. 
If absent,\n+                   defaults to None, in which case arguments will be taken from\n+                   those given at the command line.\n+    :type params: dict, optional\n+    :return: Returns a list of the SMILES strings if the return_as_list parameter is\n+             True. Otherwise, returns None.\n+    """\n+\n+    parser = ArgParseFuncs.get_args()\n+    args = vars(parser.parse_args())\n+\n+    # Add in any parameters in params.\n+    if params is not None:\n+        for k, v in params.items():\n+            args[k] = v\n+\n+    # If being run from the command line, print out all parameters.\n+    if __name__ == "__main__":\n+        print("\\nPARAMETERS:\\n")\n+        for k in sorted(args.keys()):\n+            print(k.rjust(13) + ": " + str(args[k]))\n+        print("")\n+\n+    if args["test"]:\n+        # Run tests.\n+        TestFuncs.test()\n+    else:\n+        # Run protonation\n+        if "output_file" in args and args["output_file"] is not None:\n+            # An output file was specified, so write to that.\n+            with open(args["output_file"], "w") as file:\n+                for protonated_smi in Protonate(args):\n+                    file.write(protonated_smi + "\\n")\n+        elif "return_as_list" in args and args["return_as_list"] == True:\n+            return list(Protonate(args))\n+        else:\n+            # No output file specified. Just print it to the screen.\n+            for protonated_smi in Protonate(args):\n+                print(protonated_smi)\n+\n+class MyParser(argparse.ArgumentParser):\n+    """Overwrite default parse so it displays help file on error. 
See\n+    https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""\n+\n+    def error(self, message):\n+        """Overwrites the default error message.\n+\n+        :param message: The default error message.\n+        """\n+\n+        self.print_help()\n+        msg = "ERROR: %s\\n\\n" % message\n+        print(msg)\n+        raise Exception(msg)\n+\n+    def print_help(self, file=None):\n+        """Overwrite the default print_help function\n+\n+        :param file: Output file, defaults to None\n+        """\n+\n+        print("")\n+\n+        if file is None:\n+            '..b'          raise Exception(msg)\n+\n+        if (len(set([l[1] for l in output]) - set(labels)) != 0):\n+            msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \\\n+                "; it is " + " AND ".join([l[1] for l in output])\n+            print(msg)\n+            raise Exception(msg)\n+\n+        ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))\n+        ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"\n+        print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output]))\n+\n+def run(**kwargs):\n+    """A helpful, importable function for those who want to call Dimorphite-DL\n+    from another Python script rather than the command line. Note that this\n+    function accepts keyword arguments that match the command-line parameters\n+    exactly. 
If you want to pass and return a list of RDKit Mol objects, import\n+    run_with_mol_list() instead.\n+\n+    :param **kwargs: For a complete description, run dimorphite_dl.py from the\n+        command line with the -h option.\n+    :type kwargs: dict\n+    """\n+\n+    # Run the main function with the specified arguments.\n+    main(kwargs)\n+\n+def run_with_mol_list(mol_lst, **kwargs):\n+    """A helpful, importable function for those who want to call Dimorphite-DL\n+    from another Python script rather than the command line. Note that this\n+    function is for passing Dimorphite-DL a list of RDKit Mol objects, together\n+    with command-line parameters. If you want to use only the same parameters\n+    that you would use from the command line, import run() instead.\n+\n+    :param mol_lst: A list of rdkit.Chem.rdchem.Mol objects.\n+    :type mol_lst: list\n+    :raises Exception: If the **kwargs includes "smiles", "smiles_file",\n+                       "output_file", or "test" parameters.\n+    :return: A list of properly protonated rdkit.Chem.rdchem.Mol objects.\n+    :rtype: list\n+    """\n+\n+    # Do a quick check to make sure the user input makes sense.\n+    for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:\n+        if bad_arg in kwargs:\n+            msg = "You\'re using Dimorphite-DL\'s run_with_mol_list(mol_lst, " + \\\n+                   "**kwargs) function, but you also passed the \\"" + \\\n+                   bad_arg + "\\" argument. Did you mean to use the " + \\\n+                   "run(**kwargs) function instead?"\n+            print(msg)\n+            raise Exception(msg)\n+\n+    # Set the return_as_list flag so main() will return the protonated smiles\n+    # as a list.\n+    kwargs["return_as_list"] = True\n+\n+    # Having reviewed the code, it will be very difficult to rewrite it so\n+    # that a list of Mol objects can be used directly. Instead, convert this\n+    # list of mols to smiles and pass that. 
Not efficient, but it will work.\n+    protonated_smiles_and_props = []\n+    for m in mol_lst:\n+        props = m.GetPropsAsDict()\n+        kwargs["smiles"] = Chem.MolToSmiles(m, isomericSmiles=True)\n+        protonated_smiles_and_props.extend(\n+            [(s.split("\\t")[0], props) for s in main(kwargs)]\n+        )\n+\n+    # Now convert the list of protonated smiles strings back to RDKit Mol\n+    # objects. Also, add back in the properties from the original mol objects.\n+    mols = []\n+    for s, props in protonated_smiles_and_props:\n+        m = Chem.MolFromSmiles(s)\n+        if m:\n+            for prop, val in props.items():\n+                if type(val) is int:\n+                    m.SetIntProp(prop, val)\n+                elif type(val) is float:\n+                    m.SetDoubleProp(prop, val)\n+                elif type(val) is bool:\n+                    m.SetBoolProp(prop, val)\n+                else:\n+                    m.SetProp(prop, str(val))\n+            mols.append(m)\n+        else:\n+            UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props))\n+\n+    return mols\n+\n+if __name__ == "__main__":\n+    main()\n'
b
diff -r 617d4555d8d3 -r 414edd9ea77d site_substructures.smarts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/site_substructures.smarts Tue Mar 10 12:58:24 2020 -0400
[
@@ -0,0 +1,39 @@
+*Azide [N+0:1]=[N+:2]=[N+0:3]-[H] 2 4.65 0.07071067811865513
+Nitro [C,c,N,n,O,o:1]-[NX3:2](=[O:3])-[O:4]-[H] 3 -1000.0 0
+AmidineGuanidine1 [N:1]-[C:2](-[N:3])=[NX2:4]-[H:5] 3 12.025333333333334 1.5941046150769165
+AmidineGuanidine2 [C:1](-[N:2])=[NX2+0:3] 2 10.035538461538462 2.1312826469414716
+Sulfate [SX4:1](=[O:2])(=[O:3])([O:4]-[C,c,N,n:5])-[OX2:6]-[H] 5 -2.36 1.3048043093561141
+Sulfonate [SX4:1](=[O:2])(=[O:3])(-[C,c,N,n:4])-[OX2:5]-[H] 4 -1.8184615384615386 1.4086213481855594
+Sulfinic_acid [SX3:1](=[O:2])-[O:3]-[H] 2 1.7933333333333332 0.4372070447739835
+Phenyl_carboxyl [c,n,o:1]-[C:2](=[O:3])-[O:4]-[H] 3 3.463441968255319 1.2518054407928614
+Carboxyl [C:1](=[O:2])-[O:3]-[H] 2 3.456652971502591 1.2871420886834017
+Thioic_acid [C,c,N,n:1](=[O,S:2])-[SX2,OX2:3]-[H] 2 0.678267 1.497048763660801
+Phenyl_Thiol [c,n:1]-[SX2:2]-[H] 1 4.978235294117647 2.6137000480499806
+Thiol [C,N:1]-[SX2:2]-[H] 1 9.12448275862069 1.3317968158171463
+Phosphate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[O+0:4])-[OX2:5]-[H] 2 2.4182608695652172 1.1091177991945305 5 6.5055 0.9512787792174668
+Phosphonate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[C,c,N,n:4])-[OX2:5]-[H] 2 1.8835714285714287 0.5925999820080644 5 7.247254901960784 0.8511476450801531
+Phenol [c,n,o:1]-[O:2]-[H] 1 7.065359866910526 3.277356122295936
+Peroxide1 [O:1]([$(C=O),$(C[Cl]),$(CF),$(C[Br]),$(CC#N):2])-[O:3]-[H] 2 8.738888888888889 0.7562592839596507
+Peroxide2 [C:1]-[O:2]-[O:3]-[H] 2 11.978235294117647 0.8697645895163075
+O=C-C=C-OH [O:1]=[C;R:2]-[C;R:3]=[C;R:4]-[O:5]-[H] 4 3.554 0.803339458581667
+Vinyl_alcohol [C:1]=[C:2]-[O:3]-[H] 2 8.871850714285713 1.660200255394124
+Alcohol [C:1]-[O:2]-[H] 1 14.780384615384616 2.546464970533435
+N-hydroxyamide [C:1](=[O:2])-[N:3]-[O:4]-[H] 3 9.301904761904762 1.2181897185891002
+*Ringed_imide1 [O,S:1]=[C;R:2]([$([#8]),$([#7]),$([#16]),$([#6][Cl]),$([#6]F),$([#6][Br]):3])-[N;R:4]([C;R:5]=[O,S:6])-[H] 3 6.4525 0.5555627777308341
+*Ringed_imide2 [O,S:1]=[C;R:2]-[N;R:3]([C;R:4]=[O,S:5])-[H] 2 8.681666666666667 1.8657779975741713
+*Imide [F,Cl,Br,S,s,P,p:1][#6:2][CX3:3](=[O,S:4])-[NX3+0:5]([CX3:6]=[O,S:7])-[H] 4 2.466666666666667 1.4843629385474877
+*Imide2 [O,S:1]=[CX3:2]-[NX3+0:3]([CX3:4]=[O,S:5])-[H] 2 10.23 1.1198214143335534
+*Amide_electronegative [C:1](=[O:2])-[N:3](-[Br,Cl,I,F,S,O,N,P:4])-[H] 2 3.4896 2.688124315081677
+*Amide [C:1](=[O:2])-[N:3]-[H] 2 12.00611111111111 4.512491341218857
+*Sulfonamide [SX4:1](=[O:2])(=[O:3])-[NX3+0:4]-[H] 3 7.9160326086956525 1.9842121316708763
+Anilines_primary [c:1]-[NX3+0:2]([H:3])[H:4] 1 3.899298673194805 2.068768503987161
+Anilines_secondary [c:1]-[NX3+0:2]([H:3])[!H:4] 1 4.335408163265306 2.1768842022330843
+Anilines_tertiary [c:1]-[NX3+0:2]([!H:3])[!H:4] 1 4.16690685045614 2.005865735782679
+Aromatic_nitrogen_unprotonated [n+0&H0:1] 0 4.3535441240733945 2.0714072661859584
+Amines_primary_secondary_tertiary [C:1]-[NX3+0:2] 1 8.159107682388349 2.5183597445318147
+Phosphinic_acid [PX4:1](=[O:2])(-[C,c,N,n,F,Cl,Br,I:3])(-[C,c,N,n,F,Cl,Br,I:4])-[OX2:5]-[H] 4 2.9745 0.6867886750744557
+Phosphate_diester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[O+0:5]-[C,c,N,n,F,Cl,Br,I:4])-[OX2:6]-[H] 6 2.7280434782608696 2.5437448856908316
+Phosphonate_ester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[C,c,N,n,F,Cl,Br,I:5])-[OX2:6]-[H] 5 2.0868 0.4503028610465036
+Primary_hydroxyl_amine [C,c:1]-[O:2]-[NH2:3] 2 4.035714285714286 0.8463816543155368
+*Indole_pyrrole [c;R:1]1[c;R:2][c;R:3][c;R:4][n;R:5]1[H] 4 14.52875 4.06702491591416
+*Aromatic_nitrogen_protonated [n:1]-[H] 0 7.17 2.94602395490212
b
diff -r 617d4555d8d3 -r 414edd9ea77d test-data/mols.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mols.smi Tue Mar 10 12:58:24 2020 -0400
[
@@ -0,0 +1,3 @@
+NCCCCC(N)C(=O)O lysine
+O=C(O)C(N)C alanine
+N[C@@H](CC1=CC=CC=C1)C(O)=O phenylanaline