Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f" |
added:
dimorphite_dl.py rdconf.py rdconf.xml rdkit_descriptors.py sdf_to_tab.py site_substructures.smarts test-data/CID_3037.sdf test-data/CID_3037.tab test-data/ligand.sdf test-data/ligand.tab test-data/mol.pdb test-data/mol_pdb_charges.tab test-data/mols.smi test-data/rdconf_output.sdf test-data/rdkit_descriptors_result1.csv test-data/rdkit_descriptors_result1.tab test-data/rdkit_descriptors_subset.tab test-data/staurosporine.smi test-data/sucos_cluster.sdf |
b |
diff -r 000000000000 -r 5c501eb8d56c dimorphite_dl.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dimorphite_dl.py Sat Dec 04 16:39:31 2021 +0000 |
[ |
b'@@ -0,0 +1,1311 @@\n+# flake8: noqa\n+# Copyright 2018 Jacob D. Durrant\n+#\n+# Licensed under the Apache License, Version 2.0 (the "License");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an "AS IS" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+\n+"""\n+This script identifies and enumerates the possible protonation sites of SMILES\n+strings.\n+"""\n+\n+from __future__ import print_function\n+\n+import argparse\n+import os\n+import sys\n+\n+try:\n+ # Python2\n+ from StringIO import StringIO\n+except ImportError:\n+ # Python3\n+ from io import StringIO\n+\n+# Always let the user know a help file is available.\n+print("\\nFor help, use: python dimorphite_dl.py --help")\n+\n+# And always report citation information.\n+print("\\nIf you use Dimorphite-DL in your research, please cite:")\n+print("Ropp PJ, Kaminsky JC, Yablonski S, Durrant JD (2019) Dimorphite-DL: An")\n+print("open-source program for enumerating the ionization states of drug-like small")\n+print("molecules. J Cheminform 11:14. doi:10.1186/s13321-019-0336-9.\\n")\n+\n+try:\n+ import rdkit\n+ from rdkit import Chem\n+ from rdkit.Chem import AllChem\n+except Exception:\n+ msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"\n+ print(msg)\n+ raise Exception(msg)\n+\n+\n+def main(params=None):\n+ """The main definition run when you call the script from the commandline.\n+\n+ :param params: The parameters to use. Entirely optional. 
If absent,\n+ defaults to None, in which case argments will be taken from\n+ those given at the command line.\n+ :param params: dict, optional\n+ :return: Returns a list of the SMILES strings return_as_list parameter is\n+ True. Otherwise, returns None.\n+ """\n+\n+ parser = ArgParseFuncs.get_args()\n+ args = vars(parser.parse_args())\n+\n+ # Add in any parameters in params.\n+ if params is not None:\n+ for k, v in params.items():\n+ args[k] = v\n+\n+ # If being run from the command line, print out all parameters.\n+ if __name__ == "__main__":\n+ print("\\nPARAMETERS:\\n")\n+ for k in sorted(args.keys()):\n+ print(k.rjust(13) + ": " + str(args[k]))\n+ print("")\n+\n+ if args["test"]:\n+ # Run tests.\n+ TestFuncs.test()\n+ else:\n+ # Run protonation\n+ if "output_file" in args and args["output_file"] is not None:\n+ # An output file was specified, so write to that.\n+ with open(args["output_file"], "w") as file:\n+ for protonated_smi in Protonate(args):\n+ file.write(protonated_smi + "\\n")\n+ elif "return_as_list" in args and args["return_as_list"]:\n+ return list(Protonate(args))\n+ else:\n+ # No output file specified. Just print it to the screen.\n+ for protonated_smi in Protonate(args):\n+ print(protonated_smi)\n+\n+\n+class MyParser(argparse.ArgumentParser):\n+ """Overwrite default parse so it displays help file on error. 
See\n+ https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""\n+\n+ def error(self, message):\n+ """Overwrites the default error message.\n+\n+ :param message: The default error message.\n+ """\n+\n+ self.print_help()\n+ msg = "ERROR: %s\\n\\n" % message\n+ print(msg)\n+ raise Exception(msg)\n+\n+ def print_help(self, file=None):\n+ """Overwrite the default print_help function\n+\n+ :param file: Output file, defaults to None\n+ """\n+\n+ print("")\n+\n+ if file is None:\n+ '..b'[1] for l in output])\n+ )\n+ print(msg)\n+ raise Exception(msg)\n+\n+ ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))\n+ ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"\n+ print(\n+ "(CORRECT) "\n+ + ph_range_str.ljust(10)\n+ + " "\n+ + args["smiles"]\n+ + " => "\n+ + " AND ".join([l[0] for l in output])\n+ )\n+\n+\n+def run(**kwargs):\n+ """A helpful, importable function for those who want to call Dimorphite-DL\n+ from another Python script rather than the command line. Note that this\n+ function accepts keyword arguments that match the command-line parameters\n+ exactly. If you want to pass and return a list of RDKit Mol objects, import\n+ run_with_mol_list() instead.\n+\n+ :param **kwargs: For a complete description, run dimorphite_dl.py from the\n+ command line with the -h option.\n+ :type kwargs: dict\n+ """\n+\n+ # Run the main function with the specified arguments.\n+ main(kwargs)\n+\n+\n+def run_with_mol_list(mol_lst, **kwargs):\n+ """A helpful, importable function for those who want to call Dimorphite-DL\n+ from another Python script rather than the command line. Note that this\n+ function is for passing Dimorphite-DL a list of RDKit Mol objects, together\n+ with command-line parameters. 
If you want to use only the same parameters\n+ that you would use from the command line, import run() instead.\n+\n+ :param mol_lst: A list of rdkit.Chem.rdchem.Mol objects.\n+ :type mol_lst: list\n+ :raises Exception: If the **kwargs includes "smiles", "smiles_file",\n+ "output_file", or "test" parameters.\n+ :return: A list of properly protonated rdkit.Chem.rdchem.Mol objects.\n+ :rtype: list\n+ """\n+\n+ # Do a quick check to make sure the user input makes sense.\n+ for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:\n+ if bad_arg in kwargs:\n+ msg = (\n+ "You\'re using Dimorphite-DL\'s run_with_mol_list(mol_lst, "\n+ + \'**kwargs) function, but you also passed the "\'\n+ + bad_arg\n+ + \'" argument. Did you mean to use the \'\n+ + "run(**kwargs) function instead?"\n+ )\n+ print(msg)\n+ raise Exception(msg)\n+\n+ # Set the return_as_list flag so main() will return the protonated smiles\n+ # as a list.\n+ kwargs["return_as_list"] = True\n+\n+ # Having reviewed the code, it will be very difficult to rewrite it so\n+ # that a list of Mol objects can be used directly. Intead, convert this\n+ # list of mols to smiles and pass that. Not efficient, but it will work.\n+ protonated_smiles_and_props = []\n+ for m in mol_lst:\n+ props = m.GetPropsAsDict()\n+ kwargs["smiles"] = Chem.MolToSmiles(m, isomericSmiles=True)\n+ protonated_smiles_and_props.extend(\n+ [(s.split("\\t")[0], props) for s in main(kwargs)]\n+ )\n+\n+ # Now convert the list of protonated smiles strings back to RDKit Mol\n+ # objects. 
Also, add back in the properties from the original mol objects.\n+ mols = []\n+ for s, props in protonated_smiles_and_props:\n+ m = Chem.MolFromSmiles(s)\n+ if m:\n+ for prop, val in props.items():\n+ if type(val) is int:\n+ m.SetIntProp(prop, val)\n+ elif type(val) is float:\n+ m.SetDoubleProp(prop, val)\n+ elif type(val) is bool:\n+ m.SetBoolProp(prop, val)\n+ else:\n+ m.SetProp(prop, str(val))\n+ mols.append(m)\n+ else:\n+ UtilFuncs.eprint(\n+ "WARNING: Could not process molecule with SMILES string "\n+ + s\n+ + " and properties "\n+ + str(props)\n+ )\n+\n+ return mols\n+\n+\n+if __name__ == "__main__":\n+ main()\n' |
b |
diff -r 000000000000 -r 5c501eb8d56c rdconf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rdconf.py Sat Dec 04 16:39:31 2021 +0000 |
[ |
#!/usr/bin/python3
"""
Generate 3D conformers for every molecule of a SMILES file.

This script was originally written by David Koes, University of Pittsburgh:
https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py
It is licensed under the MIT licence.

Given a smiles file, generate 3D conformers in output sdf.
Energy minimizes and filters conformers to meet energy window and rms constraints.

Some time ago I compared this to alternative conformer generators and
it was quite competitive (especially after RDKit's UFF implementation
added OOP terms).
"""

import gzip
import os
import sys
from optparse import OptionParser

from rdkit.Chem import AllChem as Chem


def getRMS(mol, c1, c2):
    """Return the best-alignment RMS between conformers c1 and c2 of mol."""
    rms = Chem.GetBestRMS(mol, mol, c1, c2)
    return rms


parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf")
parser.add_option(
    "--maxconfs",
    dest="maxconfs",
    action="store",
    help="maximum number of conformers to generate per a molecule (default 20)",
    default="20",
    type="int",
    metavar="CNT",
)
parser.add_option(
    "--sample_multiplier",
    dest="sample",
    action="store",
    help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)",
    default="1",
    type="float",
    metavar="N",
)
parser.add_option(
    "--seed",
    dest="seed",
    action="store",
    help="random seed (default 9162006)",
    default="9162006",
    type="int",
    metavar="s",
)
parser.add_option(
    "--rms_threshold",
    dest="rms",
    action="store",
    help="filter based on rms (default 0.7)",
    default="0.7",
    type="float",
    metavar="R",
)
parser.add_option(
    "--energy_window",
    dest="energy",
    action="store",
    help="filter based on energy difference with lowest energy conformer",
    default="10",
    type="float",
    metavar="E",
)
parser.add_option(
    "-v",
    "--verbose",
    dest="verbose",
    action="store_true",
    default=False,
    help="verbose output",
)
parser.add_option(
    "--mmff",
    dest="mmff",
    action="store_true",
    default=False,
    help="use MMFF forcefield instead of UFF",
)
parser.add_option(
    "--nomin",
    dest="nomin",
    action="store_true",
    default=False,
    help="don't perform energy minimization (bad idea)",
)
parser.add_option(
    "--etkdg",
    dest="etkdg",
    action="store_true",
    default=False,
    help="use new ETKDG knowledge-based method instead of distance geometry",
)


(options, args) = parser.parse_args()

if len(args) < 2:
    # parser.error() prints the usage and terminates the process itself.
    parser.error("Need input and output")

in_path = args[0]
output = args[1]
smifile = open(in_path)
if options.verbose:
    print("Generating a maximum of", options.maxconfs, "per a mol")

# getattr() so that an RDKit build without ETKDG yields the message below
# instead of an AttributeError.
if options.etkdg and not getattr(Chem, "ETKDG", None):
    print("ETKDG does not appear to be implemented. Please upgrade RDKit.")
    sys.exit(1)

split = os.path.splitext(output)
if split[1] == ".gz":
    outf = gzip.open(output, "wt+")
    output = split[0]  # strip .gz so the real format extension is inspected
else:
    outf = open(output, "w+")


if os.path.splitext(output)[1] == ".pdb":
    sdwriter = Chem.PDBWriter(outf)
else:
    sdwriter = Chem.SDWriter(outf)

if sdwriter is None:
    print("Could not open " + output)
    sys.exit(-1)

# Number of conformers to embed per molecule; identical for every input line.
n_samples = int(options.sample * options.maxconfs)

try:
    for line in smifile:
        toks = line.split()
        if not toks:
            # Blank line: nothing to parse (toks[0] would raise IndexError).
            continue
        smi = toks[0]
        name = " ".join(toks[1:])

        pieces = smi.split(".")
        if len(pieces) > 1:
            smi = max(pieces, key=len)  # take largest component by length
            print("Taking largest component: %s\t%s" % (smi, name))

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            print("ERROR:", smi)
            continue

        if options.verbose:
            print(smi)
        try:
            Chem.SanitizeMol(mol)
            mol = Chem.AddHs(mol)
            mol.SetProp("_Name", name)

            if options.etkdg:
                # NOTE(review): --seed is not applied in this branch; the
                # ETKDG parameter object is passed with its defaults.
                cids = Chem.EmbedMultipleConfs(mol, n_samples, Chem.ETKDG())
            else:
                cids = Chem.EmbedMultipleConfs(
                    mol, n_samples, randomSeed=options.seed
                )
            if options.verbose:
                print(len(cids), "conformers found")

            # cenergy[i] is the ranking key of conformer id i.
            cenergy = []
            for conf in cids:
                # not passing confID only minimizes the first conformer
                if options.nomin:
                    # No minimization: the conformer id itself is the key,
                    # and there is no convergence state to report.
                    cenergy.append(conf)
                    continue
                if options.mmff:
                    # The optimizer returns 0 when converged, hence "== 0".
                    converged = Chem.MMFFOptimizeMolecule(mol, confId=conf) == 0
                    mp = Chem.MMFFGetMoleculeProperties(mol)
                    cenergy.append(
                        Chem.MMFFGetMoleculeForceField(
                            mol, mp, confId=conf
                        ).CalcEnergy()
                    )
                else:
                    converged = not Chem.UFFOptimizeMolecule(mol, confId=conf)
                    cenergy.append(
                        Chem.UFFGetMoleculeForceField(mol, confId=conf).CalcEnergy()
                    )
                if options.verbose:
                    print("Convergence of conformer", conf, converged)

            mol = Chem.RemoveHs(mol)
            sortedcids = sorted(cids, key=lambda cid: cenergy[cid])
            if len(sortedcids) > 0:
                mine = cenergy[sortedcids[0]]
            else:
                mine = 0
            if options.rms == 0:
                # No RMS filtering: write the lowest-energy conformers that
                # fall inside the energy window (a negative window disables it).
                cnt = 0
                for conf in sortedcids:
                    if cnt >= options.maxconfs:
                        break
                    if (options.energy < 0) or cenergy[conf] - mine <= options.energy:
                        sdwriter.write(mol, conf)
                        cnt += 1
            else:
                # Greedy selection in energy order: a conformer is kept only
                # if it differs enough from every conformer already written.
                written = {}
                for conf in sortedcids:
                    if len(written) >= options.maxconfs:
                        break
                    # check rmsd
                    passed = True
                    for seenconf in written.keys():
                        rms = getRMS(mol, seenconf, conf)
                        if (rms < options.rms) or (
                            options.energy > 0 and cenergy[conf] - mine > options.energy
                        ):
                            passed = False
                            break
                    if passed:
                        written[conf] = True
                        sdwriter.write(mol, conf)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            # Best effort: report the failure and go on with the next molecule.
            print("Exception", e)
finally:
    # Flush and release all handles even when the run is interrupted.
    sdwriter.close()
    outf.close()
    smifile.close()
b |
diff -r 000000000000 -r 5c501eb8d56c rdconf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rdconf.xml Sat Dec 04 16:39:31 2021 +0000 |
[ |
@@ -0,0 +1,99 @@ +<tool id="rdconf" name="RDConf: Low-energy ligand conformer search" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@"> + <description>using RDKit</description> + <macros> + <token name="@TOOL_VERSION@">2020.03.4</token> + <token name="@GALAXY_VERSION@">0</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">rdkit</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python '$__tool_directory__/rdconf.py' + '$infile' + --maxconfs '$CNT' + --sample_multiplier '$N' + --seed '$s' + --rms_threshold '$R' + --energy_window '$E' + $mmff + $nomin + $etkdg + '$outfile' + ]]></command> + <inputs> + <param name="infile" type="data" format="smi" label="Input file" help="Input file in SMILES format."/> + <param name="CNT" type="integer" value="20" label="Maximum number of conformers to generate per molecule"/> + <param name="N" type="integer" value="1" label="Sample an integer multiple of the maximum conformer value and choose the conformers with lowest energy"/> + <param name="s" type="integer" value="9162006" label="Random seed"/> + <param name="R" type="float" value="0.7" min="0" label="RMSD threshold" help="If a conformer is too similar to those already generated (below the RMSD threshold), it is discarded and regenerated."/> + <param name="E" type="float" value="10" label="Energy window threshold (kcal/mol)."/> + <param name="mmff" type="boolean" value="" truevalue="--mmff" falsevalue="" label="Use MMFF forcefield (default UFF)"/> + <param name="nomin" type="boolean" value="" truevalue="--nomin" falsevalue="" label="Skip minimization" help="Not recommended"/> + <param name="etkdg" type="boolean" value="" truevalue="--etkdg" falsevalue="" label="Use new ETKDG knowledge-based method (default distance geometry)"/> + </inputs> + + <outputs> + <data name="outfile" format="sdf" label="Output for ${tool.name}"/> + </outputs> + + <tests> + <test> + <param name="infile" value="staurosporine.smi" 
ftype="smi"/> + <param name="CNT" value="100"/> + <param name="N" value="1"/> + <param name="s" value="100"/> + <param name="R" value="0.7"/> + <param name="E" value="10"/> + <param name="mmff" value="--mmff" /> + <param name="nomin" value="--nomin" /> + <param name="etkdg" value="--etkdg" /> + <output name="outfile" ftype="sdf" file="rdconf_output.sdf"/> + </test> + </tests> + <help><![CDATA[ + +.. class:: infomark + +**What this tool does** + + +This tool generates low-energy conformers for a set of input molecules, using the chemistry toolkit RDKit. It is based on a script written by David Koes. + +----- + +.. class:: infomark + +**Input** + +| - Molecules in `SMI format`_ +| - A number of other parameters can be set; the most important include the number of conformers (default 20) to generate and the minimum RMSD difference (default 0.7) between them. + +.. _SMI format: https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system + +----- + +.. class:: infomark + +**Output** + +`SD-file`_ containing generated conformers. + +.. _SD-file: http://en.wikipedia.org/wiki/Chemical_table_file + +]]></help> + <citations> + <citation type="bibtex"> + @article{rdkit, + author = {Greg Landrum and others}, + title = {RDKit: Open-source cheminformatics}, + url ={http://www.rdkit.org} + }</citation> + <citation type="bibtex"> + @article{rdconf, + author = {David Koes}, + title = {RDConf: Low-energy ligand conformer search}, + url ={https://github.com/dkoes/rdkit-scripts} + }</citation> + </citations> +</tool> + |
b |
diff -r 000000000000 -r 5c501eb8d56c rdkit_descriptors.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rdkit_descriptors.py Sat Dec 04 16:39:31 2021 +0000 |
[ |
#!/usr/bin/env python
"""Calculate RDKit descriptors for the molecules of an input file.

The result is written as tab-separated text: one row per molecule, the
molecule identifier first, followed by one column per descriptor.
"""

import argparse
import inspect
import sys

from rdkit import Chem
from rdkit.Chem import Descriptors


def get_supplier(infile, format="smiles"):
    """
    Returns a generator over a SMILES or InChI file. Every element is of RDKit
    molecule and has its original string as _Name property.

    Lines that cannot be parsed yield False instead of a molecule.

    :param infile: path of a text file holding one molecule per line
    :param format: "smiles" or "inchi"
    :raises ValueError: on the first iteration, if format is not supported
    """
    if format not in ("smiles", "inchi"):
        # Previously an unknown format left `mol` unbound (NameError).
        raise ValueError("Unsupported input format: %r" % (format,))
    with open(infile) as handle:
        for line in handle:
            line = line.strip()
            if format == "smiles":
                mol = Chem.MolFromSmiles(line, sanitize=True)
            else:
                mol = Chem.inchi.MolFromInchi(
                    line,
                    sanitize=True,
                    removeHs=True,
                    logLevel=None,
                    treatWarningAsError=False,
                )
            if mol is None:
                yield False
            else:
                # Only the first tab-separated field is used as the name.
                mol.SetProp("_Name", line.split("\t")[0])
                yield mol


def get_rdkit_descriptor_functions():
    """
    Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function)

    The returned list is sorted and also contains FormalCharge and SSSR.
    """
    ret = [
        (name, f)
        for name, f in inspect.getmembers(Descriptors)
        if inspect.isfunction(f) and not name.startswith("_")
    ]
    # some which are not in the official Descriptors module we need to add manually
    ret.extend([("FormalCharge", Chem.GetFormalCharge), ("SSSR", Chem.GetSSSR)])
    ret.sort()
    return ret


def descriptors(mol, functions):
    """
    Calculates the descriptors of a given molecule.

    :param mol: molecule handed to every descriptor function
    :param functions: iterable of (name, function) pairs
    :return: generator of (name, value) pairs, in the order of functions
    """
    for name, function in functions:
        yield (name, function(mol))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Path to the input file.")
    parser.add_argument("--iformat", help="Specify the input file format.")

    parser.add_argument(
        "-o",
        "--outfile",
        type=argparse.FileType("w+"),
        default=sys.stdout,
        help="path to the result file, default is stdout",
    )

    parser.add_argument(
        "-s",
        "--select",
        default=None,
        help="select a subset of comma-separated descriptors to use",
    )

    parser.add_argument(
        "--header",
        dest="header",
        action="store_true",
        default=False,
        help="Write header line.",
    )

    args = parser.parse_args()

    if args.iformat == "sdf":
        supplier = Chem.SDMolSupplier(args.infile)
    elif args.iformat == "smi":
        supplier = get_supplier(args.infile, format="smiles")
    elif args.iformat == "inchi":
        supplier = get_supplier(args.infile, format="inchi")
    elif args.iformat == "pdb":
        supplier = [Chem.MolFromPDBFile(args.infile)]
    elif args.iformat == "mol2":
        supplier = [Chem.MolFromMol2File(args.infile)]
    else:
        # Without this branch `supplier` stayed unbound and the script died
        # later with a NameError; report the actual problem instead.
        parser.error(
            "--iformat must be one of sdf, smi, inchi, pdb, mol2 (got %r)"
            % (args.iformat,)
        )

    functions = get_rdkit_descriptor_functions()
    # The literal string "None" is treated like an absent selection.
    if args.select and args.select != "None":
        selected = set(args.select.split(","))
        functions = [(name, f) for name, f in functions if name in selected]

    if args.header:
        args.outfile.write(
            "%s\n" % "\t".join(["MoleculeID"] + [name for name, f in functions])
        )

    for mol in supplier:
        if not mol:
            # Unparsable record: skip it.
            continue
        descs = descriptors(mol, functions)
        try:
            molecule_id = mol.GetProp("_Name")
        except KeyError:
            # No name stored on the molecule: fall back to its SMILES.
            molecule_id = Chem.MolToSmiles(mol)
        args.outfile.write(
            "%s\n"
            % "\t".join([molecule_id] + [str(round(res, 6)) for name, res in descs])
        )
b |
diff -r 000000000000 -r 5c501eb8d56c sdf_to_tab.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sdf_to_tab.py Sat Dec 04 16:39:31 2021 +0000 |
[ |
#!/usr/bin/env python3
"""Convert the properties stored in an SD-file into a tab-separated table."""
import argparse

import pandas as pd
from rdkit import Chem


def _append_row(df, row):
    """Return df with the dict row added as a new last row."""
    if hasattr(df, "append"):
        # pandas < 2.0: keep the original call so existing output is unchanged.
        return df.append(row, ignore_index=True)
    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    return pd.concat([df, pd.DataFrame([row])], ignore_index=True)


def sdf_to_tab(vars):
    """Write the properties of every readable molecule to a tabular file.

    :param vars: parsed command-line namespace providing inp, out, props,
        header, smiles and name (see main())
    :raises SystemExit: if no molecule of the input file could be read
    """
    mols = Chem.SDMolSupplier(vars.inp, sanitize=False)
    df = pd.DataFrame()  # for output

    # Both values depend only on the command line, so compute them once.
    keep_all = vars.props.strip() == ""  # none specified, return all
    requested = set(vars.props.replace(" ", "").split(","))

    for n, mol in enumerate(mols):
        if not mol:
            print("Molecule could not be read - skipped.")
            continue

        d = mol.GetPropsAsDict()
        # filter dict for desired props
        if keep_all:
            d = {
                prop: val
                for (prop, val) in d.items()
                if not any(x in str(val) for x in ["\n", "\t"])
            }  # remove items containing newlines or tabs
        else:
            d = {
                prop: val for (prop, val) in d.items() if prop in requested
            }  # remove items not requested via CLI
        if vars.name:
            d["SDFMoleculeName"] = mol.GetProp("_Name")
        if vars.smiles:
            d["SMILES"] = Chem.MolToSmiles(mol, isomericSmiles=False)
        # Position of the record in the input file, used as the row index.
        d["Index"] = int(n)

        df = _append_row(df, d)

    if df.empty:
        # Without any row there is no "Index" column and the conversion below
        # would fail with an unhelpful KeyError.
        raise SystemExit("No molecule could be read from %s" % vars.inp)

    df = df.astype({"Index": int}).set_index("Index")
    sorted_cols = sorted(df.columns.values.tolist())
    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)


def main():
    """Parse the command line and run the conversion."""
    parser = argparse.ArgumentParser(description="Convert SDF to tabular")
    parser.add_argument("--inp", "-i", help="The input file", required=True)
    parser.add_argument("--out", "-o", help="The output file", required=True)
    parser.add_argument(
        "--props",
        "-p",
        help="Properties to filter (leave blank for all)",
        required=True,
    )
    parser.add_argument(
        "--header",
        "-t",
        action="store_true",
        help="Write property name as the first row.",
    )
    parser.add_argument(
        "--smiles", "-s", action="store_true", help="Include SMILES in output."
    )
    parser.add_argument(
        "--name", "-n", action="store_true", help="Include molecule name in output."
    )
    sdf_to_tab(parser.parse_args())


if __name__ == "__main__":
    main()
b |
diff -r 000000000000 -r 5c501eb8d56c site_substructures.smarts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/site_substructures.smarts Sat Dec 04 16:39:31 2021 +0000 |
[ |
@@ -0,0 +1,39 @@ +*Azide [N+0:1]=[N+:2]=[N+0:3]-[H] 2 4.65 0.07071067811865513 +Nitro [C,c,N,n,O,o:1]-[NX3:2](=[O:3])-[O:4]-[H] 3 -1000.0 0 +AmidineGuanidine1 [N:1]-[C:2](-[N:3])=[NX2:4]-[H:5] 3 12.025333333333334 1.5941046150769165 +AmidineGuanidine2 [C:1](-[N:2])=[NX2+0:3] 2 10.035538461538462 2.1312826469414716 +Sulfate [SX4:1](=[O:2])(=[O:3])([O:4]-[C,c,N,n:5])-[OX2:6]-[H] 5 -2.36 1.3048043093561141 +Sulfonate [SX4:1](=[O:2])(=[O:3])(-[C,c,N,n:4])-[OX2:5]-[H] 4 -1.8184615384615386 1.4086213481855594 +Sulfinic_acid [SX3:1](=[O:2])-[O:3]-[H] 2 1.7933333333333332 0.4372070447739835 +Phenyl_carboxyl [c,n,o:1]-[C:2](=[O:3])-[O:4]-[H] 3 3.463441968255319 1.2518054407928614 +Carboxyl [C:1](=[O:2])-[O:3]-[H] 2 3.456652971502591 1.2871420886834017 +Thioic_acid [C,c,N,n:1](=[O,S:2])-[SX2,OX2:3]-[H] 2 0.678267 1.497048763660801 +Phenyl_Thiol [c,n:1]-[SX2:2]-[H] 1 4.978235294117647 2.6137000480499806 +Thiol [C,N:1]-[SX2:2]-[H] 1 9.12448275862069 1.3317968158171463 +Phosphate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[O+0:4])-[OX2:5]-[H] 2 2.4182608695652172 1.1091177991945305 5 6.5055 0.9512787792174668 +Phosphonate [PX4:1](=[O:2])(-[OX2:3]-[H])(-[C,c,N,n:4])-[OX2:5]-[H] 2 1.8835714285714287 0.5925999820080644 5 7.247254901960784 0.8511476450801531 +Phenol [c,n,o:1]-[O:2]-[H] 1 7.065359866910526 3.277356122295936 +Peroxide1 [O:1]([$(C=O),$(C[Cl]),$(CF),$(C[Br]),$(CC#N):2])-[O:3]-[H] 2 8.738888888888889 0.7562592839596507 +Peroxide2 [C:1]-[O:2]-[O:3]-[H] 2 11.978235294117647 0.8697645895163075 +O=C-C=C-OH [O:1]=[C;R:2]-[C;R:3]=[C;R:4]-[O:5]-[H] 4 3.554 0.803339458581667 +Vinyl_alcohol [C:1]=[C:2]-[O:3]-[H] 2 8.871850714285713 1.660200255394124 +Alcohol [C:1]-[O:2]-[H] 1 14.780384615384616 2.546464970533435 +N-hydroxyamide [C:1](=[O:2])-[N:3]-[O:4]-[H] 3 9.301904761904762 1.2181897185891002 +*Ringed_imide1 [O,S:1]=[C;R:2]([$([#8]),$([#7]),$([#16]),$([#6][Cl]),$([#6]F),$([#6][Br]):3])-[N;R:4]([C;R:5]=[O,S:6])-[H] 3 6.4525 0.5555627777308341 +*Ringed_imide2 
[O,S:1]=[C;R:2]-[N;R:3]([C;R:4]=[O,S:5])-[H] 2 8.681666666666667 1.8657779975741713 +*Imide [F,Cl,Br,S,s,P,p:1][#6:2][CX3:3](=[O,S:4])-[NX3+0:5]([CX3:6]=[O,S:7])-[H] 4 2.466666666666667 1.4843629385474877 +*Imide2 [O,S:1]=[CX3:2]-[NX3+0:3]([CX3:4]=[O,S:5])-[H] 2 10.23 1.1198214143335534 +*Amide_electronegative [C:1](=[O:2])-[N:3](-[Br,Cl,I,F,S,O,N,P:4])-[H] 2 3.4896 2.688124315081677 +*Amide [C:1](=[O:2])-[N:3]-[H] 2 12.00611111111111 4.512491341218857 +*Sulfonamide [SX4:1](=[O:2])(=[O:3])-[NX3+0:4]-[H] 3 7.9160326086956525 1.9842121316708763 +Anilines_primary [c:1]-[NX3+0:2]([H:3])[H:4] 1 3.899298673194805 2.068768503987161 +Anilines_secondary [c:1]-[NX3+0:2]([H:3])[!H:4] 1 4.335408163265306 2.1768842022330843 +Anilines_tertiary [c:1]-[NX3+0:2]([!H:3])[!H:4] 1 4.16690685045614 2.005865735782679 +Aromatic_nitrogen_unprotonated [n+0&H0:1] 0 4.3535441240733945 2.0714072661859584 +Amines_primary_secondary_tertiary [C:1]-[NX3+0:2] 1 8.159107682388349 2.5183597445318147 +Phosphinic_acid [PX4:1](=[O:2])(-[C,c,N,n,F,Cl,Br,I:3])(-[C,c,N,n,F,Cl,Br,I:4])-[OX2:5]-[H] 4 2.9745 0.6867886750744557 +Phosphate_diester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[O+0:5]-[C,c,N,n,F,Cl,Br,I:4])-[OX2:6]-[H] 6 2.7280434782608696 2.5437448856908316 +Phosphonate_ester [PX4:1](=[O:2])(-[OX2:3]-[C,c,N,n,F,Cl,Br,I:4])(-[C,c,N,n,F,Cl,Br,I:5])-[OX2:6]-[H] 5 2.0868 0.4503028610465036 +Primary_hydroxyl_amine [C,c:1]-[O:2]-[NH2:3] 2 4.035714285714286 0.8463816543155368 +*Indole_pyrrole [c;R:1]1[c;R:2][c;R:3][c;R:4][n;R:5]1[H] 4 14.52875 4.06702491591416 +*Aromatic_nitrogen_protonated [n:1]-[H] 0 7.17 2.94602395490212 |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/CID_3037.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_3037.sdf Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,220 @@ +3037 + -OEChem-08231108593D + + 27 28 0 0 0 0 0 0 0999 V2000 + -4.8550 1.3401 0.2120 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8529 -1.3406 0.2121 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1809 -2.1668 -0.3789 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1788 2.1664 -0.3787 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0011 -0.0002 1.4744 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2222 -0.2738 0.6597 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2377 0.2772 0.6480 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2586 -1.3462 -0.2316 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2565 1.3457 -0.2314 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3343 0.5568 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3322 -0.5574 0.7972 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4069 -1.5879 -0.9855 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4048 1.5875 -0.9852 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4827 0.3152 0.0433 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4807 -0.3156 0.0435 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.5190 -0.7571 -0.8481 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5170 0.7568 -0.8478 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.1548 0.8649 2.1342 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1601 -0.8435 2.1593 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3089 1.3938 1.4913 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3053 -1.3909 1.4943 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4415 -2.4213 -1.6818 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4469 2.4191 -1.6835 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.4070 -0.9574 -1.4422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4050 0.9570 -1.4418 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2961 -2.2262 0.4641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3872 2.8487 -1.0397 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 14 1 0 0 0 0 + 2 15 1 0 0 0 0 + 3 8 1 0 0 0 0 + 3 26 1 0 0 0 0 + 4 9 1 0 0 0 0 + 4 27 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 1 0 0 0 0 + 5 18 1 0 0 0 0 + 5 19 1 0 0 0 0 + 6 8 2 0 0 0 0 + 6 10 1 0 0 0 0 + 7 9 2 0 0 0 0 + 7 11 1 0 0 0 0 + 8 12 1 0 0 0 0 + 9 13 1 0 0 0 0 + 10 14 2 0 0 0 0 + 10 20 1 0 0 0 0 + 11 15 2 0 0 0 0 + 11 21 1 0 0 0 0 + 12 16 2 0 0 0 0 + 12 22 1 0 0 0 0 + 13 17 2 0 0 0 0 + 13 23 1 0 0 0 0 + 14 16 1 0 0 0 0 + 15 17 1 0 0 0 0 + 16 24 1 0 0 0 0 + 17 25 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +3037 + +> <PUBCHEM_CONFORMER_RMSD> 
+0.6 + +> <PUBCHEM_CONFORMER_DIVERSEORDER> +8 +10 +12 +1 +7 +5 +11 +3 +6 +9 +4 +2 + +> <PUBCHEM_MMFF94_PARTIAL_CHARGES> +25 +1 -0.18 +10 -0.15 +11 -0.15 +12 -0.15 +13 -0.15 +14 0.18 +15 0.18 +16 -0.15 +17 -0.15 +2 -0.18 +20 0.15 +21 0.15 +22 0.15 +23 0.15 +24 0.15 +25 0.15 +26 0.45 +27 0.45 +3 -0.53 +4 -0.53 +5 0.29 +6 -0.14 +7 -0.14 +8 0.08 +9 0.08 + +> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> +2 + +> <PUBCHEM_PHARMACOPHORE_FEATURES> +4 +1 3 donor +1 4 donor +6 6 8 10 12 14 16 rings +6 7 9 11 13 15 17 rings + +> <PUBCHEM_HEAVY_ATOM_COUNT> +17 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +5 + +> <PUBCHEM_CONFORMER_ID> +00000BDD00000008 + +> <PUBCHEM_MMFF94_ENERGY> +44.6858 + +> <PUBCHEM_FEATURE_SELFOVERLAP> +20.297 + +> <PUBCHEM_SHAPE_FINGERPRINT> +10062212 137 18261117369936506423 +104564 63 17986963035811110412 +11458722 120 18339359768245870841 +11471102 22 5472872458301843344 +11578080 2 18190204380446433792 +116883 192 18265608969609498196 +12236239 1 18410856576819659107 +12592029 89 18338223951597366363 +13549 16 18410575084668353682 +13693222 15 6555421915516066822 +13764800 53 14189033175566991199 +14115302 16 18186237320680093898 +14341114 328 10087642619424135543 +14787075 74 9511159855286719151 +14993402 34 18410855451538227223 +15099037 51 18340768233908588503 +15207287 21 15719111361650760302 +15375358 24 15647053767618106914 +15775835 57 18272650117329930317 +16945 1 17906452130063974618 +17834072 14 15936410035134206066 +18186145 218 17132117918276567720 +19422 9 18271525295227750719 +20279233 1 15719389529571237654 +20645476 183 18339080393619327415 +23402539 116 18186809105365620101 +23402655 69 18342736308283284156 +23559900 14 17603590712323212176 +25 1 17561083592297532664 +26918003 58 6266902359448424189 +296302 2 
15213020427345972082 +3082319 5 18338798905472319583 +34934 24 18341891845236497020 +633830 44 17703790310130762689 +74978 22 18266740181857992718 +7832392 63 18340206284835898173 +81228 2 15720767252053392762 +9981440 41 17403743242177431832 + +> <PUBCHEM_SHAPE_MULTIPOLES> +341.85 +8.38 +1.9 +1.1 +0.02 +0 +-1.15 +1.94 +-0.01 +0 +-0.39 +-4.15 +0.01 +0 + +> <PUBCHEM_SHAPE_SELFOVERLAP> +722.787 + +> <PUBCHEM_SHAPE_VOLUME> +193 + +> <PUBCHEM_COORDINATE_TYPE> +2 +5 +255 + +$$$$ + |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/CID_3037.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CID_3037.tab Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,2 @@ +Index PUBCHEM_ATOM_DEF_STEREO_COUNT PUBCHEM_ATOM_UDEF_STEREO_COUNT PUBCHEM_BOND_DEF_STEREO_COUNT PUBCHEM_BOND_UDEF_STEREO_COUNT PUBCHEM_CACTVS_TAUTO_COUNT PUBCHEM_COMPONENT_COUNT PUBCHEM_COMPOUND_CID PUBCHEM_CONFORMER_ID PUBCHEM_CONFORMER_RMSD PUBCHEM_EFFECTIVE_ROTOR_COUNT PUBCHEM_FEATURE_SELFOVERLAP PUBCHEM_HEAVY_ATOM_COUNT PUBCHEM_ISOTOPIC_ATOM_COUNT PUBCHEM_MMFF94_ENERGY PUBCHEM_SHAPE_SELFOVERLAP PUBCHEM_SHAPE_VOLUME SDFMoleculeName +0 0.0 0.0 0.0 0.0 5.0 1.0 3037.0 00000BDD00000008 0.6 2.0 20.297 17.0 0.0 44.6858 722.787 193.0 3037 |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/ligand.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ligand.sdf Sat Dec 04 16:39:31 2021 +0000 |
b |
b"@@ -0,0 +1,612 @@\n+pose1\n+ OpenBabel09021916093D\n+\n+ 16 16 0 0 0 0 0 0 0 0999 V2000\n+ 30.7270 -43.7450 76.1540 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.2320 -43.8770 76.1820 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.9030 -44.5770 75.1900 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.2880 -44.7130 75.1870 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.9550 -45.4820 74.0800 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 35.0120 -44.1090 76.2270 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.3700 -43.3850 77.2450 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 35.1200 -42.7180 78.3740 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.9820 -43.2890 77.1920 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 36.4390 -44.2310 76.2340 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 36.8300 -44.9440 76.7890 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 37.2890 -43.4500 75.5420 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 37.0090 -42.3660 75.0480 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 38.6760 -43.9990 75.3480 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 38.7980 -44.7930 74.1950 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 38.6010 -46.1780 74.4680 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1 2 1 0 0 0 0\n+ 2 9 2 0 0 0 0\n+ 3 2 1 0 0 0 0\n+ 4 3 2 0 0 0 0\n+ 4 6 1 0 0 0 0\n+ 5 4 1 0 0 0 0\n+ 6 10 1 0 0 0 0\n+ 6 7 2 0 0 0 0\n+ 7 8 1 0 0 0 0\n+ 9 7 1 0 0 0 0\n+ 10 11 1 0 0 0 0\n+ 12 10 1 0 0 0 0\n+ 13 12 2 0 0 0 0\n+ 14 12 1 0 0 0 0\n+ 15 16 1 0 0 0 0\n+ 15 14 1 0 0 0 0\n+M END\n+> <MODEL>\n+1\n+\n+> <REMARK>\n+ VINA RESULT: -4.9 0.000 0.000\n+ Name = \n+ 7 active torsions:\n+ status: ('A' for Active; 'I' for Inactive)\n+ 1 A between atoms: C_1 and C_5\n+ 2 A between atoms: C_2 and C_12\n+ 3 A between atoms: C_2 and O_15\n+ 4 A between atoms: C_3 and O_15\n+ 5 A between atoms: C_4 and C_7\n+ 6 A between atoms: C_8 and N_13\n+ 7 A between atoms: C_9 and C_11\n+ x y z vdW Elec q Type\n+ _______ _______ _______ _____ _____ ______ ____\n+\n+> <TORSDO>\n+F 3\n+\n+> <SCORE>\n+-4.9\n+\n+> <RMSD_LB>\n+0.000\n+\n+> <RMSD_UB>\n+0.000\n+\n+$$$$\n+pose2\n+ OpenBabel09021916093D\n+\n+ 16 16 0 0 0 0 0 0 0 0999 V2000\n+ 30.7330 -43.6060 76.3350 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.2340 -43.7750 76.2880 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 
33.0410 -43.2270 77.2740 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.4260 -43.3690 77.2600 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 35.2450 -42.7520 78.3610 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 35.0040 -44.0990 76.2100 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.2200 -44.6770 75.1980 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.8090 -45.4740 74.0590 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.8400 -44.4960 75.2690 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 36.4270 -44.2630 76.1860 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 36.8070 -45.0000 76.7160 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 37.2860 -43.4900 75.4970 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 37.0290 -42.3860 75.0350 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 38.6520 -44.0730 75.2620 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 38.7120 -44.8940 74.1230 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 38.5590 -46.2750 74.4420 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0 0 0 0\n+ 2 3 2 0 0 0 0\n+ 4 3 1 0 0 0 0\n+ 4 5 1 0 0 0 0\n+ 6 4 2 0 0 0 0\n+ 7 9 2 0 0 0 0\n+ 7 6 1 0 0 0 0\n+ 8 7 1 0 0 0 0\n+ 9 2 1 0 0 0 0\n+ 10 6 1 0 0 0 0\n+ 10 11 1 0 0 0 0\n+ 12 10 1 0 0 0 0\n+ 13 12 2 0 0 0 0\n+ 14 12 1 0 0 0 0\n+ 15 16 1 0 0 0 0\n+ 15 14 1 0 0 0 0\n+M END\n+> <MODEL>\n+2\n+\n+> <REMARK>\n+ VINA RESULT: -4.9 0.118 2.246\n+ Name = \n+ 7 active torsio"..b"330 -42.9960 74.1500 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 29.4010 -44.8100 72.4170 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.0440 -43.8770 74.8450 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.6940 -43.1840 74.5860 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 31.8900 -44.1170 76.1600 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 31.3600 -45.1120 76.6390 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.3840 -43.0420 77.0880 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.0560 -41.7460 76.6550 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.0630 -40.8000 77.7210 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 1 2 1 0 0 0 0\n+ 2 3 2 0 0 0 0\n+ 2 9 1 0 0 0 0\n+ 3 4 1 0 0 0 0\n+ 4 5 1 0 0 0 0\n+ 4 6 2 0 0 0 0\n+ 6 10 1 0 0 0 0\n+ 7 6 1 0 0 0 0\n+ 7 8 1 0 0 0 0\n+ 9 7 2 0 0 0 0\n+ 10 12 1 0 0 0 0\n+ 11 10 1 0 0 0 0\n+ 12 13 2 0 0 0 0\n+ 12 14 1 0 0 0 0\n+ 15 14 1 0 0 0 0\n+ 15 16 1 0 0 0 0\n+M END\n+> <MODEL>\n+8\n+\n+> <REMARK>\n+ VINA RESULT: -4.3 3.964 5.892\n+ Name = \n+ 7 
active torsions:\n+ status: ('A' for Active; 'I' for Inactive)\n+ 1 A between atoms: C_1 and C_5\n+ 2 A between atoms: C_2 and C_12\n+ 3 A between atoms: C_2 and O_15\n+ 4 A between atoms: C_3 and O_15\n+ 5 A between atoms: C_4 and C_7\n+ 6 A between atoms: C_8 and N_13\n+ 7 A between atoms: C_9 and C_11\n+ x y z vdW Elec q Type\n+ _______ _______ _______ _____ _____ ______ ____\n+\n+> <TORSDO>\n+F 3\n+\n+> <SCORE>\n+-4.3\n+\n+> <RMSD_LB>\n+3.964\n+\n+> <RMSD_UB>\n+5.892\n+\n+$$$$\n+pose9\n+ OpenBabel09021916093D\n+\n+ 16 16 0 0 0 0 0 0 0 0999 V2000\n+ 36.2810 -45.5880 74.7420 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.7970 -45.3090 74.6830 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.0060 -45.8780 73.6960 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.6360 -45.6430 73.6170 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 31.8340 -46.2940 72.5230 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.0540 -44.8050 74.5810 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.8200 -44.2170 75.6010 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 32.2240 -43.3170 76.6570 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 34.1850 -44.4900 75.6220 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 30.6450 -44.5600 74.5200 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 30.0630 -45.1230 75.0810 H 0 0 0 0 0 0 0 0 0 0 0 0\n+ 30.0510 -43.6230 73.7590 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 30.5710 -43.0480 72.8120 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 28.6490 -43.2470 74.1530 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.7000 -44.2290 73.8180 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.2950 -44.1460 72.4540 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0 0 0 0\n+ 2 9 2 0 0 0 0\n+ 3 2 1 0 0 0 0\n+ 4 3 2 0 0 0 0\n+ 4 6 1 0 0 0 0\n+ 5 4 1 0 0 0 0\n+ 6 7 2 0 0 0 0\n+ 7 9 1 0 0 0 0\n+ 7 8 1 0 0 0 0\n+ 10 6 1 0 0 0 0\n+ 10 11 1 0 0 0 0\n+ 12 14 1 0 0 0 0\n+ 12 10 1 0 0 0 0\n+ 13 12 2 0 0 0 0\n+ 15 14 1 0 0 0 0\n+ 16 15 1 0 0 0 0\n+M END\n+> <MODEL>\n+9\n+\n+> <REMARK>\n+ VINA RESULT: -4.3 3.971 6.363\n+ Name = \n+ 7 active torsions:\n+ status: ('A' for Active; 'I' for Inactive)\n+ 1 A between atoms: C_1 and C_5\n+ 2 A between atoms: C_2 and C_12\n+ 3 A between atoms: C_2 and O_15\n+ 4 A between atoms: C_3 and O_15\n+ 
5 A between atoms: C_4 and C_7\n+ 6 A between atoms: C_8 and N_13\n+ 7 A between atoms: C_9 and C_11\n+ x y z vdW Elec q Type\n+ _______ _______ _______ _____ _____ ______ ____\n+\n+> <TORSDO>\n+F 3\n+\n+> <SCORE>\n+-4.3\n+\n+> <RMSD_LB>\n+3.971\n+\n+> <RMSD_UB>\n+6.363\n+\n+$$$$\n" |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/ligand.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ligand.tab Sat Dec 04 16:39:31 2021 +0000 |
[ |
@@ -0,0 +1,10 @@ +Index RMSD_LB RMSD_UB SCORE SMILES +0 0.0 0.0 -4.9 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +1 0.118 2.246 -4.9 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +2 2.96 5.795 -4.9 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +3 2.958 5.379 -4.8 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +4 2.763 5.379 -4.5 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +5 3.106 4.85 -4.4 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +6 2.847 5.816 -4.4 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +7 3.964 5.892 -4.3 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C +8 3.971 6.363 -4.3 [H]N(C(=O)COC)C1=C(C)C=C(C)C=C1C |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/mol.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mol.pdb Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,72 @@ +COMPND CNCC(O)CCCc1ccccc1 +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +HETATM 1 C UNL 1 9.206 6.617 23.375 1.00 0.00 C +HETATM 2 N UNL 1 9.288 5.239 22.843 1.00 0.00 N1+ +HETATM 3 C UNL 1 9.901 4.245 23.787 1.00 0.00 C +HETATM 4 C UNL 1 9.552 2.837 23.275 1.00 0.00 C +HETATM 5 O UNL 1 10.280 2.666 22.057 1.00 0.00 O +HETATM 6 C UNL 1 9.877 1.780 24.331 1.00 0.00 C +HETATM 7 C UNL 1 9.398 0.387 23.922 1.00 0.00 C +HETATM 8 C UNL 1 10.424 -0.687 24.293 1.00 0.00 C +HETATM 9 C UNL 1 11.616 -0.605 23.380 1.00 0.00 C +HETATM 10 C UNL 1 11.773 -1.516 22.327 1.00 0.00 C +HETATM 11 C UNL 1 12.918 -1.487 21.533 1.00 0.00 C +HETATM 12 C UNL 1 13.916 -0.552 21.786 1.00 0.00 C +HETATM 13 C UNL 1 13.767 0.367 22.824 1.00 0.00 C +HETATM 14 C UNL 1 12.623 0.342 23.620 1.00 0.00 C +HETATM 15 H UNL 1 8.759 7.256 22.643 1.00 0.00 H +HETATM 16 H UNL 1 10.189 6.970 23.605 1.00 0.00 H +HETATM 17 H UNL 1 8.609 6.620 24.264 1.00 0.00 H +HETATM 18 H UNL 1 9.849 5.259 21.991 1.00 0.00 H +HETATM 19 H UNL 1 8.329 4.932 22.679 1.00 0.00 H +HETATM 20 H UNL 1 9.504 4.384 24.771 1.00 0.00 H +HETATM 21 H UNL 1 10.962 4.375 23.832 1.00 0.00 H +HETATM 22 H UNL 1 8.505 2.722 23.087 1.00 0.00 H +HETATM 23 H UNL 1 11.228 2.771 22.229 1.00 0.00 H +HETATM 24 H UNL 1 9.401 2.052 25.249 1.00 0.00 H +HETATM 25 H UNL 1 10.941 1.741 24.440 1.00 0.00 H +HETATM 26 H UNL 1 9.242 0.370 22.864 1.00 0.00 H +HETATM 27 H UNL 1 8.487 0.178 24.443 1.00 0.00 H +HETATM 28 H UNL 1 9.974 -1.653 24.199 1.00 0.00 H +HETATM 29 H UNL 1 10.746 -0.530 25.301 1.00 0.00 H +HETATM 30 H UNL 1 11.037 -2.214 22.138 1.00 0.00 H +HETATM 31 H UNL 1 13.025 -2.159 20.758 1.00 0.00 H +HETATM 32 H UNL 1 14.769 -0.538 21.204 1.00 0.00 H +HETATM 33 H UNL 1 14.504 1.066 23.003 1.00 0.00 H +HETATM 34 H UNL 1 12.517 1.022 24.389 1.00 0.00 H +CONECT 1 2 15 16 17 +CONECT 2 1 3 18 19 +CONECT 3 2 4 20 21 +CONECT 4 3 5 6 22 +CONECT 5 4 23 +CONECT 6 4 7 24 25 +CONECT 7 6 8 26 27 +CONECT 8 7 9 28 29 +CONECT 9 8 10 10 14 +CONECT 10 9 9 11 30 
+CONECT 11 10 12 12 31 +CONECT 12 11 11 13 32 +CONECT 13 12 14 14 33 +CONECT 14 9 13 13 34 +CONECT 15 1 +CONECT 16 1 +CONECT 17 1 +CONECT 18 2 +CONECT 19 2 +CONECT 20 3 +CONECT 21 3 +CONECT 22 4 +CONECT 23 5 +CONECT 24 6 +CONECT 25 6 +CONECT 26 7 +CONECT 27 7 +CONECT 28 8 +CONECT 29 8 +CONECT 30 10 +CONECT 31 11 +CONECT 32 12 +CONECT 33 13 +CONECT 34 14 +MASTER 0 0 0 0 0 0 0 0 34 0 34 0 +END |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/mol_pdb_charges.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mol_pdb_charges.tab Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,1 @@ +CNCC(O)CCCc1ccccc1 1 |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/mols.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mols.smi Sat Dec 04 16:39:31 2021 +0000 |
[ |
@@ -0,0 +1,3 @@ +NCCCCC(N)C(=O)O lysine +O=C(O)C(N)C alanine +N[C@@H](CC1=CC=CC=C1)C(O)=O phenylanaline |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/rdconf_output.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rdconf_output.sdf Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,166 @@ +staurosporine + RDKit 3D + + 35 42 0 0 0 0 0 0 0 0999 V2000 + -2.1656 1.4438 -2.0402 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5064 0.5224 -1.0006 C 0 0 1 0 0 0 0 0 0 0 0 0 + -2.5333 0.2902 0.0771 C 0 0 1 0 0 0 0 0 0 0 0 0 + -3.0448 -1.1355 -0.1222 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.8499 -2.0325 0.1086 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9248 -1.7530 -1.0664 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.2333 -0.6270 -1.7456 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4794 -1.8256 -0.6986 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2443 -2.8970 -0.6134 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9172 -4.2121 -0.8557 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8835 -5.2113 -0.7023 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1496 -4.8403 -0.3079 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4981 -3.5086 -0.0573 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5336 -2.5530 -0.2153 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5282 -1.1902 -0.0555 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4701 -0.2506 0.3363 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1276 1.0747 0.4177 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8462 1.4665 0.1077 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2026 2.7102 0.1107 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7041 3.9456 0.4421 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8338 5.0265 0.3768 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4727 4.8205 -0.0097 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9490 3.5396 -0.3412 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0910 2.4739 -0.2786 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2989 1.1681 -0.5288 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9108 0.5646 -0.2791 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2388 -0.7709 -0.3642 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2788 1.9083 0.8642 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3369 0.9521 1.0170 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8631 -0.3694 0.7084 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6279 -1.3464 0.7907 O 0 0 0 0 0 0 0 0 0 0 0 0 + -4.1928 -1.4531 0.6537 N 0 0 0 0 0 0 0 0 0 0 0 0 + -5.2852 -0.5939 0.2385 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.8857 0.2845 1.3121 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4801 1.0939 2.2570 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 6 + 2 3 1 0 + 3 4 1 0 + 4 5 1 0 + 6 5 1 1 + 6 7 1 0 + 6 8 1 0 + 8 9 1 0 + 9 10 2 0 + 10 11 1 0 + 11 12 2 0 + 12 13 1 0 + 13 14 2 0 + 14 15 1 0 + 
15 16 2 0 + 16 17 1 0 + 17 18 2 0 + 18 19 1 0 + 19 20 2 0 + 20 21 1 0 + 21 22 2 0 + 22 23 1 0 + 23 24 2 0 + 24 25 1 0 + 25 26 1 0 + 26 27 2 0 + 17 28 1 0 + 28 29 1 0 + 29 30 1 0 + 30 31 2 0 + 4 32 1 1 + 32 33 1 0 + 3 34 1 1 + 34 35 1 0 + 7 2 1 0 + 25 2 1 0 + 27 8 1 0 + 14 9 1 0 + 27 15 1 0 + 30 16 1 0 + 26 18 1 0 + 24 19 1 0 +M END +$$$$ +staurosporine + RDKit 3D + + 35 42 0 0 0 0 0 0 0 0999 V2000 + -2.3068 0.9355 -2.4621 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6484 0.1936 -1.2955 C 0 0 1 0 0 0 0 0 0 0 0 0 + -2.6628 -0.4491 -0.4739 C 0 0 1 0 0 0 0 0 0 0 0 0 + -2.1102 -1.3841 0.5829 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.3580 -2.4343 -0.1341 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6301 -1.9615 -1.3703 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.0034 -0.8456 -1.9685 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7323 -1.7671 -0.8854 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6653 -2.7175 -0.8097 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6030 -4.0416 -1.1929 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7336 -4.8209 -0.9993 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8672 -4.2655 -0.4408 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9097 -2.9234 -0.0599 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7941 -2.1557 -0.2513 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5524 -0.8186 0.0257 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.2591 0.2074 0.5765 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6138 1.4431 0.6965 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3053 1.6277 0.2726 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4639 2.7123 0.2701 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6421 4.0028 0.7116 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3869 4.9173 0.5986 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5824 4.5165 0.0416 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7175 3.2124 -0.3878 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7245 2.2626 -0.2991 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6365 0.9796 -0.6340 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6078 0.6133 -0.2727 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2251 -0.6244 -0.3999 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5496 2.4028 1.3130 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.7624 1.6652 1.5409 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5952 0.3088 1.0910 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.5219 -0.5061 1.1939 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3679 -0.6619 1.5643 N 0 0 0 0 
0 0 0 0 0 0 0 0 + -2.2073 0.2673 2.2885 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.6735 0.2555 0.0846 O 0 0 0 0 0 0 0 0 0 0 0 0 + -4.9519 -0.0773 -0.2685 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 6 + 2 3 1 0 + 3 4 1 0 + 4 5 1 0 + 6 5 1 1 + 6 7 1 0 + 6 8 1 0 + 8 9 1 0 + 9 10 2 0 + 10 11 1 0 + 11 12 2 0 + 12 13 1 0 + 13 14 2 0 + 14 15 1 0 + 15 16 2 0 + 16 17 1 0 + 17 18 2 0 + 18 19 1 0 + 19 20 2 0 + 20 21 1 0 + 21 22 2 0 + 22 23 1 0 + 23 24 2 0 + 24 25 1 0 + 25 26 1 0 + 26 27 2 0 + 17 28 1 0 + 28 29 1 0 + 29 30 1 0 + 30 31 2 0 + 4 32 1 1 + 32 33 1 0 + 3 34 1 1 + 34 35 1 0 + 7 2 1 0 + 25 2 1 0 + 27 8 1 0 + 14 9 1 0 + 27 15 1 0 + 30 16 1 0 + 26 18 1 0 + 24 19 1 0 +M END +$$$$ |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/rdkit_descriptors_result1.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rdkit_descriptors_result1.csv Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,2 @@ +BalabanJ BertzCT Chi0 Chi0n Chi0v Chi1 Chi1n Chi1v Chi2n Chi2v Chi3n Chi3v Chi4n Chi4v EState_VSA1 EState_VSA10 EState_VSA11 EState_VSA2 EState_VSA3 EState_VSA4 EState_VSA5 EState_VSA6 EState_VSA7 EState_VSA8 EState_VSA9 ExactMolWt FpDensityMorgan1 FpDensityMorgan2 FpDensityMorgan3 FractionCSP3 HallKierAlpha HeavyAtomCount HeavyAtomMolWt Ipc Kappa1 Kappa2 Kappa3 LabuteASA MaxAbsEStateIndex MaxAbsPartialCharge MaxEStateIndex MaxPartialCharge MinAbsEStateIndex MinAbsPartialCharge MinEStateIndex MinPartialCharge MolLogP MolMR MolWt NHOHCount NOCount NumAliphaticCarbocycles NumAliphaticHeterocycles NumAliphaticRings NumAromaticCarbocycles NumAromaticHeterocycles NumAromaticRings NumHAcceptors NumHDonors NumHeteroatoms NumRadicalElectrons NumRotatableBonds NumSaturatedCarbocycles NumSaturatedHeterocycles NumSaturatedRings NumValenceElectrons PEOE_VSA1 PEOE_VSA10 PEOE_VSA11 PEOE_VSA12 PEOE_VSA13 PEOE_VSA14 PEOE_VSA2 PEOE_VSA3 PEOE_VSA4 PEOE_VSA5 PEOE_VSA6 PEOE_VSA7 PEOE_VSA8 PEOE_VSA9 RingCount SMR_VSA1 SMR_VSA10 SMR_VSA2 SMR_VSA3 SMR_VSA4 SMR_VSA5 SMR_VSA6 SMR_VSA7 SMR_VSA8 SMR_VSA9 SlogP_VSA1 SlogP_VSA10 SlogP_VSA11 SlogP_VSA12 SlogP_VSA2 SlogP_VSA3 SlogP_VSA4 SlogP_VSA5 SlogP_VSA6 SlogP_VSA7 SlogP_VSA8 SlogP_VSA9 TPSA VSA_EState1 VSA_EState10 VSA_EState2 VSA_EState3 VSA_EState4 VSA_EState5 VSA_EState6 VSA_EState7 VSA_EState8 VSA_EState9 fr_Al_COO fr_Al_OH fr_Al_OH_noTert fr_ArN fr_Ar_COO fr_Ar_N fr_Ar_NH fr_Ar_OH fr_COO fr_COO2 fr_C_O fr_C_O_noCOO fr_C_S fr_HOCCN fr_Imine fr_NH0 fr_NH1 fr_NH2 fr_N_O fr_Ndealkylation1 fr_Ndealkylation2 fr_Nhpyrrole fr_SH fr_aldehyde fr_alkyl_carbamate fr_alkyl_halide fr_allylic_oxid fr_amide fr_amidine fr_aniline fr_aryl_methyl fr_azide fr_azo fr_barbitur fr_benzene fr_benzodiazepine fr_bicyclic fr_diazo fr_dihydropyridine fr_epoxide fr_ester fr_ether fr_furan fr_guanido fr_halogen fr_hdrzine fr_hdrzone fr_imidazole fr_imide fr_isocyan fr_isothiocyan fr_ketone fr_ketone_Topliss fr_lactam fr_lactone fr_methoxy 
fr_morpholine fr_nitrile fr_nitro fr_nitro_arom fr_nitro_arom_nonortho fr_nitroso fr_oxazole fr_oxime fr_para_hydroxylation fr_phenol fr_phenol_noOrthoHbond fr_phos_acid fr_phos_ester fr_piperdine fr_piperzine fr_priamide fr_prisulfonamd fr_pyridine fr_quatN fr_sulfide fr_sulfonamd fr_sulfone fr_term_acetylene fr_tetrazole fr_thiazole fr_thiocyan fr_thiophene fr_unbrch_alkane fr_urea qed +3037 2.370227579270102 503.6108804181844 12.413849083443592 8.821564533342674 10.333422425379583 8.0585506480638 5.008352593120903 5.764281539139358 3.7228452481073373 4.595716809051308 2.463985083856104 2.934179289431582 1.5965258413271721 1.9859262940770028 0.0 10.213054789681411 0.0 11.49902366656781 27.592991233802653 0.0 12.13273413692322 24.26546827384644 0.0 0.0 23.20187978046503 268.00578492 0.7647058823529411 1.1764705882352942 1.588235294117647 0.07692307692307693 -1.38 17 259.04699999999997 6943.445199590422 12.08686679380967 4.861181105580097 2.8426724700782957 109.048439398113 9.68320845930965 0.5076617533400031 9.68320845930965 0.11870889965789788 0.14701436130007584 0.11870889965789788 0.14701436130007584 -0.5076617533400031 3.9954000000000027 69.03960000000004 269.127 2 2 0 0 0 2 0 2 2 2 4 0 2 0 0 0 88 10.213054789681411 11.49902366656781 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 23.20187978046503 47.52510539416365 16.466088250408664 0.0 2 10.213054789681411 23.20187978046503 0.0 0.0 0.0 6.4208216229260096 0.0 57.5703720216463 0.0 11.49902366656781 0.0 0.0 11.49902366656781 23.20187978046503 10.213054789681411 6.4208216229260096 0.0 11.126902983393991 36.39820241076966 10.045266627482652 0.0 0.0 40.46 0.0 11.70886971249405 0.0 0.0 0.0 0.0 0.0 0.0 0.0 32.01335250972817 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.8647127178367139 |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/rdkit_descriptors_result1.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rdkit_descriptors_result1.tab Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,2 @@ +MoleculeID BalabanJ BertzCT Chi0 Chi0n Chi0v Chi1 Chi1n Chi1v Chi2n Chi2v Chi3n Chi3v Chi4n Chi4v EState_VSA1 EState_VSA10 EState_VSA11 EState_VSA2 EState_VSA3 EState_VSA4 EState_VSA5 EState_VSA6 EState_VSA7 EState_VSA8 EState_VSA9 ExactMolWt FormalCharge FpDensityMorgan1 FpDensityMorgan2 FpDensityMorgan3 FractionCSP3 HallKierAlpha HeavyAtomCount HeavyAtomMolWt Ipc Kappa1 Kappa2 Kappa3 LabuteASA MaxAbsEStateIndex MaxAbsPartialCharge MaxEStateIndex MaxPartialCharge MinAbsEStateIndex MinAbsPartialCharge MinEStateIndex MinPartialCharge MolLogP MolMR MolWt NHOHCount NOCount NumAliphaticCarbocycles NumAliphaticHeterocycles NumAliphaticRings NumAromaticCarbocycles NumAromaticHeterocycles NumAromaticRings NumHAcceptors NumHDonors NumHeteroatoms NumRadicalElectrons NumRotatableBonds NumSaturatedCarbocycles NumSaturatedHeterocycles NumSaturatedRings NumValenceElectrons PEOE_VSA1 PEOE_VSA10 PEOE_VSA11 PEOE_VSA12 PEOE_VSA13 PEOE_VSA14 PEOE_VSA2 PEOE_VSA3 PEOE_VSA4 PEOE_VSA5 PEOE_VSA6 PEOE_VSA7 PEOE_VSA8 PEOE_VSA9 RingCount SMR_VSA1 SMR_VSA10 SMR_VSA2 SMR_VSA3 SMR_VSA4 SMR_VSA5 SMR_VSA6 SMR_VSA7 SMR_VSA8 SMR_VSA9 SSSR SlogP_VSA1 SlogP_VSA10 SlogP_VSA11 SlogP_VSA12 SlogP_VSA2 SlogP_VSA3 SlogP_VSA4 SlogP_VSA5 SlogP_VSA6 SlogP_VSA7 SlogP_VSA8 SlogP_VSA9 TPSA VSA_EState1 VSA_EState10 VSA_EState2 VSA_EState3 VSA_EState4 VSA_EState5 VSA_EState6 VSA_EState7 VSA_EState8 VSA_EState9 fr_Al_COO fr_Al_OH fr_Al_OH_noTert fr_ArN fr_Ar_COO fr_Ar_N fr_Ar_NH fr_Ar_OH fr_COO fr_COO2 fr_C_O fr_C_O_noCOO fr_C_S fr_HOCCN fr_Imine fr_NH0 fr_NH1 fr_NH2 fr_N_O fr_Ndealkylation1 fr_Ndealkylation2 fr_Nhpyrrole fr_SH fr_aldehyde fr_alkyl_carbamate fr_alkyl_halide fr_allylic_oxid fr_amide fr_amidine fr_aniline fr_aryl_methyl fr_azide fr_azo fr_barbitur fr_benzene fr_benzodiazepine fr_bicyclic fr_diazo fr_dihydropyridine fr_epoxide fr_ester fr_ether fr_furan fr_guanido fr_halogen fr_hdrzine fr_hdrzone fr_imidazole fr_imide fr_isocyan fr_isothiocyan fr_ketone fr_ketone_Topliss fr_lactam 
fr_lactone fr_methoxy fr_morpholine fr_nitrile fr_nitro fr_nitro_arom fr_nitro_arom_nonortho fr_nitroso fr_oxazole fr_oxime fr_para_hydroxylation fr_phenol fr_phenol_noOrthoHbond fr_phos_acid fr_phos_ester fr_piperdine fr_piperzine fr_priamide fr_prisulfonamd fr_pyridine fr_quatN fr_sulfide fr_sulfonamd fr_sulfone fr_term_acetylene fr_tetrazole fr_thiazole fr_thiocyan fr_thiophene fr_unbrch_alkane fr_urea qed +3037 2.370228 503.61088 12.413849 8.821565 10.333422 8.058551 5.008353 5.764282 3.722845 4.595717 2.463985 2.934179 1.596526 1.985926 0.0 10.213055 0.0 11.499024 27.592991 0.0 12.132734 24.265468 0.0 0.0 23.20188 268.005785 0 0.764706 1.176471 1.588235 0.076923 -1.38 17 259.047 6943.4452 12.086867 4.861181 2.842672 109.048439 9.683208 0.507662 9.683208 0.118709 0.147014 0.118709 0.147014 -0.507662 3.9954 69.0396 269.127 2 2 0 0 0 2 0 2 2 2 4 0 2 0 0 0 88 10.213055 11.499024 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 23.20188 47.525105 16.466088 0.0 2 10.213055 23.20188 0.0 0.0 0.0 6.420822 0.0 57.570372 0.0 11.499024 2 0.0 0.0 11.499024 23.20188 10.213055 6.420822 0.0 11.126903 36.398202 10.045267 0.0 0.0 40.46 0.0 11.70887 0.0 20.448487 1.29642 0.294029 9.600621 0.373796 0.0 0.0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.864713 |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/rdkit_descriptors_subset.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rdkit_descriptors_subset.tab Sat Dec 04 16:39:31 2021 +0000 |
b |
@@ -0,0 +1,2 @@ +MoleculeID FormalCharge MolWt qed +3037 0 269.127 0.864713 |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/staurosporine.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/staurosporine.smi Sat Dec 04 16:39:31 2021 +0000 |
[ |
@@ -0,0 +1,1 @@ +C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)NC)OC staurosporine |
b |
diff -r 000000000000 -r 5c501eb8d56c test-data/sucos_cluster.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sucos_cluster.sdf Sat Dec 04 16:39:31 2021 +0000 |
b |
b'@@ -0,0 +1,1384 @@\n+\n+ RDKit 3D\n+\n+ 19 20 0 0 0 0 0 0 0 0999 V2000\n+ 25.8690 10.6750 17.9260 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 24.7660 11.4770 18.5500 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 24.1650 12.4880 17.8120 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.1480 13.2570 18.3640 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.7300 13.0010 19.6620 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.6190 13.8450 20.2480 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.3280 11.9670 20.4090 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.9000 11.7560 21.6900 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.1990 10.4520 22.2010 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.3800 10.3250 23.4930 C 0 0 1 0 0 0 0 0 0 0 0 0\n+ 22.7510 9.1510 24.2170 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.5930 11.5910 24.3570 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.2540 11.3630 25.7760 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.5710 12.5910 26.5330 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.1760 12.4110 28.0100 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 20.7810 12.0710 28.1350 O 0 0 0 0 0 0 0 0 0 0 0 0\n+ 20.5060 10.8660 27.4060 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 20.8170 11.0700 25.9210 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 24.3600 11.2090 19.8480 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0\n+ 3 2 2 0\n+ 4 3 1 0\n+ 5 4 2 0\n+ 6 5 1 0\n+ 7 5 1 0\n+ 8 7 1 0\n+ 9 8 1 0\n+ 10 9 1 0\n+ 10 11 1 1\n+ 12 10 1 0\n+ 13 12 1 0\n+ 14 13 1 0\n+ 15 14 1 0\n+ 16 15 1 0\n+ 17 16 1 0\n+ 18 17 1 0\n+ 18 13 1 0\n+ 19 7 2 0\n+ 19 2 1 0\n+M END\n+$$$$\n+\n+ RDKit 3D\n+\n+ 11 12 0 0 0 0 0 0 0 0999 V2000\n+ 22.8090 9.2070 24.2800 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.4710 10.1080 23.3190 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.8900 9.6340 22.1960 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.5680 10.5170 21.2440 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.7580 11.8790 21.3690 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.3130 12.3920 22.5320 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.5210 13.7530 22.7260 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.0810 14.2110 23.9020 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.4470 13.3190 24.8860 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.2450 11.9700 24.7110 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.6750 11.4850 23.5440 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0\n+ 3 2 2 0\n+ 4 3 1 
0\n+ 5 4 2 0\n+ 6 5 1 0\n+ 7 6 2 0\n+ 8 7 1 0\n+ 9 8 2 0\n+ 10 9 1 0\n+ 11 2 1 0\n+ 11 10 2 0\n+ 11 6 1 0\n+M END\n+$$$$\n+\n+ RDKit 3D\n+\n+ 11 12 0 0 0 0 0 0 0 0999 V2000\n+ 22.7770 9.1670 24.9740 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.4280 10.0140 24.0880 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.6770 11.3690 24.2670 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.2930 12.2600 23.2950 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.6430 11.7880 22.1650 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.2220 12.6270 21.1460 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 20.5740 12.1120 20.0440 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 20.3400 10.7540 19.9450 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 20.7580 9.9010 20.9470 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.4090 10.3980 22.0610 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.7960 9.5320 23.0070 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0\n+ 3 2 2 0\n+ 4 3 1 0\n+ 5 4 2 0\n+ 6 5 1 0\n+ 7 6 2 0\n+ 8 7 1 0\n+ 9 8 2 0\n+ 10 9 1 0\n+ 10 5 1 0\n+ 11 10 2 0\n+ 11 2 1 0\n+M END\n+$$$$\n+\n+ RDKit 3D\n+\n+ 15 16 0 0 0 0 0 0 0 0999 V2000\n+ 22.4510 9.0090 24.4670 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.5180 10.3550 24.1060 C 0 0 0 0 0 0 0 0 0 0'..b' 0 0 0\n+ 22.2050 9.8930 22.6900 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.0060 12.0970 24.2220 S 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.8910 10.6980 21.5060 C 0 0 2 0 0 0 0 0 0 0 0 0\n+ 22.8690 9.5480 24.8150 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.4800 12.1250 21.9040 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.5630 12.8770 22.6580 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.6270 10.3910 23.7940 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.1060 10.7640 20.5410 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 24.3720 10.2320 20.8590 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 25.4690 10.3640 20.0140 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 25.3420 11.0350 18.7800 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 24.1030 11.5640 18.3850 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.0130 11.4340 19.2760 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.8840 12.0230 18.8800 F 0 0 0 0 0 0 0 0 0 0 0 0\n+ 26.4280 11.2380 18.0020 F 0 0 0 0 0 0 0 0 0 0 0 0\n+ 4 1 1 6\n+ 4 2 1 0\n+ 6 4 1 0\n+ 7 6 1 0\n+ 7 3 1 0\n+ 8 5 1 0\n+ 8 2 2 0\n+ 8 3 1 0\n+ 9 4 1 0\n+ 10 9 2 0\n+ 11 10 1 0\n+ 12 11 2 
0\n+ 13 12 1 0\n+ 14 9 1 0\n+ 14 13 2 0\n+ 15 14 1 0\n+ 16 12 1 0\n+M END\n+$$$$\n+\n+ RDKit 3D\n+\n+ 11 12 0 0 0 0 0 0 0 0999 V2000\n+ 23.8920 10.9750 19.7100 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.1430 9.6540 19.8400 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.7820 11.7530 20.9950 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.6770 9.8870 20.1680 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.1750 12.7870 22.6090 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.3660 10.1400 23.6660 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.3180 11.9530 21.3510 C 0 0 2 0 0 0 0 0 0 0 0 0\n+ 21.5250 10.6490 21.4640 C 0 0 2 0 0 0 0 0 0 0 0 0\n+ 22.7380 9.3100 24.6900 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 21.9210 9.6990 22.5310 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 22.5620 11.8400 24.1130 S 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0\n+ 3 1 1 0\n+ 4 2 1 0\n+ 7 3 1 1\n+ 7 5 1 0\n+ 8 4 1 6\n+ 8 7 1 0\n+ 9 6 1 0\n+ 10 8 1 0\n+ 10 6 2 0\n+ 11 6 1 0\n+ 11 5 1 0\n+M END\n+$$$$\n+\n+ RDKit 3D\n+\n+ 18 20 0 0 0 0 0 0 0 0999 V2000\n+ 28.6970 10.5240 20.4750 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.9220 11.6800 20.9560 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 28.7880 12.6650 21.6350 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.0810 12.2010 19.9710 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.4190 13.2930 19.2740 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 26.6450 13.8320 18.3200 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 25.4810 13.2870 17.9980 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 25.0290 12.1840 18.6310 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.7860 11.6150 18.3040 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 23.3430 10.5040 18.9720 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 24.1080 9.9180 19.9810 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 25.3250 10.4360 20.3200 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 25.8200 11.5870 19.6640 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.1160 14.9710 17.6690 N 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.3480 16.1150 18.5710 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 27.6660 17.2890 17.6050 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 26.8670 16.9430 16.3140 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 26.3130 15.5200 16.5590 C 0 0 0 0 0 0 0 0 0 0 0 0\n+ 2 1 1 0\n+ 3 2 1 0\n+ 4 2 1 0\n+ 5 4 2 0\n+ 6 5 1 0\n+ 7 6 2 0\n+ 8 7 1 0\n+ 9 8 2 0\n+ 10 9 1 0\n+ 11 10 2 0\n+ 12 11 1 0\n+ 13 
12 2 0\n+ 13 8 1 0\n+ 13 4 1 0\n+ 14 6 1 0\n+ 15 14 1 0\n+ 16 15 1 0\n+ 17 16 1 0\n+ 18 14 1 0\n+ 18 17 1 0\n+M END\n+$$$$\n' |