Galaxy |

Changeset 6:4beb3e026bbb (2021-12-04)

Previous changeset 5:351fbd750a6d (2021-02-17)

Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"

modified:
dimorphite_dl.py
rdkit_descriptors.py
sdf_to_tab.py

added:
rdconf.py
test-data/rdconf_output.sdf
test-data/staurosporine.smi

diff -r 351fbd750a6d -r 4beb3e026bbb dimorphite_dl.py
--- a/dimorphite_dl.py Wed Feb 17 13:00:12 2021 +0000
+++ b/dimorphite_dl.py Sat Dec 04 16:39:05 2021 +0000

[

b'@@ -19,8 +19,9 @@\n """\n \n from __future__ import print_function\n+\n+import argparse\n import os\n-import argparse\n import sys\n \n try:\n@@ -43,11 +44,12 @@\n import rdkit\n from rdkit import Chem\n from rdkit.Chem import AllChem\n-except:\n+except Exception:\n msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"\n print(msg)\n raise Exception(msg)\n \n+\n def main(params=None):\n """The main definition run when you call the script from the commandline.\n \n@@ -84,13 +86,14 @@\n with open(args["output_file"], "w") as file:\n for protonated_smi in Protonate(args):\n file.write(protonated_smi + "\\n")\n- elif "return_as_list" in args and args["return_as_list"] == True:\n+ elif "return_as_list" in args and args["return_as_list"]:\n return list(Protonate(args))\n else:\n # No output file specified. Just print it to the screen.\n for protonated_smi in Protonate(args):\n print(protonated_smi)\n \n+\n class MyParser(argparse.ArgumentParser):\n """Overwrite default parse so it displays help file on error. See\n https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""\n@@ -117,15 +120,18 @@\n if file is None:\n file = sys.stdout\n self._print_message(self.format_help(), file)\n- print("""\n+ print(\n+ """\n examples:\n python dimorphite_dl.py --smiles_file sample_molecules.smi\n python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0\n python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi\n python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states\n- python dimorphite_dl.py --test""")\n+ python dimorphite_dl.py --test"""\n+ )\n print("")\n \n+\n class ArgParseFuncs:\n """A namespace for storing functions that are useful for processing\n command-line arguments. 
To keep things organized."""\n@@ -137,27 +143,57 @@\n :return: A parser object.\n """\n \n- parser = MyParser(description="Dimorphite 1.2: Creates models of " +\n- "appropriately protonated small moleucles. " +\n- "Apache 2.0 License. Copyright 2018 Jacob D. " +\n- "Durrant.")\n- parser.add_argument(\'--min_ph\', metavar=\'MIN\', type=float, default=6.4,\n- help=\'minimum pH to consider (default: 6.4)\')\n- parser.add_argument(\'--max_ph\', metavar=\'MAX\', type=float, default=8.4,\n- help=\'maximum pH to consider (default: 8.4)\')\n- parser.add_argument(\'--pka_precision\', metavar=\'PRE\', type=float, default=1.0,\n- help=\'pKa precision factor (number of standard devations, default: 1.0)\')\n- parser.add_argument(\'--smiles\', metavar=\'SMI\', type=str,\n- help=\'SMILES string to protonate\')\n- parser.add_argument(\'--smiles_file\', metavar="FILE", type=str,\n- help=\'file that contains SMILES strings to protonate\')\n- parser.add_argument(\'--output_file\', metavar="FILE", type=str,\n- help=\'output file to write protonated SMILES (optional)\')\n- parser.add_argument(\'--label_states\', action="store_true",\n- help=\'label protonated SMILES with target state \' + \\\n- \'(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").\')\n- parser.add_argument(\'--test\', action="store_true",\n- help=\'run unit tests (for debugging)\')\n+ parser = MyParser(\n+ description="Dimorphite 1.2: Creates models of "\n+ + "appropriately protonated small moleucles. "\n+ + "Apache 2.0 License. Copyright 2018 Jacob D. 
"\n+ + "Durrant."\n+ )'..b'put) != num_states:\n+ msg = (\n+ args["smiles"]\n+ + " should have "\n+ + str(num_states)\n+ + " states at at pH "\n+ + str(args["min_ph"])\n+ + ": "\n+ + str(output)\n+ )\n print(msg)\n raise Exception(msg)\n \n- if (len(set([l[0] for l in output]) - set(expected_output)) != 0):\n- msg = args["smiles"] + " is not " + " AND ".join(expected_output) + \\\n- " at pH " + str(args["min_ph"]) + " - " + str(args["max_ph"]) + \\\n- "; it is " + " AND ".join([l[0] for l in output])\n+ if len(set([l[0] for l in output]) - set(expected_output)) != 0:\n+ msg = (\n+ args["smiles"]\n+ + " is not "\n+ + " AND ".join(expected_output)\n+ + " at pH "\n+ + str(args["min_ph"])\n+ + " - "\n+ + str(args["max_ph"])\n+ + "; it is "\n+ + " AND ".join([l[0] for l in output])\n+ )\n print(msg)\n raise Exception(msg)\n \n- if (len(set([l[1] for l in output]) - set(labels)) != 0):\n- msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \\\n- "; it is " + " AND ".join([l[1] for l in output])\n+ if len(set([l[1] for l in output]) - set(labels)) != 0:\n+ msg = (\n+ args["smiles"]\n+ + " not labeled as "\n+ + " AND ".join(labels)\n+ + "; it is "\n+ + " AND ".join([l[1] for l in output])\n+ )\n print(msg)\n raise Exception(msg)\n \n ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))\n ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"\n- print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output]))\n+ print(\n+ "(CORRECT) "\n+ + ph_range_str.ljust(10)\n+ + " "\n+ + args["smiles"]\n+ + " => "\n+ + " AND ".join([l[0] for l in output])\n+ )\n+\n \n def run(**kwargs):\n """A helpful, importable function for those who want to call Dimorphite-DL\n@@ -1019,6 +1236,7 @@\n # Run the main function with the specified arguments.\n main(kwargs)\n \n+\n def run_with_mol_list(mol_lst, **kwargs):\n """A helpful, importable function for those who want to call Dimorphite-DL\n from 
another Python script rather than the command line. Note that this\n@@ -1037,10 +1255,13 @@\n # Do a quick check to make sure the user input makes sense.\n for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:\n if bad_arg in kwargs:\n- msg = "You\'re using Dimorphite-DL\'s run_with_mol_list(mol_lst, " + \\\n- "**kwargs) function, but you also passed the \\"" + \\\n- bad_arg + "\\" argument. Did you mean to use the " + \\\n- "run(**kwargs) function instead?"\n+ msg = (\n+ "You\'re using Dimorphite-DL\'s run_with_mol_list(mol_lst, "\n+ + \'**kwargs) function, but you also passed the "\'\n+ + bad_arg\n+ + \'" argument. Did you mean to use the \'\n+ + "run(**kwargs) function instead?"\n+ )\n print(msg)\n raise Exception(msg)\n \n@@ -1076,9 +1297,15 @@\n m.SetProp(prop, str(val))\n mols.append(m)\n else:\n- UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props))\n+ UtilFuncs.eprint(\n+ "WARNING: Could not process molecule with SMILES string "\n+ + s\n+ + " and properties "\n+ + str(props)\n+ )\n \n return mols\n \n+\n if __name__ == "__main__":\n main()\n'

diff -r 351fbd750a6d -r 4beb3e026bbb rdconf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rdconf.py Sat Dec 04 16:39:05 2021 +0000

[

@@ -0,0 +1,229 @@
+#!/usr/bin/python3
+
+import gzip
+import os
+import sys
+from optparse import OptionParser
+
+from rdkit.Chem import AllChem as Chem
+
+"""
+This script was originally written by David Koes, University of Pittsburgh:
+https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py
+It is licensed under the MIT licence.
+
+Given a smiles file, generate 3D conformers in output sdf.
+Energy minimizes and filters conformers to meet energy window and rms constraints.
+
+Some time ago I compared this to alternative conformer generators and
+it was quite competitive (especially after RDKit's UFF implementation
+added OOP terms).
+"""
+
+
+# convert smiles to sdf
+def getRMS(mol, c1, c2):
+    """Return Chem.GetBestRMS(mol, mol, c1, c2) for conformer ids c1 and c2 of mol."""
+    return Chem.GetBestRMS(mol, mol, c1, c2)
+
+
+parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf")
+parser.add_option(  # options.maxconfs: cap on conformers written per molecule
+    "--maxconfs",
+    dest="maxconfs",
+    action="store",
+    help="maximum number of conformers to generate per a molecule (default 20)",
+    default="20",
+    type="int",
+    metavar="CNT",
+)
+parser.add_option(  # options.sample: embedding count is int(sample * maxconfs)
+    "--sample_multiplier",
+    dest="sample",
+    action="store",
+    help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)",
+    default="1",
+    type="float",
+    metavar="N",
+)
+parser.add_option(  # options.seed: passed as randomSeed on the non-ETKDG path
+    "--seed",
+    dest="seed",
+    action="store",
+    help="random seed (default 9162006)",
+    default="9162006",
+    type="int",
+    metavar="s",
+)
+parser.add_option(  # options.rms: conformers closer than this to a written one are skipped; 0 disables
+    "--rms_threshold",
+    dest="rms",
+    action="store",
+    help="filter based on rms (default 0.7)",
+    default="0.7",
+    type="float",
+    metavar="R",
+)
+parser.add_option(  # options.energy: allowed energy above the lowest-energy conformer
+    "--energy_window",
+    dest="energy",
+    action="store",
+    help="filter based on energy difference with lowest energy conformer",
+    default="10",
+    type="float",
+    metavar="E",
+)
+parser.add_option(  # options.verbose: print per-molecule and per-conformer progress
+    "-v",
+    "--verbose",
+    dest="verbose",
+    action="store_true",
+    default=False,
+    help="verbose output",
+)
+parser.add_option(  # options.mmff: minimize with the MMFF calls instead of the UFF calls
+    "--mmff",
+    dest="mmff",
+    action="store_true",
+    default=False,
+    help="use MMFF forcefield instead of UFF",
+)
+parser.add_option(  # options.nomin: skip the minimization step entirely
+    "--nomin",
+    dest="nomin",
+    action="store_true",
+    default=False,
+    help="don't perform energy minimization (bad idea)",
+)
+parser.add_option(  # options.etkdg: embed with Chem.ETKDG() parameters
+    "--etkdg",
+    dest="etkdg",
+    action="store_true",
+    default=False,
+    help="use new ETKDG knowledge-based method instead of distance geometry",
+)
+
+
+(options, args) = parser.parse_args()
+
+if len(args) < 2:  # both <input>.smi and <output>.sdf are required
+    parser.error("Need input and output")
+    sys.exit(-1)  # safeguard; optparse's parser.error() normally exits itself
+
+input = args[0]
+output = args[1]
+smifile = open(input)
+if options.verbose:
+    print("Generating a maximum of", options.maxconfs, "per a mol")
+
+if options.etkdg and not hasattr(Chem, "ETKDG"):  # hasattr: no AttributeError if missing
+    print("ETKDG does not appear to be implemented.  Please upgrade RDKit.")
+    sys.exit(1)
+
+split = os.path.splitext(output)
+if split[1] == ".gz":
+    outf = gzip.open(output, "wt+")
+    output = split[0]  # strip .gz
+else:
+    outf = open(output, "w+")
+
+# the extension left after stripping ".gz" selects PDB or SD output
+if os.path.splitext(output)[1] == ".pdb":
+    sdwriter = Chem.PDBWriter(outf)
+else:
+    sdwriter = Chem.SDWriter(outf)
+
+if sdwriter is None:
+    print("Could not open " + output)
+    sys.exit(-1)
+
+for line in smifile:
+    toks = line.split()
+    if not toks:  # blank line: nothing to parse, and toks[0] would raise IndexError
+        continue
+    smi = toks[0]
+    name = " ".join(toks[1:])  # everything after the SMILES becomes the title
+
+    pieces = smi.split(".")
+    if len(pieces) > 1:
+        smi = max(pieces, key=len)  # take largest component by length
+        print("Taking largest component: %s\t%s" % (smi, name))
+
+    mol = Chem.MolFromSmiles(smi)
+    if mol is not None:
+        if options.verbose:
+            print(smi)
+        try:
+            Chem.SanitizeMol(mol)
+            mol = Chem.AddHs(mol)
+            mol.SetProp("_Name", name)
+
+            if options.etkdg:  # NOTE: options.seed is not used on this path
+                cids = Chem.EmbedMultipleConfs(
+                    mol, int(options.sample * options.maxconfs), Chem.ETKDG()
+                )
+            else:
+                cids = Chem.EmbedMultipleConfs(
+                    mol, int(options.sample * options.maxconfs), randomSeed=options.seed
+                )
+            if options.verbose:
+                print(len(cids), "conformers found")
+            cenergy = []
+            for conf in cids:
+                # not passing confID only minimizes the first conformer
+                if options.nomin:
+                    converged = None  # defined so the verbose print below cannot NameError
+                    cenergy.append(conf)  # NOTE: conformer id stands in for the energy
+                elif options.mmff:
+                    converged = not Chem.MMFFOptimizeMolecule(mol, confId=conf)
+                    mp = Chem.MMFFGetMoleculeProperties(mol)
+                    cenergy.append(
+                        Chem.MMFFGetMoleculeForceField(
+                            mol, mp, confId=conf
+                        ).CalcEnergy()
+                    )
+                else:
+                    converged = not Chem.UFFOptimizeMolecule(mol, confId=conf)
+                    cenergy.append(
+                        Chem.UFFGetMoleculeForceField(mol, confId=conf).CalcEnergy()
+                    )
+                if options.verbose:
+                    print("Convergence of conformer", conf, converged)
+
+            mol = Chem.RemoveHs(mol)
+            # lowest cenergy first; mine is the reference value for the energy window
+            sortedcids = sorted(cids, key=lambda cid: cenergy[cid])
+            mine = cenergy[sortedcids[0]] if sortedcids else 0
+            if options.rms == 0:
+                cnt = 0
+                for conf in sortedcids:
+                    if cnt >= options.maxconfs:
+                        break
+                    if (options.energy < 0) or cenergy[conf] - mine <= options.energy:
+                        sdwriter.write(mol, conf)
+                        cnt += 1
+            else:
+                written = []
+                for conf in sortedcids:
+                    if len(written) >= options.maxconfs:
+                        break
+                    # check rmsd
+                    passed = True
+                    for seenconf in written:
+                        rms = getRMS(mol, seenconf, conf)
+                        if (rms < options.rms) or (
+                            options.energy > 0 and cenergy[conf] - mine > options.energy
+                        ):
+                            passed = False
+                            break
+                    if passed:
+                        written.append(conf)
+                        sdwriter.write(mol, conf)
+        except Exception as e:
+            print("Exception", e)
+    else:
+        print("ERROR:", smi)
+
+smifile.close()
+sdwriter.close()
+outf.close()

diff -r 351fbd750a6d -r 4beb3e026bbb rdkit_descriptors.py
--- a/rdkit_descriptors.py Wed Feb 17 13:00:12 2021 +0000
+++ b/rdkit_descriptors.py Sat Dec 04 16:39:05 2021 +0000

[

@@ -8,7 +8,7 @@
from rdkit.Chem import Descriptors

-def get_supplier(infile, format='smiles'):
+def get_supplier(infile, format="smiles"):
     """
     Returns a generator over a SMILES or InChI file. Every element is of RDKit
     molecule and has its original string as _Name property.
@@ -16,14 +16,20 @@
     with open(infile) as handle:
         for line in handle:
             line = line.strip()
-            if format == 'smiles':
+            if format == "smiles":
                 mol = Chem.MolFromSmiles(line, sanitize=True)
-            elif format == 'inchi':
-                mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False)
+            elif format == "inchi":
+                mol = Chem.inchi.MolFromInchi(
+                    line,
+                    sanitize=True,
+                    removeHs=True,
+                    logLevel=None,
+                    treatWarningAsError=False,
+                )
             if mol is None:
                 yield False
             else:
-                mol.SetProp('_Name', line.split('\t')[0])
+                mol.SetProp("_Name", line.split("\t")[0])
                 yield mol

@@ -31,9 +37,13 @@
     """
     Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function)
     """
-    ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')]
+    ret = [
+        (name, f)
+        for name, f in inspect.getmembers(Descriptors)
+        if inspect.isfunction(f) and not name.startswith("_")
+    ]
     # some which are not in the official Descriptors module we need to add manually
-    ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)])
+    ret.extend([("FormalCharge", Chem.GetFormalCharge), ("SSSR", Chem.GetSSSR)])
     ret.sort()
     return ret

@@ -48,40 +58,54 @@

if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
+    parser.add_argument("-i", "--infile", required=True, help="Path to the input file.")
     parser.add_argument("--iformat", help="Specify the input file format.")

-    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
-                        default=sys.stdout,
-                        help="path to the result file, default is stdout")
+    parser.add_argument(
+        "-o",
+        "--outfile",
+        type=argparse.FileType("w+"),
+        default=sys.stdout,
+        help="path to the result file, default is stdout",
+    )

-    parser.add_argument('-s', '--select', default=None,
-                        help="select a subset of comma-separated descriptors to use")
+    parser.add_argument(
+        "-s",
+        "--select",
+        default=None,
+        help="select a subset of comma-separated descriptors to use",
+    )

-    parser.add_argument("--header", dest="header", action="store_true",
-                        default=False,
-                        help="Write header line.")
+    parser.add_argument(
+        "--header",
+        dest="header",
+        action="store_true",
+        default=False,
+        help="Write header line.",
+    )

     args = parser.parse_args()

-    if args.iformat == 'sdf':
+    if args.iformat == "sdf":
         supplier = Chem.SDMolSupplier(args.infile)
-    elif args.iformat == 'smi':
-        supplier = get_supplier(args.infile, format='smiles')
-    elif args.iformat == 'inchi':
-        supplier = get_supplier(args.infile, format='inchi')
-    elif args.iformat == 'pdb':
+    elif args.iformat == "smi":
+        supplier = get_supplier(args.infile, format="smiles")
+    elif args.iformat == "inchi":
+        supplier = get_supplier(args.infile, format="inchi")
+    elif args.iformat == "pdb":
         supplier = [Chem.MolFromPDBFile(args.infile)]
-    elif args.iformat == 'mol2':
+    elif args.iformat == "mol2":
         supplier = [Chem.MolFromMol2File(args.infile)]

     functions = get_rdkit_descriptor_functions()
-    if args.select and args.select != 'None':
-        selected = args.select.split(',')
+    if args.select and args.select != "None":
+        selected = args.select.split(",")
         functions = [(name, f) for name, f in functions if name in selected]

     if args.header:
-        args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions]))
+        args.outfile.write(
+            "%s\n" % "\t".join(["MoleculeID"] + [name for name, f in functions])
+        )

     for mol in supplier:
         if not mol:
@@ -91,4 +115,7 @@
             molecule_id = mol.GetProp("_Name")
         except KeyError:
             molecule_id = Chem.MolToSmiles(mol)
-        args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs]))
+        args.outfile.write(
+            "%s\n"
+            % "\t".join([molecule_id] + [str(round(res, 6)) for name, res in descs])
+        )

diff -r 351fbd750a6d -r 4beb3e026bbb sdf_to_tab.py
--- a/sdf_to_tab.py Wed Feb 17 13:00:12 2021 +0000
+++ b/sdf_to_tab.py Sat Dec 04 16:39:05 2021 +0000

[

@@ -13,36 +13,55 @@
         if mols[n]:
             d = mols[n].GetPropsAsDict()
             # filter dict for desired props
-            if vars.props.strip() == '':  # none specified, return all
-                d = {prop: val for (prop, val) in d.items() if not any(x in str(val) for x in ['\n', '\t'])}  # remove items containing newlines or tabs
+            if vars.props.strip() == "":  # none specified, return all
+                d = {
+                    prop: val
+                    for (prop, val) in d.items()
+                    if not any(x in str(val) for x in ["\n", "\t"])
+                }  # remove items containing newlines or tabs
             else:
-                d = {prop: val for (prop, val) in d.items() if prop in vars.props.replace(' ', '').split(',')}  # remove items not requested via CLI
+                d = {
+                    prop: val
+                    for (prop, val) in d.items()
+                    if prop in vars.props.replace(" ", "").split(",")
+                }  # remove items not requested via CLI
             if vars.name:
-                d['SDFMoleculeName'] = mols[n].GetProp('_Name')
+                d["SDFMoleculeName"] = mols[n].GetProp("_Name")
             if vars.smiles:
-                d['SMILES'] = Chem.MolToSmiles(mols[n], isomericSmiles=False)
-            d['Index'] = int(n)
+                d["SMILES"] = Chem.MolToSmiles(mols[n], isomericSmiles=False)
+            d["Index"] = int(n)

             df = df.append(d, ignore_index=True)
         else:
             print("Molecule could not be read - skipped.")

-    df = df.astype({'Index': int}).set_index('Index')
+    df = df.astype({"Index": int}).set_index("Index")
     sorted_cols = sorted(df.columns.values.tolist())
-    df.to_csv(vars.out, sep='\t', header=vars.header, columns=sorted_cols)
+    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)

def main():
     parser = argparse.ArgumentParser(description="Convert SDF to tabular")
-    parser.add_argument('--inp', '-i', help="The input file", required=True)
-    parser.add_argument('--out', '-o', help="The output file", required=True)
-    parser.add_argument('--props', '-p', help="Properties to filter (leave blank for all)", required=True)
-    parser.add_argument('--header', '-t', action='store_true',
-                        help="Write property name as the first row.")
-    parser.add_argument('--smiles', '-s', action='store_true',
-                        help="Include SMILES in output.")
-    parser.add_argument('--name', '-n', action='store_true',
-                        help="Include molecule name in output.")
+    parser.add_argument("--inp", "-i", help="The input file", required=True)
+    parser.add_argument("--out", "-o", help="The output file", required=True)
+    parser.add_argument(
+        "--props",
+        "-p",
+        help="Properties to filter (leave blank for all)",
+        required=True,
+    )
+    parser.add_argument(
+        "--header",
+        "-t",
+        action="store_true",
+        help="Write property name as the first row.",
+    )
+    parser.add_argument(
+        "--smiles", "-s", action="store_true", help="Include SMILES in output."
+    )
+    parser.add_argument(
+        "--name", "-n", action="store_true", help="Include molecule name in output."
+    )
     sdf_to_tab(parser.parse_args())

diff -r 351fbd750a6d -r 4beb3e026bbb test-data/rdconf_output.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rdconf_output.sdf Sat Dec 04 16:39:05 2021 +0000

@@ -0,0 +1,166 @@
+staurosporine
+     RDKit          3D
+
+ 35 42  0  0  0  0  0  0  0  0999 V2000
+   -2.1656    1.4438   -2.0402 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.5064    0.5224   -1.0006 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -2.5333    0.2902    0.0771 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -3.0448   -1.1355   -0.1222 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -1.8499   -2.0325    0.1086 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.9248   -1.7530   -1.0664 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -1.2333   -0.6270   -1.7456 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.4794   -1.8256   -0.6986 N   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2443   -2.8970   -0.6134 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.9172   -4.2121   -0.8557 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.8835   -5.2113   -0.7023 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.1496   -4.8403   -0.3079 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.4981   -3.5086   -0.0573 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.5336   -2.5530   -0.2153 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.5282   -1.1902   -0.0555 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.4701   -0.2506    0.3363 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.1276    1.0747    0.4177 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.8462    1.4665    0.1077 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2026    2.7102    0.1107 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.7041    3.9456    0.4421 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8338    5.0265    0.3768 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.4727    4.8205   -0.0097 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.9490    3.5396   -0.3412 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.0910    2.4739   -0.2786 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.2989    1.1681   -0.5288 N   0  0  0  0  0  0  0  0  0  0  0  0
+    0.9108    0.5646   -0.2791 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2388   -0.7709   -0.3642 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.2788    1.9083    0.8642 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.3369    0.9521    1.0170 N   0  0  0  0  0  0  0  0  0  0  0  0
+    4.8631   -0.3694    0.7084 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.6279   -1.3464    0.7907 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.1928   -1.4531    0.6537 N   0  0  0  0  0  0  0  0  0  0  0  0
+   -5.2852   -0.5939    0.2385 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.8857    0.2845    1.3121 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.4801    1.0939    2.2570 C   0  0  0  0  0  0  0  0  0  0  0  0
+  2  1  1  6
+  2  3  1  0
+  3  4  1  0
+  4  5  1  0
+  6  5  1  1
+  6  7  1  0
+  6  8  1  0
+  8  9  1  0
+  9 10  2  0
+ 10 11  1  0
+ 11 12  2  0
+ 12 13  1  0
+ 13 14  2  0
+ 14 15  1  0
+ 15 16  2  0
+ 16 17  1  0
+ 17 18  2  0
+ 18 19  1  0
+ 19 20  2  0
+ 20 21  1  0
+ 21 22  2  0
+ 22 23  1  0
+ 23 24  2  0
+ 24 25  1  0
+ 25 26  1  0
+ 26 27  2  0
+ 17 28  1  0
+ 28 29  1  0
+ 29 30  1  0
+ 30 31  2  0
+  4 32  1  1
+ 32 33  1  0
+  3 34  1  1
+ 34 35  1  0
+  7  2  1  0
+ 25  2  1  0
+ 27  8  1  0
+ 14  9  1  0
+ 27 15  1  0
+ 30 16  1  0
+ 26 18  1  0
+ 24 19  1  0
+M  END
+$$$$
+staurosporine
+     RDKit          3D
+
+ 35 42  0  0  0  0  0  0  0  0999 V2000
+   -2.3068    0.9355   -2.4621 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.6484    0.1936   -1.2955 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -2.6628   -0.4491   -0.4739 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -2.1102   -1.3841    0.5829 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -1.3580   -2.4343   -0.1341 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.6301   -1.9615   -1.3703 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -1.0034   -0.8456   -1.9685 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.7323   -1.7671   -0.8854 N   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6653   -2.7175   -0.8097 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6030   -4.0416   -1.1929 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.7336   -4.8209   -0.9993 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.8672   -4.2655   -0.4408 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.9097   -2.9234   -0.0599 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.7941   -2.1557   -0.2513 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.5524   -0.8186    0.0257 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.2591    0.2074    0.5765 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.6138    1.4431    0.6965 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.3053    1.6277    0.2726 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.4639    2.7123    0.2701 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.6421    4.0028    0.7116 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.3869    4.9173    0.5986 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.5824    4.5165    0.0416 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.7175    3.2124   -0.3878 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.7245    2.2626   -0.2991 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.6365    0.9796   -0.6340 N   0  0  0  0  0  0  0  0  0  0  0  0
+    0.6078    0.6133   -0.2727 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2251   -0.6244   -0.3999 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.5496    2.4028    1.3130 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.7624    1.6652    1.5409 N   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5952    0.3088    1.0910 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.5219   -0.5061    1.1939 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.3679   -0.6619    1.5643 N   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.2073    0.2673    2.2885 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.6735    0.2555    0.0846 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.9519   -0.0773   -0.2685 C   0  0  0  0  0  0  0  0  0  0  0  0
+  2  1  1  6
+  2  3  1  0
+  3  4  1  0
+  4  5  1  0
+  6  5  1  1
+  6  7  1  0
+  6  8  1  0
+  8  9  1  0
+  9 10  2  0
+ 10 11  1  0
+ 11 12  2  0
+ 12 13  1  0
+ 13 14  2  0
+ 14 15  1  0
+ 15 16  2  0
+ 16 17  1  0
+ 17 18  2  0
+ 18 19  1  0
+ 19 20  2  0
+ 20 21  1  0
+ 21 22  2  0
+ 22 23  1  0
+ 23 24  2  0
+ 24 25  1  0
+ 25 26  1  0
+ 26 27  2  0
+ 17 28  1  0
+ 28 29  1  0
+ 29 30  1  0
+ 30 31  2  0
+  4 32  1  1
+ 32 33  1  0
+  3 34  1  1
+ 34 35  1  0
+  7  2  1  0
+ 25  2  1  0
+ 27  8  1  0
+ 14  9  1  0
+ 27 15  1  0
+ 30 16  1  0
+ 26 18  1  0
+ 24 19  1  0
+M  END
+$$$$

diff -r 351fbd750a6d -r 4beb3e026bbb test-data/staurosporine.smi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/staurosporine.smi Sat Dec 04 16:39:05 2021 +0000

[

@@ -0,0 +1,1 @@
+C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)NC)OC staurosporine