diff rdkit_descriptors.py @ 5:67ee76f0e497 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author bgruening
date Sat, 04 Dec 2021 16:40:23 +0000
parents bbbf5fb356dd
children
line wrap: on
line diff
--- a/rdkit_descriptors.py	Wed Feb 17 12:59:24 2021 +0000
+++ b/rdkit_descriptors.py	Sat Dec 04 16:40:23 2021 +0000
@@ -8,7 +8,7 @@
 from rdkit.Chem import Descriptors
 
 
-def get_supplier(infile, format='smiles'):
+def get_supplier(infile, format="smiles"):
     """
     Returns a generator over a SMILES or InChI file. Every element is of RDKit
     molecule and has its original string as _Name property.
@@ -16,14 +16,20 @@
     with open(infile) as handle:
         for line in handle:
             line = line.strip()
-            if format == 'smiles':
+            if format == "smiles":
                 mol = Chem.MolFromSmiles(line, sanitize=True)
-            elif format == 'inchi':
-                mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False)
+            elif format == "inchi":
+                mol = Chem.inchi.MolFromInchi(
+                    line,
+                    sanitize=True,
+                    removeHs=True,
+                    logLevel=None,
+                    treatWarningAsError=False,
+                )
             if mol is None:
                 yield False
             else:
-                mol.SetProp('_Name', line.split('\t')[0])
+                mol.SetProp("_Name", line.split("\t")[0])
                 yield mol
 
 
@@ -31,9 +37,13 @@
     """
     Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function)
     """
-    ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')]
+    ret = [
+        (name, f)
+        for name, f in inspect.getmembers(Descriptors)
+        if inspect.isfunction(f) and not name.startswith("_")
+    ]
     # some which are not in the official Descriptors module we need to add manually
-    ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)])
+    ret.extend([("FormalCharge", Chem.GetFormalCharge), ("SSSR", Chem.GetSSSR)])
     ret.sort()
     return ret
 
@@ -48,40 +58,54 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
+    parser.add_argument("-i", "--infile", required=True, help="Path to the input file.")
     parser.add_argument("--iformat", help="Specify the input file format.")
 
-    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
-                        default=sys.stdout,
-                        help="path to the result file, default is stdout")
+    parser.add_argument(
+        "-o",
+        "--outfile",
+        type=argparse.FileType("w+"),
+        default=sys.stdout,
+        help="path to the result file, default is stdout",
+    )
 
-    parser.add_argument('-s', '--select', default=None,
-                        help="select a subset of comma-separated descriptors to use")
+    parser.add_argument(
+        "-s",
+        "--select",
+        default=None,
+        help="select a subset of comma-separated descriptors to use",
+    )
 
-    parser.add_argument("--header", dest="header", action="store_true",
-                        default=False,
-                        help="Write header line.")
+    parser.add_argument(
+        "--header",
+        dest="header",
+        action="store_true",
+        default=False,
+        help="Write header line.",
+    )
 
     args = parser.parse_args()
 
-    if args.iformat == 'sdf':
+    if args.iformat == "sdf":
         supplier = Chem.SDMolSupplier(args.infile)
-    elif args.iformat == 'smi':
-        supplier = get_supplier(args.infile, format='smiles')
-    elif args.iformat == 'inchi':
-        supplier = get_supplier(args.infile, format='inchi')
-    elif args.iformat == 'pdb':
+    elif args.iformat == "smi":
+        supplier = get_supplier(args.infile, format="smiles")
+    elif args.iformat == "inchi":
+        supplier = get_supplier(args.infile, format="inchi")
+    elif args.iformat == "pdb":
         supplier = [Chem.MolFromPDBFile(args.infile)]
-    elif args.iformat == 'mol2':
+    elif args.iformat == "mol2":
         supplier = [Chem.MolFromMol2File(args.infile)]
 
     functions = get_rdkit_descriptor_functions()
-    if args.select and args.select != 'None':
-        selected = args.select.split(',')
+    if args.select and args.select != "None":
+        selected = args.select.split(",")
         functions = [(name, f) for name, f in functions if name in selected]
 
     if args.header:
-        args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions]))
+        args.outfile.write(
+            "%s\n" % "\t".join(["MoleculeID"] + [name for name, f in functions])
+        )
 
     for mol in supplier:
         if not mol:
@@ -91,4 +115,7 @@
             molecule_id = mol.GetProp("_Name")
         except KeyError:
             molecule_id = Chem.MolToSmiles(mol)
-        args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs]))
+        args.outfile.write(
+            "%s\n"
+            % "\t".join([molecule_id] + [str(round(res, 6)) for name, res in descs])
+        )