Mercurial > repos > bgruening > ctb_rdkit_descriptors
diff rdkit_descriptors.py @ 9:0993ac4f4a23 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:40:00 +0000 |
parents | a1c53f0533b0 |
children |
line wrap: on
line diff
--- a/rdkit_descriptors.py Wed Feb 17 12:59:43 2021 +0000 +++ b/rdkit_descriptors.py Sat Dec 04 16:40:00 2021 +0000 @@ -8,7 +8,7 @@ from rdkit.Chem import Descriptors -def get_supplier(infile, format='smiles'): +def get_supplier(infile, format="smiles"): """ Returns a generator over a SMILES or InChI file. Every element is of RDKit molecule and has its original string as _Name property. @@ -16,14 +16,20 @@ with open(infile) as handle: for line in handle: line = line.strip() - if format == 'smiles': + if format == "smiles": mol = Chem.MolFromSmiles(line, sanitize=True) - elif format == 'inchi': - mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False) + elif format == "inchi": + mol = Chem.inchi.MolFromInchi( + line, + sanitize=True, + removeHs=True, + logLevel=None, + treatWarningAsError=False, + ) if mol is None: yield False else: - mol.SetProp('_Name', line.split('\t')[0]) + mol.SetProp("_Name", line.split("\t")[0]) yield mol @@ -31,9 +37,13 @@ """ Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) """ - ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')] + ret = [ + (name, f) + for name, f in inspect.getmembers(Descriptors) + if inspect.isfunction(f) and not name.startswith("_") + ] # some which are not in the official Descriptors module we need to add manually - ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)]) + ret.extend([("FormalCharge", Chem.GetFormalCharge), ("SSSR", Chem.GetSSSR)]) ret.sort() return ret @@ -48,40 +58,54 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') + parser.add_argument("-i", "--infile", required=True, help="Path to the input file.") parser.add_argument("--iformat", help="Specify the input file format.") - parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), - default=sys.stdout, - help="path to the result file, default is stdout") + parser.add_argument( + "-o", + "--outfile", + type=argparse.FileType("w+"), + default=sys.stdout, + help="path to the result file, default is stdout", + ) - parser.add_argument('-s', '--select', default=None, - help="select a subset of comma-separated descriptors to use") + parser.add_argument( + "-s", + "--select", + default=None, + help="select a subset of comma-separated descriptors to use", + ) - parser.add_argument("--header", dest="header", action="store_true", - default=False, - help="Write header line.") + parser.add_argument( + "--header", + dest="header", + action="store_true", + default=False, + help="Write header line.", + ) args = parser.parse_args() - if args.iformat == 'sdf': + if args.iformat == "sdf": supplier = Chem.SDMolSupplier(args.infile) - elif args.iformat == 'smi': - supplier = get_supplier(args.infile, format='smiles') - elif args.iformat == 'inchi': - supplier = get_supplier(args.infile, format='inchi') - elif args.iformat == 'pdb': + elif args.iformat == "smi": + supplier = get_supplier(args.infile, format="smiles") + elif args.iformat == "inchi": + supplier = get_supplier(args.infile, format="inchi") + elif args.iformat == "pdb": supplier = [Chem.MolFromPDBFile(args.infile)] - elif args.iformat == 'mol2': + elif args.iformat == "mol2": supplier = [Chem.MolFromMol2File(args.infile)] functions = get_rdkit_descriptor_functions() - if args.select and args.select != 'None': - selected = args.select.split(',') + if args.select and args.select != "None": + selected = args.select.split(",") functions = [(name, f) for name, f in functions if name in selected] if args.header: - args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions])) + args.outfile.write( + "%s\n" % "\t".join(["MoleculeID"] + [name for name, f in functions]) + ) for mol in supplier: if not mol: @@ -91,4 +115,7 @@ molecule_id = mol.GetProp("_Name") except KeyError: molecule_id = Chem.MolToSmiles(mol) - args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs])) + args.outfile.write( + "%s\n" + % "\t".join([molecule_id] + [str(round(res, 6)) for name, res in descs]) + )