Mercurial > repos > bgruening > ctb_rdkit_descriptors
diff rdkit_descriptors.py @ 8:a1c53f0533b0 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author | bgruening |
---|---|
date | Wed, 17 Feb 2021 12:59:43 +0000 |
parents | 6674260c1459 |
children | 0993ac4f4a23 |
line wrap: on
line diff
--- a/rdkit_descriptors.py Tue Jul 28 08:43:19 2020 -0400 +++ b/rdkit_descriptors.py Wed Feb 17 12:59:43 2021 +0000 @@ -1,44 +1,49 @@ #!/usr/bin/env python -from rdkit.Chem import Descriptors -from rdkit import Chem -import sys, os, re import argparse import inspect +import sys -def get_supplier( infile, format = 'smiles' ): +from rdkit import Chem +from rdkit.Chem import Descriptors + + +def get_supplier(infile, format='smiles'): """ - Returns a generator over a SMILES or InChI file. Every element is of RDKit + Returns a generator over a SMILES or InChI file. Every element is of RDKit molecule and has its original string as _Name property. """ with open(infile) as handle: for line in handle: line = line.strip() if format == 'smiles': - mol = Chem.MolFromSmiles( line, sanitize=True ) + mol = Chem.MolFromSmiles(line, sanitize=True) elif format == 'inchi': - mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) + mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False) if mol is None: yield False else: - mol.SetProp( '_Name', line.split('\t')[0] ) + mol.SetProp('_Name', line.split('\t')[0]) yield mol + def get_rdkit_descriptor_functions(): """ Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) """ - ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] + ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')] + # some which are not in the official Descriptors module we need to add manually + ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)]) ret.sort() return ret -def descriptors( mol, functions ): +def descriptors(mol, functions): """ Calculates the descriptors of a given molecule. """ for name, function in functions: - yield (name, function( mol )) + yield (name, function(mol)) if __name__ == "__main__": @@ -46,31 +51,44 @@ parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') parser.add_argument("--iformat", help="Specify the input file format.") - parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), - default=sys.stdout, help="path to the result file, default it sdtout") + parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), + default=sys.stdout, + help="path to the result file, default is stdout") + + parser.add_argument('-s', '--select', default=None, + help="select a subset of comma-separated descriptors to use") parser.add_argument("--header", dest="header", action="store_true", - default=False, - help="Write header line.") + default=False, + help="Write header line.") args = parser.parse_args() if args.iformat == 'sdf': - supplier = Chem.SDMolSupplier( args.infile ) - elif args.iformat =='smi': - supplier = get_supplier( args.infile, format = 'smiles' ) + supplier = Chem.SDMolSupplier(args.infile) + elif args.iformat == 'smi': + supplier = get_supplier(args.infile, format='smiles') elif args.iformat == 'inchi': - supplier = get_supplier( args.infile, format = 'inchi' ) + supplier = get_supplier(args.infile, format='inchi') + elif args.iformat == 'pdb': + supplier = [Chem.MolFromPDBFile(args.infile)] + elif args.iformat == 'mol2': + supplier = [Chem.MolFromMol2File(args.infile)] functions = get_rdkit_descriptor_functions() + if args.select and args.select != 'None': + selected = args.select.split(',') + functions = [(name, f) for name, f in functions if name in selected] if args.header: - args.outfile.write( '%s\n' % '\t'.join( ['MoleculeID'] + [name for name, f in functions] ) ) + args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions])) for mol in supplier: if not mol: continue - descs = descriptors( mol, functions ) - molecule_id = mol.GetProp("_Name") - args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(round(res, 6)) for name, res in descs] ) ) - + descs = descriptors(mol, functions) + try: + molecule_id = mol.GetProp("_Name") + except KeyError: + molecule_id = Chem.MolToSmiles(mol) + args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs]))