Mercurial > repos > bgruening > ctb_rdkit_descriptors
comparison rdkit_descriptors.py @ 8:a1c53f0533b0 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author | bgruening |
---|---|
date | Wed, 17 Feb 2021 12:59:43 +0000 |
parents | 6674260c1459 |
children | 0993ac4f4a23 |
comparison
equal
deleted
inserted
replaced
7:cf725c82c865 | 8:a1c53f0533b0 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 from rdkit.Chem import Descriptors | |
4 from rdkit import Chem | |
5 import sys, os, re | |
6 import argparse | 3 import argparse |
7 import inspect | 4 import inspect |
| | 5 import sys |
8 | 6 |
9 def get_supplier( infile, format = 'smiles' ): | 7 from rdkit import Chem |
| | 8 from rdkit.Chem import Descriptors |
| | 9 |
| | 10 |
| | 11 def get_supplier(infile, format='smiles'): |
10 """ | 12 """ |
11 Returns a generator over a SMILES or InChI file. Every element is of RDKit | 13 Returns a generator over a SMILES or InChI file. Every element is of RDKit |
12 molecule and has its original string as _Name property. | 14 molecule and has its original string as _Name property. |
13 """ | 15 """ |
14 with open(infile) as handle: | 16 with open(infile) as handle: |
15 for line in handle: | 17 for line in handle: |
16 line = line.strip() | 18 line = line.strip() |
17 if format == 'smiles': | 19 if format == 'smiles': |
18 mol = Chem.MolFromSmiles( line, sanitize=True ) | 20 mol = Chem.MolFromSmiles(line, sanitize=True) |
19 elif format == 'inchi': | 21 elif format == 'inchi': |
20 mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) | 22 mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False) |
21 if mol is None: | 23 if mol is None: |
22 yield False | 24 yield False |
23 else: | 25 else: |
24 mol.SetProp( '_Name', line.split('\t')[0] ) | 26 mol.SetProp('_Name', line.split('\t')[0]) |
25 yield mol | 27 yield mol |
| | 28 |
26 | 29 |
27 def get_rdkit_descriptor_functions(): | 30 def get_rdkit_descriptor_functions(): |
28 """ | 31 """ |
29 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) | 32 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) |
30 """ | 33 """ |
31 ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] | 34 ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')] |
| | 35 # some which are not in the official Descriptors module we need to add manually |
| | 36 ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)]) |
32 ret.sort() | 37 ret.sort() |
33 return ret | 38 return ret |
34 | 39 |
35 | 40 |
36 def descriptors( mol, functions ): | 41 def descriptors(mol, functions): |
37 """ | 42 """ |
38 Calculates the descriptors of a given molecule. | 43 Calculates the descriptors of a given molecule. |
39 """ | 44 """ |
40 for name, function in functions: | 45 for name, function in functions: |
41 yield (name, function( mol )) | 46 yield (name, function(mol)) |
42 | 47 |
43 | 48 |
44 if __name__ == "__main__": | 49 if __name__ == "__main__": |
45 parser = argparse.ArgumentParser() | 50 parser = argparse.ArgumentParser() |
46 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') | 51 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') |
47 parser.add_argument("--iformat", help="Specify the input file format.") | 52 parser.add_argument("--iformat", help="Specify the input file format.") |
48 | 53 |
49 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), | 54 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), |
50 default=sys.stdout, help="path to the result file, default it sdtout") | 55 default=sys.stdout, |
| | 56 help="path to the result file, default is stdout") |
| | 57 |
| | 58 parser.add_argument('-s', '--select', default=None, |
| | 59 help="select a subset of comma-separated descriptors to use") |
51 | 60 |
52 parser.add_argument("--header", dest="header", action="store_true", | 61 parser.add_argument("--header", dest="header", action="store_true", |
53 default=False, | 62 default=False, |
54 help="Write header line.") | 63 help="Write header line.") |
55 | 64 |
56 args = parser.parse_args() | 65 args = parser.parse_args() |
57 | 66 |
58 if args.iformat == 'sdf': | 67 if args.iformat == 'sdf': |
59 supplier = Chem.SDMolSupplier( args.infile ) | 68 supplier = Chem.SDMolSupplier(args.infile) |
60 elif args.iformat =='smi': | 69 elif args.iformat == 'smi': |
61 supplier = get_supplier( args.infile, format = 'smiles' ) | 70 supplier = get_supplier(args.infile, format='smiles') |
62 elif args.iformat == 'inchi': | 71 elif args.iformat == 'inchi': |
63 supplier = get_supplier( args.infile, format = 'inchi' ) | 72 supplier = get_supplier(args.infile, format='inchi') |
| | 73 elif args.iformat == 'pdb': |
| | 74 supplier = [Chem.MolFromPDBFile(args.infile)] |
| | 75 elif args.iformat == 'mol2': |
| | 76 supplier = [Chem.MolFromMol2File(args.infile)] |
64 | 77 |
65 functions = get_rdkit_descriptor_functions() | 78 functions = get_rdkit_descriptor_functions() |
| | 79 if args.select and args.select != 'None': |
| | 80 selected = args.select.split(',') |
| | 81 functions = [(name, f) for name, f in functions if name in selected] |
66 | 82 |
67 if args.header: | 83 if args.header: |
68 args.outfile.write( '%s\n' % '\t'.join( ['MoleculeID'] + [name for name, f in functions] ) ) | 84 args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions])) |
69 | 85 |
70 for mol in supplier: | 86 for mol in supplier: |
71 if not mol: | 87 if not mol: |
72 continue | 88 continue |
73 descs = descriptors( mol, functions ) | 89 descs = descriptors(mol, functions) |
74 molecule_id = mol.GetProp("_Name") | 90 try: |
75 args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(round(res, 6)) for name, res in descs] ) ) | 91 molecule_id = mol.GetProp("_Name") |
76 | 92 except KeyError: |
| | 93 molecule_id = Chem.MolToSmiles(mol) |
| | 94 args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs])) |