comparison rdkit_descriptors.py @ 5:351fbd750a6d draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author bgruening
date Wed, 17 Feb 2021 13:00:12 +0000
parents 06828e0cc8a7
children 4beb3e026bbb
comparison
equal deleted inserted replaced
4:55553120df69 5:351fbd750a6d
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 from rdkit.Chem import Descriptors
4 from rdkit import Chem
5 import sys, os, re
6 import argparse 3 import argparse
7 import inspect 4 import inspect
5 import sys
8 6
9 def get_supplier( infile, format = 'smiles' ): 7 from rdkit import Chem
8 from rdkit.Chem import Descriptors
9
10
11 def get_supplier(infile, format='smiles'):
10 """ 12 """
11 Returns a generator over a SMILES or InChI file. Every element is of RDKit 13 Returns a generator over a SMILES or InChI file. Every element is of RDKit
12 molecule and has its original string as _Name property. 14 molecule and has its original string as _Name property.
13 """ 15 """
14 with open(infile) as handle: 16 with open(infile) as handle:
15 for line in handle: 17 for line in handle:
16 line = line.strip() 18 line = line.strip()
17 if format == 'smiles': 19 if format == 'smiles':
18 mol = Chem.MolFromSmiles( line, sanitize=True ) 20 mol = Chem.MolFromSmiles(line, sanitize=True)
19 elif format == 'inchi': 21 elif format == 'inchi':
20 mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) 22 mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False)
21 if mol is None: 23 if mol is None:
22 yield False 24 yield False
23 else: 25 else:
24 mol.SetProp( '_Name', line.split('\t')[0] ) 26 mol.SetProp('_Name', line.split('\t')[0])
25 yield mol 27 yield mol
28
26 29
27 def get_rdkit_descriptor_functions(): 30 def get_rdkit_descriptor_functions():
28 """ 31 """
29 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) 32 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function)
30 """ 33 """
31 ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] 34 ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')]
35 # some which are not in the official Descriptors module we need to add manually
36 ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)])
32 ret.sort() 37 ret.sort()
33 return ret 38 return ret
34 39
35 40
36 def descriptors( mol, functions ): 41 def descriptors(mol, functions):
37 """ 42 """
38 Calculates the descriptors of a given molecule. 43 Calculates the descriptors of a given molecule.
39 """ 44 """
40 for name, function in functions: 45 for name, function in functions:
41 yield (name, function( mol )) 46 yield (name, function(mol))
42 47
43 48
44 if __name__ == "__main__": 49 if __name__ == "__main__":
45 parser = argparse.ArgumentParser() 50 parser = argparse.ArgumentParser()
46 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') 51 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
47 parser.add_argument("--iformat", help="Specify the input file format.") 52 parser.add_argument("--iformat", help="Specify the input file format.")
48 53
49 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), 54 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
50 default=sys.stdout, help="path to the result file, default it sdtout") 55 default=sys.stdout,
56 help="path to the result file, default is stdout")
57
58 parser.add_argument('-s', '--select', default=None,
59 help="select a subset of comma-separated descriptors to use")
51 60
52 parser.add_argument("--header", dest="header", action="store_true", 61 parser.add_argument("--header", dest="header", action="store_true",
53 default=False, 62 default=False,
54 help="Write header line.") 63 help="Write header line.")
55 64
56 args = parser.parse_args() 65 args = parser.parse_args()
57 66
58 if args.iformat == 'sdf': 67 if args.iformat == 'sdf':
59 supplier = Chem.SDMolSupplier( args.infile ) 68 supplier = Chem.SDMolSupplier(args.infile)
60 elif args.iformat =='smi': 69 elif args.iformat == 'smi':
61 supplier = get_supplier( args.infile, format = 'smiles' ) 70 supplier = get_supplier(args.infile, format='smiles')
62 elif args.iformat == 'inchi': 71 elif args.iformat == 'inchi':
63 supplier = get_supplier( args.infile, format = 'inchi' ) 72 supplier = get_supplier(args.infile, format='inchi')
73 elif args.iformat == 'pdb':
74 supplier = [Chem.MolFromPDBFile(args.infile)]
75 elif args.iformat == 'mol2':
76 supplier = [Chem.MolFromMol2File(args.infile)]
64 77
65 functions = get_rdkit_descriptor_functions() 78 functions = get_rdkit_descriptor_functions()
79 if args.select and args.select != 'None':
80 selected = args.select.split(',')
81 functions = [(name, f) for name, f in functions if name in selected]
66 82
67 if args.header: 83 if args.header:
68 args.outfile.write( '%s\n' % '\t'.join( ['MoleculeID'] + [name for name, f in functions] ) ) 84 args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions]))
69 85
70 for mol in supplier: 86 for mol in supplier:
71 if not mol: 87 if not mol:
72 continue 88 continue
73 descs = descriptors( mol, functions ) 89 descs = descriptors(mol, functions)
74 molecule_id = mol.GetProp("_Name") 90 try:
75 args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(round(res, 6)) for name, res in descs] ) ) 91 molecule_id = mol.GetProp("_Name")
76 92 except KeyError:
93 molecule_id = Chem.MolToSmiles(mol)
94 args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs]))