Mercurial > repos > bgruening > ctb_rdkit_descriptors
comparison rdkit_descriptors.py @ 9:0993ac4f4a23 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:40:00 +0000 |
parents | a1c53f0533b0 |
children |
comparison
equal
deleted
inserted
replaced
8:a1c53f0533b0 | 9:0993ac4f4a23 |
---|---|
6 | 6 |
7 from rdkit import Chem | 7 from rdkit import Chem |
8 from rdkit.Chem import Descriptors | 8 from rdkit.Chem import Descriptors |
9 | 9 |
10 | 10 |
11 def get_supplier(infile, format='smiles'): | 11 def get_supplier(infile, format="smiles"): |
12 """ | 12 """ |
13 Returns a generator over a SMILES or InChI file. Every element is of RDKit | 13 Returns a generator over a SMILES or InChI file. Every element is of RDKit |
14 molecule and has its original string as _Name property. | 14 molecule and has its original string as _Name property. |
15 """ | 15 """ |
16 with open(infile) as handle: | 16 with open(infile) as handle: |
17 for line in handle: | 17 for line in handle: |
18 line = line.strip() | 18 line = line.strip() |
19 if format == 'smiles': | 19 if format == "smiles": |
20 mol = Chem.MolFromSmiles(line, sanitize=True) | 20 mol = Chem.MolFromSmiles(line, sanitize=True) |
21 elif format == 'inchi': | 21 elif format == "inchi": |
22 mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False) | 22 mol = Chem.inchi.MolFromInchi( |
23 line, | |
24 sanitize=True, | |
25 removeHs=True, | |
26 logLevel=None, | |
27 treatWarningAsError=False, | |
28 ) | |
23 if mol is None: | 29 if mol is None: |
24 yield False | 30 yield False |
25 else: | 31 else: |
26 mol.SetProp('_Name', line.split('\t')[0]) | 32 mol.SetProp("_Name", line.split("\t")[0]) |
27 yield mol | 33 yield mol |
28 | 34 |
29 | 35 |
30 def get_rdkit_descriptor_functions(): | 36 def get_rdkit_descriptor_functions(): |
31 """ | 37 """ |
32 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) | 38 Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) |
33 """ | 39 """ |
34 ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')] | 40 ret = [ |
41 (name, f) | |
42 for name, f in inspect.getmembers(Descriptors) | |
43 if inspect.isfunction(f) and not name.startswith("_") | |
44 ] | |
35 # some which are not in the official Descriptors module we need to add manually | 45 # some which are not in the official Descriptors module we need to add manually |
36 ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)]) | 46 ret.extend([("FormalCharge", Chem.GetFormalCharge), ("SSSR", Chem.GetSSSR)]) |
37 ret.sort() | 47 ret.sort() |
38 return ret | 48 return ret |
39 | 49 |
40 | 50 |
41 def descriptors(mol, functions): | 51 def descriptors(mol, functions): |
46 yield (name, function(mol)) | 56 yield (name, function(mol)) |
47 | 57 |
48 | 58 |
49 if __name__ == "__main__": | 59 if __name__ == "__main__": |
50 parser = argparse.ArgumentParser() | 60 parser = argparse.ArgumentParser() |
51 parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') | 61 parser.add_argument("-i", "--infile", required=True, help="Path to the input file.") |
52 parser.add_argument("--iformat", help="Specify the input file format.") | 62 parser.add_argument("--iformat", help="Specify the input file format.") |
53 | 63 |
54 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), | 64 parser.add_argument( |
55 default=sys.stdout, | 65 "-o", |
56 help="path to the result file, default is stdout") | 66 "--outfile", |
67 type=argparse.FileType("w+"), | |
68 default=sys.stdout, | |
69 help="path to the result file, default is stdout", | |
70 ) | |
57 | 71 |
58 parser.add_argument('-s', '--select', default=None, | 72 parser.add_argument( |
59 help="select a subset of comma-separated descriptors to use") | 73 "-s", |
74 "--select", | |
75 default=None, | |
76 help="select a subset of comma-separated descriptors to use", | |
77 ) | |
60 | 78 |
61 parser.add_argument("--header", dest="header", action="store_true", | 79 parser.add_argument( |
62 default=False, | 80 "--header", |
63 help="Write header line.") | 81 dest="header", |
82 action="store_true", | |
83 default=False, | |
84 help="Write header line.", | |
85 ) | |
64 | 86 |
65 args = parser.parse_args() | 87 args = parser.parse_args() |
66 | 88 |
67 if args.iformat == 'sdf': | 89 if args.iformat == "sdf": |
68 supplier = Chem.SDMolSupplier(args.infile) | 90 supplier = Chem.SDMolSupplier(args.infile) |
69 elif args.iformat == 'smi': | 91 elif args.iformat == "smi": |
70 supplier = get_supplier(args.infile, format='smiles') | 92 supplier = get_supplier(args.infile, format="smiles") |
71 elif args.iformat == 'inchi': | 93 elif args.iformat == "inchi": |
72 supplier = get_supplier(args.infile, format='inchi') | 94 supplier = get_supplier(args.infile, format="inchi") |
73 elif args.iformat == 'pdb': | 95 elif args.iformat == "pdb": |
74 supplier = [Chem.MolFromPDBFile(args.infile)] | 96 supplier = [Chem.MolFromPDBFile(args.infile)] |
75 elif args.iformat == 'mol2': | 97 elif args.iformat == "mol2": |
76 supplier = [Chem.MolFromMol2File(args.infile)] | 98 supplier = [Chem.MolFromMol2File(args.infile)] |
77 | 99 |
78 functions = get_rdkit_descriptor_functions() | 100 functions = get_rdkit_descriptor_functions() |
79 if args.select and args.select != 'None': | 101 if args.select and args.select != "None": |
80 selected = args.select.split(',') | 102 selected = args.select.split(",") |
81 functions = [(name, f) for name, f in functions if name in selected] | 103 functions = [(name, f) for name, f in functions if name in selected] |
82 | 104 |
83 if args.header: | 105 if args.header: |
84 args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions])) | 106 args.outfile.write( |
107 "%s\n" % "\t".join(["MoleculeID"] + [name for name, f in functions]) | |
108 ) | |
85 | 109 |
86 for mol in supplier: | 110 for mol in supplier: |
87 if not mol: | 111 if not mol: |
88 continue | 112 continue |
89 descs = descriptors(mol, functions) | 113 descs = descriptors(mol, functions) |
90 try: | 114 try: |
91 molecule_id = mol.GetProp("_Name") | 115 molecule_id = mol.GetProp("_Name") |
92 except KeyError: | 116 except KeyError: |
93 molecule_id = Chem.MolToSmiles(mol) | 117 molecule_id = Chem.MolToSmiles(mol) |
94 args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs])) | 118 args.outfile.write( |
119 "%s\n" | |
120 % "\t".join([molecule_id] + [str(round(res, 6)) for name, res in descs]) | |
121 ) |