0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Input: set of molecules with pre-calculated physico-chemical properties
|
|
4 Output: set of molecules that pass all the filters
|
|
5 Copyright 2012, Bjoern Gruening and Xavier Lucas
|
|
6
|
|
7 TODO: AND/OR conditions?
|
|
8 """
|
|
9 import sys, os
|
|
10 import argparse
|
|
11 import cheminfolib
|
|
12 import json
|
|
13 import pybel
|
|
14 import shlex, subprocess
|
|
15
|
|
16 cheminfolib.pybel_stop_logging()
|
|
17
|
|
18 def parse_command_line():
|
|
19 parser = argparse.ArgumentParser()
|
|
20 parser.add_argument('-i', '--input', help='Input file name')
|
|
21 parser.add_argument('-iformat', help='Input file format')
|
|
22 parser.add_argument('-oformat',
|
|
23 default='smi',
|
|
24 help='Output file format')
|
|
25 parser.add_argument('-o', '--output',
|
|
26 help='Output file name',
|
|
27 required=True)
|
|
28 parser.add_argument('--filters',
|
|
29 help="Specify the filters to apply",
|
|
30 required=True,
|
|
31 )
|
|
32 return parser.parse_args()
|
|
33
|
|
34 def filter_precalculated_compounds(args, filters):
|
|
35 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
|
|
36 for mol in pybel.readfile('sdf', args.input):
|
|
37 for key, elem in filters.items():
|
|
38 # map the short description to the larger metadata names stored in the sdf file
|
|
39 property = cheminfolib.ColumnNames[key]
|
|
40 min = elem[0]
|
|
41 max = elem[1]
|
|
42 if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max):
|
|
43 pass
|
|
44 else:
|
|
45 # leave the filter loop, because one filter constrained are not satisfied
|
|
46 break
|
|
47 else:
|
|
48 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound
|
|
49 outfile.write(mol)
|
|
50 outfile.close()
|
|
51
|
|
52 def filter_new_compounds(args, filters):
|
|
53
|
|
54 if args.iformat == args.oformat:
|
|
55 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
|
|
56 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
|
|
57 cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output)
|
|
58 else:
|
|
59 cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output)
|
|
60 filter_cmd = ''
|
|
61 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
|
|
62 for key, elem in filters.items():
|
|
63 ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
|
|
64 min = elem[0]
|
|
65 max = elem[1]
|
|
66 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)
|
|
67
|
|
68 args = shlex.split('%s "%s"' % (cmd, filter_cmd))
|
|
69 #print '%s "%s"' % (cmd, filter_cmd)
|
|
70 # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout
|
|
71 child = subprocess.Popen(args,
|
|
72 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
73
|
|
74 stdout, stderr = child.communicate()
|
|
75 return_code = child.returncode
|
|
76
|
|
77 if return_code:
|
|
78 sys.stdout.write(stdout)
|
|
79 sys.stderr.write(stderr)
|
|
80 sys.stderr.write("Return error code %i from command:\n" % return_code)
|
|
81 sys.stderr.write("%s\n" % cmd)
|
|
82 else:
|
|
83 sys.stdout.write(stdout)
|
|
84 sys.stdout.write(stderr)
|
|
85
|
|
86
|
|
87 def __main__():
|
|
88 """
|
|
89 Select compounds with certain properties from a small library
|
|
90 """
|
|
91 args = parse_command_line()
|
|
92 # Its a small trick to get the parameters in an easy way from the xml file.
|
|
93 # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed.
|
|
94 # Also the last loop creates a ',{' that is not an valid jason expression.
|
|
95 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}'))
|
|
96 if args.iformat == 'sdf':
|
|
97 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering
|
|
98 mol = pybel.readfile('sdf', args.input).next()
|
|
99 for key, elem in filters.items():
|
|
100 property = cheminfolib.ColumnNames[key]
|
|
101 if not property in mol.data:
|
|
102 break
|
|
103 else:
|
|
104 # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule
|
|
105 # assume it is the same for all other molecules and start the precalculated filtering
|
|
106 filter_precalculated_compounds(args, filters)
|
|
107 return True
|
|
108 filter_new_compounds(args, filters)
|
|
109
|
|
110
|
|
111 if __name__ == "__main__" :
|
|
112 __main__()
|