0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Input: Molecules in SDF, SMILES ...
|
|
4 Output: Molecules filtered with specified substructures.
|
|
5 Copyright 2013, Bjoern Gruening and Xavier Lucas
|
|
6 """
|
|
7 import sys, os
|
|
8 import argparse
|
|
9 import openbabel
|
|
10 openbabel.obErrorLog.StopLogging()
|
|
11 import pybel
|
|
12 import multiprocessing
|
|
13 import tempfile
|
|
14 import subprocess
|
|
15 import shutil
|
|
16
|
|
def parse_command_line():
    """
    Build the command-line interface and parse the process arguments.

    Returns the argparse namespace carrying ``infile``, ``iformat``,
    ``fastsearch_index``, ``outfile``, ``oformat``, ``max_candidates``
    and ``processors``.
    """
    # One (flags, keyword arguments) entry per option, in the order the
    # options should appear in the generated help text.
    option_table = (
        (('-i', '--infile'),
         dict(required=True, help='Molecule file.')),
        (('--iformat',),
         dict(help='Input format.')),
        (('--fastsearch-index',),
         dict(dest="fastsearch_index", required=True,
              help='Path to the openbabel fastsearch index.')),
        (('-o', '--outfile'),
         dict(required=True, help='Path to the output file.')),
        (('--oformat',),
         dict(default='smi', help='Output file format')),
        (("--max-candidates",),
         dict(dest="max_candidates", type=int, default=4000,
              help="The maximum number of candidates.")),
        # Defaults to one worker per CPU reported by multiprocessing.
        (('-p', '--processors'),
         dict(type=int, default=multiprocessing.cpu_count())),
    )

    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
|
|
31
|
|
# Module-level accumulator filled by mp_callback with whatever the
# worker function returned.
results = []


def mp_callback(res):
    """Record the return value *res* of one finished worker in ``results``."""
    results.append(res)
|
|
35
|
|
def mp_helper(query, args):
    """
    Helper function for multiprocessing.

    That function is a wrapper around the following command:
    obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999

    query -- the SMILES/SMARTS pattern passed to obabel's ``-s`` option.
    args  -- parsed options; ``oformat``, ``fastsearch_index`` and
             ``max_candidates`` are read.

    Returns the tuple ``(result_path, query)``.  ``result_path`` is a
    temporary file created here with ``delete=False``; the caller is
    responsible for removing it.  A non-zero exit status of obabel is
    reported on stderr, but the tuple is returned regardless.
    """
    # 'names' is mapped onto SMILES output plus the '-xt' writer option;
    # every other value is passed through as the obabel output format.
    if args.oformat == 'names':
        format_opts = ['-osmi', '-xt']
    else:
        format_opts = ['-o%s' % args.oformat]

    # Only the unique file name is needed. Close our own handle right away
    # so the descriptor is not leaked while obabel writes to the path.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    tmp.close()

    # Build the argument vector directly instead of splitting a formatted
    # string, so whitespace inside a path is not torn into extra arguments.
    cmd = ['obabel', '-ifs', args.fastsearch_index, '-O', tmp.name]
    cmd.extend(format_opts)
    cmd.extend(['-s%s' % query, '-al', str(args.max_candidates)])

    # universal_newlines=True makes the pipes return text, which is what
    # sys.stdout/sys.stderr accept on both Python 2 and Python 3.
    child = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             universal_newlines=True)

    stdout, stderr = child.communicate()
    return_code = child.returncode

    if return_code:
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % ' '.join(cmd))
    else:
        sys.stdout.write(stdout)
        # On success obabel's stderr is forwarded to stdout.
        # NOTE(review): presumably so informational messages are not
        # mistaken for a failure by the calling framework -- confirm.
        sys.stdout.write(stderr)
    return (tmp.name, query)
|
|
66
|
|
67
|
|
def get_smiles_or_smarts(args):
    """
    Wrapper to retrieve a stripped SMILES or SMARTS string from different
    input formats.

    This is a generator.  For the line-based formats ('smi', 'text',
    'tabular') the first tab-separated column of every line of
    ``args.infile`` is yielded; blank lines are skipped because an empty
    pattern is not a usable query.  Any other ``args.iformat`` is read with
    ``pybel.readfile`` and each molecule is converted to SMILES.
    """
    if args.iformat in ('smi', 'text', 'tabular'):
        with open(args.infile) as text_file:
            for line in text_file:
                query = line.split('\t')[0].strip()
                if query:
                    yield query
    else:
        # inchi or sdf files
        for mol in pybel.readfile(args.iformat, args.infile):
            # Keep only the part before the first tab of the SMILES output
            # (the remainder is expected to be the molecule title).
            yield mol.write('smiles').split('\t')[0].strip()
|
|
80
|
|
def substructure_search(args):
    """
    Run one fastsearch per input query in a worker pool and merge the hits.

    Every query produced by ``get_smiles_or_smarts(args)`` is submitted to
    ``mp_helper``; finished jobs are collected in the module-level
    ``results`` list through ``mp_callback``.  After all workers are done
    the per-query result files are concatenated into ``args.outfile`` and
    removed.  Results are merged in the order in which they were appended
    to ``results``, which need not match the order of the input queries.

    With ``args.oformat == 'names'`` each output line is the stripped
    result line followed by a tab and the query that matched it; for every
    other format the result files are copied verbatim in binary mode.
    """
    pool = multiprocessing.Pool(args.processors)
    for query in get_smiles_or_smarts(args):
        pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
    pool.close()
    pool.join()

    # Context managers guarantee that the output file and every result file
    # are closed even if reading or writing fails midway.
    if args.oformat == 'names':
        with open(args.outfile, 'w') as out_handle:
            for result_file, query in results:
                with open(result_file) as res_handle:
                    for line in res_handle:
                        out_handle.write('%s\t%s\n' % (line.strip(), query))
                os.remove(result_file)
    else:
        with open(args.outfile, 'wb') as out_handle:
            for result_file, query in results:
                with open(result_file, 'rb') as res_handle:
                    shutil.copyfileobj(res_handle, out_handle)
                os.remove(result_file)
|
|
106
|
|
107
|
|
def __main__():
    """
    Multiprocessing Open Babel Substructure Search.

    Parses the command line and hands the parsed options to the search.
    """
    substructure_search(parse_command_line())
|
|
114
|
|
# Start the search only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
|