0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
Input: Molecule file (SDF, SMILES, ...) and a query file with one SMARTS pattern per line.
Output: Molecule file filtered with obgrep.
Copyright 2013, Bjoern Gruening and Xavier Lucas
|
|
6 """
|
|
7 import sys, os
|
|
8 import argparse
|
|
9 import openbabel
|
|
10 openbabel.obErrorLog.StopLogging()
|
|
11 import pybel
|
|
12 import multiprocessing
|
|
13 import tempfile
|
|
14 import subprocess
|
|
15 import shutil
|
|
16 import shlex
|
|
17
|
|
def parse_command_line():
    """
    Build the command line interface and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` holding the input, query and output
    paths, the optional input format, the processor count and the switches
    that are later translated into obgrep options.
    """
    cli = argparse.ArgumentParser()

    # The three mandatory file arguments share the same shape.
    required_files = (
        ('-i', '--infile', 'Molecule file.'),
        ('-q', '--query', 'Query file, containing different SMARTS in each line.'),
        ('-o', '--outfile', 'Path to the output file.'),
    )
    for short_flag, long_flag, description in required_files:
        cli.add_argument(short_flag, long_flag, required=True, help=description)

    cli.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
    cli.add_argument(
        "--n-times",
        dest="n_times",
        type=int,
        default=0,
        help="Print a molecule only if the pattern occurs # times inside the molecule.",
    )
    cli.add_argument(
        '-p',
        '--processors',
        type=int,
        default=multiprocessing.cpu_count(),
    )

    # Plain on/off switches, all disabled unless given.
    switches = (
        ("--invert-matches", "invert_matches",
         "Invert the matching, print non-matching molecules."),
        ("--only-name", "only_name",
         "Only print the name of the molecules."),
        ("--full-match", "full_match",
         "Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern."),
        ("--number-of-matches", "number_of_matches",
         "Print the number of matches."),
    )
    for flag, destination, description in switches:
        cli.add_argument(flag, dest=destination, action="store_true",
                         default=False, help=description)

    return cli.parse_args()
|
|
36
|
|
# Module-level collector filled by mp_callback, one entry per finished job.
results = []


def mp_callback(res):
    """
    Callback for the worker pool: store the value returned by one job
    in the module-level ``results`` list.
    """
    results.append(res)
|
|
40
|
|
def mp_helper(query, args):
    """
    Helper function for multiprocessing.
    That function is a wrapper around obgrep.

    Runs ``obgrep`` for a single query against ``args.infile`` and stores
    the program's standard output in a freshly created temporary file.

    query -- the pattern handed to obgrep as one argument.
    args  -- object providing ``invert_matches``, ``only_name``,
             ``full_match``, ``number_of_matches``, ``n_times`` and
             ``infile``.

    Returns the tuple ``(path_of_temporary_result_file, query)``. The
    temporary file is not deleted here; the caller has to remove it.
    The exit status of obgrep is not checked and its standard error is
    captured and discarded.
    """
    # Build the argument vector directly instead of formatting a string and
    # re-tokenising it: that way a query or an input path containing
    # whitespace, quotes or backslashes reaches obgrep unchanged.
    cmd = ['obgrep']
    if args.invert_matches:
        cmd.append('-v')
    if args.only_name:
        cmd.append('-n')
    if args.full_match:
        cmd.append('-f')
    if args.number_of_matches:
        cmd.append('-c')
    if args.n_times:
        cmd.extend(['-t', str(args.n_times)])
    cmd.extend([query, args.infile])

    # delete=False keeps the file on disk after the handle is closed, so the
    # caller can read it by name; the with-block only closes our handle.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        child = subprocess.Popen(cmd, stdout=tmp, stderr=subprocess.PIPE)
        # communicate() waits for the child and drains stderr so the pipe
        # cannot fill up and block obgrep.
        child.communicate()
    return (tmp.name, query)
|
|
66
|
|
67
|
|
def obgrep(args):
    """
    Run one obgrep job per query line in parallel and concatenate the hits.

    args -- object providing ``infile``, ``iformat``, ``query``,
            ``outfile`` and ``processors`` plus everything ``mp_helper``
            reads.

    Every line of ``args.query`` (stripped) is submitted to a worker pool
    of ``args.processors`` processes. The per-query result files are
    appended to ``args.outfile`` in the order of the query file and are
    removed afterwards. An exception raised inside a worker is re-raised
    here instead of being lost.

    While the jobs run, ``args.infile`` points to a temporary symlink whose
    name ends in ``.<iformat>``; it is restored and the link is removed
    before returning, also on error. Without ``args.iformat`` the input
    file is used as it is.
    """
    original_infile = args.infile
    temp_link = None
    if args.iformat:
        # Reserve a unique name in the temp directory and append the format
        # as extension (presumably so that obgrep can detect the input
        # format from the file name).
        temp_file = tempfile.NamedTemporaryFile()
        temp_link = "%s.%s" % (temp_file.name, args.iformat)
        temp_file.close()
        # A relative link target is resolved against the directory of the
        # link, not the working directory, hence the absolute path.
        os.symlink(os.path.abspath(original_infile), temp_link)
        args.infile = temp_link

    try:
        with open(args.query) as query_handle:
            queries = [line.strip() for line in query_handle]

        pool = multiprocessing.Pool(args.processors)
        # Keep the result handles: they preserve the query order and make
        # worker failures visible through get().
        pending = [pool.apply_async(mp_helper, args=(query, args))
                   for query in queries]
        pool.close()
        pool.join()

        with open(args.outfile, 'wb') as out_handle:
            for job in pending:
                result_file, _query = job.get()
                with open(result_file, 'rb') as res_handle:
                    shutil.copyfileobj(res_handle, out_handle)
                os.remove(result_file)
    finally:
        # Do not leave the caller's args pointing at a deleted link.
        args.infile = original_infile
        if temp_link is not None:
            os.remove(temp_link)
|
|
92
|
|
def __main__():
    """
    Entry point of the script: parse the command line and start the
    multiprocessing obgrep search with the parsed arguments.
    """
    obgrep(parse_command_line())


if __name__ == "__main__":
    __main__()
|