annotate filter/multi_obgrep.py @ 2:125da3a296ca draft default tip

Uploaded
author bgruening
date Wed, 15 Jul 2015 12:13:08 -0400
parents 527ecd2fc500
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
2 """
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
3 Input: Molecules in SDF, SMILES ...
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
4 Output: Molecule file filtered with obgrep.
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
5 Copyright 2013, Bjoern Gruening and Xavier Lucas
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
6 """
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
7 import sys, os
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
8 import argparse
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
9 import openbabel
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
10 openbabel.obErrorLog.StopLogging()
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
11 import pybel
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
12 import multiprocessing
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
13 import tempfile
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
14 import subprocess
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
15 import shutil
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
16 import shlex
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
17
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def parse_command_line(argv=None):
    """
    Parse the command line options of the multi-query obgrep wrapper.

    argv -- optional list of argument strings. When None (the default)
            argparse reads sys.argv[1:], so existing callers that pass
            nothing behave exactly as before.

    Returns the argparse.Namespace with the attributes infile, query,
    outfile, iformat, n_times, processors, invert_matches, only_name,
    full_match and number_of_matches.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
    parser.add_argument('-q', '--query', required=True,
        help='Query file, containing different SMARTS in each line.')
    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
    parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--n-times", dest="n_times", type=int,
        default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
    parser.add_argument('-p', '--processors', type=int,
        default=multiprocessing.cpu_count(),
        help='Number of worker processes (default: number of CPUs).')
    parser.add_argument("--invert-matches", dest="invert_matches", action="store_true",
        default=False, help="Invert the matching, print non-matching molecules.")
    parser.add_argument("--only-name", dest="only_name", action="store_true",
        default=False, help="Only print the name of the molecules.")
    parser.add_argument("--full-match", dest="full_match", action="store_true",
        default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
    parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true",
        default=False, help="Print the number of matches.")
    # parse_args(None) falls back to sys.argv[1:], keeping the old behaviour.
    return parser.parse_args(argv)
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
36
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
# Module-level accumulator; mp_callback adds one entry per call.
results = []


def mp_callback(res):
    """
    Store the value handed over by the pool in the module-level results list.
    """
    results.append(res)
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
40
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def mp_helper(query, args):
    """
    Helper function for multiprocessing.
    Runs obgrep for one SMARTS query and stores its standard output in a
    temporary file.

    query -- SMARTS pattern, passed to obgrep as a single argument.
    args  -- parsed options; reads invert_matches, only_name, full_match,
             number_of_matches, n_times and infile.

    Returns the tuple (path of the temporary result file, query). The file is
    created with delete=False, so removing it is left to the caller.
    """
    command = ['obgrep']
    if args.invert_matches:
        command.append('-v')
    if args.only_name:
        command.append('-n')
    if args.full_match:
        command.append('-f')
    if args.number_of_matches:
        command.append('-c')
    if args.n_times:
        command.extend(['-t', str(args.n_times)])
    # The command is an argument list rather than a string that gets re-split:
    # quotes inside the SMARTS pattern and whitespace in the input path reach
    # obgrep unchanged.
    command.extend([query, args.infile])

    # The temporary file itself receives the child's stdout; the with-block
    # closes the handle once the child has finished, the file stays on disk.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        child = subprocess.Popen(command, stdout=tmp, stderr=subprocess.PIPE)
        # stderr is drained and discarded, as before.
        child.communicate()
    return (tmp.name, query)
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
66
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
67
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def obgrep(args):
    """
    Run obgrep once per SMARTS query in parallel and merge the hits.

    args -- parsed options; query, infile, iformat, processors and outfile
            are read here, and the object is forwarded to mp_helper for the
            obgrep flags. It is not modified.

    Every non-empty line of args.query is handed to a worker pool. The
    per-query result files are concatenated into args.outfile in the order of
    the query file and removed afterwards. An exception raised inside a worker
    is re-raised here instead of being dropped silently.
    """
    with open(args.query) as query_handle:
        stripped = (line.strip() for line in query_handle)
        # A blank line carries no SMARTS pattern, so no obgrep run is spent on it.
        queries = [query for query in stripped if query]

    worker_args = args
    link_dir = None
    if args.iformat:
        # The input is exposed under a name ending in the requested format, so
        # obgrep sees a matching file extension. The link lives in a private
        # directory, which avoids reserving a name and deleting it again.
        link_dir = tempfile.mkdtemp()
        temp_link = os.path.join(link_dir, 'input.%s' % args.iformat)
        # A relative symlink target is resolved against the link's directory,
        # not the working directory, hence the absolute path.
        os.symlink(os.path.abspath(args.infile), temp_link)
        # The workers get a copy, so the caller's args keeps its infile.
        worker_args = argparse.Namespace(**vars(args))
        worker_args.infile = temp_link

    try:
        pool = multiprocessing.Pool(args.processors)
        try:
            jobs = [pool.apply_async(mp_helper, args=(query, worker_args))
                    for query in queries]
            pool.close()
            pool.join()
        finally:
            # No-op after a clean close/join; stops the workers on error.
            pool.terminate()

        with open(args.outfile, 'wb') as out_handle:
            # Walking the jobs in submission order keeps the output
            # deterministic; get() re-raises a worker's exception.
            for job in jobs:
                result_file, _ = job.get()
                try:
                    with open(result_file, 'rb') as res_handle:
                        shutil.copyfileobj(res_handle, out_handle)
                finally:
                    os.remove(result_file)
    finally:
        if link_dir is not None:
            shutil.rmtree(link_dir)
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
92
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def __main__():
    """
    Entry point: parse the command line, then run the parallel obgrep search.
    """
    obgrep(parse_command_line())
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
99
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
# Run the search only when the file is executed as a script, not on import.
if __name__ == '__main__':
    __main__()