Mercurial > repos > bgruening > openbabel_remduplicates
comparison multi_obgrep.py @ 0:75d6c2b7907a draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
author | bgruening |
---|---|
date | Sat, 20 May 2017 08:39:17 -0400 |
parents | |
children | 50ca8845e7f5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:75d6c2b7907a |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Input: Molecules in SDF, SMILES ... | |
4 Output: Molecule file filtered with obgrep. | |
5 Copyright 2013, Bjoern Gruening and Xavier Lucas | |
6 """ | |
7 import sys, os | |
8 import argparse | |
9 import openbabel | |
10 openbabel.obErrorLog.StopLogging() | |
11 import pybel | |
12 import multiprocessing | |
13 import tempfile | |
14 import subprocess | |
15 import shutil | |
16 import shlex | |
17 | |
def parse_command_line():
    """
    Define the command-line interface of the script and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` produced by the parser, carrying the
        attributes ``infile``, ``query``, ``outfile``, ``iformat``,
        ``n_times``, ``processors``, ``invert_matches``, ``only_name``,
        ``full_match`` and ``number_of_matches``.
    """
    cli = argparse.ArgumentParser()

    # The three file arguments are mandatory.
    cli.add_argument('-i', '--infile', required=True, help='Molecule file.')
    cli.add_argument(
        '-q', '--query', required=True,
        help='Query file, containing different SMARTS in each line.')
    cli.add_argument(
        '-o', '--outfile', required=True, help='Path to the output file.')

    cli.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
    cli.add_argument(
        "--n-times", dest="n_times", type=int, default=0,
        help="Print a molecule only if the pattern occurs # times inside the molecule.")

    # By default one worker per CPU reported by multiprocessing.
    cli.add_argument(
        '-p', '--processors', type=int, default=multiprocessing.cpu_count())

    # Boolean switches: (option string, destination attribute, help text).
    # All of them default to False and become True when given.
    switches = (
        ("--invert-matches", "invert_matches",
         "Invert the matching, print non-matching molecules."),
        ("--only-name", "only_name",
         "Only print the name of the molecules."),
        ("--full-match", "full_match",
         "Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern."),
        ("--number-of-matches", "number_of_matches",
         "Print the number of matches."),
    )
    for option, destination, description in switches:
        cli.add_argument(option, dest=destination, action="store_true",
                         default=False, help=description)

    return cli.parse_args()
36 | |
# Module-level accumulator: every value handed to mp_callback ends up here.
results = []


def mp_callback(res):
    """Append one finished worker result to the module-level ``results`` list."""
    results.append(res)
40 | |
def mp_helper(query, args):
    """
    Helper function for multiprocessing: run obgrep for a single pattern.

    The obgrep standard output is written to a fresh temporary file that is
    NOT deleted here; the caller is responsible for removing it.

    Parameters:
        query: the pattern passed to obgrep as one command-line argument.
        args: parsed options; the attributes ``invert_matches``,
            ``only_name``, ``full_match``, ``number_of_matches``, ``n_times``
            and ``infile`` are read.

    Returns:
        A tuple ``(path_of_result_file, query)``.

    Raises:
        OSError: if the obgrep executable cannot be started. The temporary
            file is removed before the error propagates.
    """
    # Build the argument vector directly instead of formatting a string and
    # re-splitting it: this keeps a path with whitespace or a pattern with
    # quote/backslash characters intact as exactly one argument each.
    cmd = ['obgrep']
    if args.invert_matches:
        cmd.append('-v')
    if args.only_name:
        cmd.append('-n')
    if args.full_match:
        cmd.append('-f')
    if args.number_of_matches:
        cmd.append('-c')
    if args.n_times:
        cmd.extend(['-t', str(args.n_times)])
    cmd.append(query)
    cmd.append(args.infile)

    # delete=False: the file has to outlive this worker so that the parent
    # process can read it. The handle itself is closed on leaving the block.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        try:
            child = subprocess.Popen(cmd, stdout=tmp, stderr=subprocess.PIPE)
            # stderr is drained so the child can never block on a full pipe;
            # as before, its content and the exit status are not inspected.
            child.communicate()
        except OSError:
            # The process could not be launched: do not leave an orphaned
            # temporary file behind.
            tmp.close()
            os.remove(tmp.name)
            raise
    return (tmp.name, query)
66 | |
67 | |
def obgrep(args):
    """
    Run one obgrep search per query line in parallel and merge the output.

    The input file is exposed to the workers through a temporary symbolic
    link whose extension is ``args.iformat``. Each non-empty line of
    ``args.query`` is handed to ``mp_helper`` in a process pool, and the
    per-query result files collected in the module-level ``results`` list
    are concatenated into ``args.outfile`` and then removed.

    Parameters:
        args: parsed options; the attributes ``infile``, ``iformat``,
            ``query``, ``processors`` and ``outfile`` are read here.
            ``infile`` is temporarily replaced by the link path while the
            workers run and is restored before returning.
    """
    original_infile = args.infile

    # A private directory from mkdtemp avoids the race of re-using the name
    # of an already closed NamedTemporaryFile for the link.
    link_dir = tempfile.mkdtemp()
    temp_link = os.path.join(link_dir, "input.%s" % args.iformat)
    try:
        # The link target must be absolute: a relative target would be
        # resolved against the link's own directory and dangle.
        os.symlink(os.path.abspath(original_infile), temp_link)
        args.infile = temp_link

        with open(args.query) as query_handle:
            queries = [line.strip() for line in query_handle]

        pool = multiprocessing.Pool(args.processors)
        for query in queries:
            if not query:
                # A blank line carries no pattern to search for.
                continue
            pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
        pool.close()
        pool.join()

        with open(args.outfile, 'wb') as out_handle:
            for result_file, _query in results:
                with open(result_file, 'rb') as res_handle:
                    shutil.copyfileobj(res_handle, out_handle)
                os.remove(result_file)
    finally:
        # Undo the temporary redirection and always remove the link, even
        # when one of the steps above failed.
        args.infile = original_infile
        shutil.rmtree(link_dir, ignore_errors=True)
92 | |
def __main__():
    """
    Script entry point: parse the command line, then run the
    multiprocessing obgrep search with the parsed options.
    """
    obgrep(parse_command_line())


# Only start the search when executed as a script, not on import.
if __name__ == "__main__":
    __main__()