comparison multi_obgrep.py @ 13:417845394cdf draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author bgruening
date Mon, 19 Oct 2020 14:46:54 +0000
parents a72ae2711a97
children 50eaae9df8d3
comparison
equal deleted inserted replaced
12:a72ae2711a97 13:417845394cdf
2 """ 2 """
3 Input: Molecules in SDF, SMILES ... 3 Input: Molecules in SDF, SMILES ...
4 Output: Molecule file filtered with obgrep. 4 Output: Molecule file filtered with obgrep.
5 Copyright 2013, Bjoern Gruening and Xavier Lucas 5 Copyright 2013, Bjoern Gruening and Xavier Lucas
6 """ 6 """
7 import sys, os
8 import argparse 7 import argparse
9 import multiprocessing 8 import multiprocessing
9 import os
10 import shlex
11 import shutil
12 import subprocess
10 import tempfile 13 import tempfile
11 import subprocess
12 import shutil
13 import shlex
14 14
15 from openbabel import openbabel, pybel 15
16 openbabel.obErrorLog.StopLogging()
17 def parse_command_line(): 16 def parse_command_line():
18 parser = argparse.ArgumentParser() 17 parser = argparse.ArgumentParser()
19 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') 18 parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
20 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') 19 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.')
21 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') 20 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
22 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") 21 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
23 parser.add_argument("--n-times", dest="n_times", type=int, 22 parser.add_argument("--n-times", dest="n_times", type=int,
24 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") 23 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
25 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) 24 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count())
26 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", 25 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true",
27 default=False, help="Invert the matching, print non-matching molecules.") 26 default=False, help="Invert the matching, print non-matching molecules.")
28 parser.add_argument("--only-name", dest="only_name", action="store_true", 27 parser.add_argument("--only-name", dest="only_name", action="store_true",
29 default=False, help="Only print the name of the molecules.") 28 default=False, help="Only print the name of the molecules.")
30 parser.add_argument("--full-match", dest="full_match", action="store_true", 29 parser.add_argument("--full-match", dest="full_match", action="store_true",
31 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") 30 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
32 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", 31 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true",
33 default=False, help="Print the number of matches.") 32 default=False, help="Print the number of matches.")
34 return parser.parse_args() 33 return parser.parse_args()
35 34
35
36 results = list() 36 results = list()
37
38
37 def mp_callback(res): 39 def mp_callback(res):
38 results.append(res) 40 results.append(res)
39 41
40 def mp_helper( query, args ): 42
43 def mp_helper(query, args):
41 """ 44 """
42 Helper function for multiprocessing. 45 Helper function for multiprocessing.
43 That function is a wrapper around obgrep. 46 That function is a wrapper around obgrep.
44 """ 47 """
45 48
55 if args.n_times: 58 if args.n_times:
56 cmd_list.append('-t %s' % str(args.n_times)) 59 cmd_list.append('-t %s' % str(args.n_times))
57 60
58 tmp = tempfile.NamedTemporaryFile(delete=False) 61 tmp = tempfile.NamedTemporaryFile(delete=False)
59 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) 62 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile)
60 child = subprocess.Popen(shlex.split(cmd), 63 child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)
61 stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)
62 64
63 stdout, stderr = child.communicate() 65 stdout, stderr = child.communicate()
64 return (tmp.name, query) 66 return (tmp.name, query)
65 67
66 68
67 def obgrep( args ): 69 def obgrep(args):
68
69 temp_file = tempfile.NamedTemporaryFile() 70 temp_file = tempfile.NamedTemporaryFile()
70 temp_link = "%s.%s" % (temp_file.name, args.iformat) 71 temp_link = "%s.%s" % (temp_file.name, args.iformat)
71 temp_file.close() 72 temp_file.close()
72 os.symlink(args.infile, temp_link) 73 os.symlink(args.infile, temp_link)
73 args.infile = temp_link 74 args.infile = temp_link
74 75
75 pool = multiprocessing.Pool( args.processors ) 76 pool = multiprocessing.Pool(args.processors)
76 for query in open( args.query ): 77 for query in open(args.query):
77 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) 78 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback)
78 #mp_callback( mp_helper(query.strip(), args) ) 79 # mp_callback(mp_helper(query.strip(), args))
79 pool.close() 80 pool.close()
80 pool.join() 81 pool.join()
81 82
82 out_handle = open( args.outfile, 'wb' ) 83 out_handle = open(args.outfile, 'wb')
83 for result_file, query in results: 84 for result_file, query in results:
84 res_handle = open(result_file,'rb') 85 res_handle = open(result_file, 'rb')
85 shutil.copyfileobj( res_handle, out_handle ) 86 shutil.copyfileobj(res_handle, out_handle)
86 res_handle.close() 87 res_handle.close()
87 os.remove( result_file ) 88 os.remove(result_file)
88 out_handle.close() 89 out_handle.close()
89 90
90 os.remove( temp_link ) 91 os.remove(temp_link)
92
91 93
92 def __main__(): 94 def __main__():
93 """ 95 """
94 Multiprocessing obgrep search. 96 Multiprocessing obgrep search.
95 """ 97 """
96 args = parse_command_line() 98 args = parse_command_line()
97 obgrep( args ) 99 obgrep(args)
98 100
99 if __name__ == "__main__" : 101
102 if __name__ == "__main__":
100 __main__() 103 __main__()