comparison multi_obgrep.py @ 5:8302ab092300 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author bgruening
date Thu, 15 Aug 2024 11:01:11 +0000
parents 49242402887b
children
comparison
equal deleted inserted replaced
4:2c5c7da26e08 5:8302ab092300
13 import tempfile 13 import tempfile
14 14
15 15
16 def parse_command_line(): 16 def parse_command_line():
17 parser = argparse.ArgumentParser() 17 parser = argparse.ArgumentParser()
18 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') 18 parser.add_argument("-i", "--infile", required=True, help="Molecule file.")
19 parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') 19 parser.add_argument(
20 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') 20 "-q",
21 "--query",
22 required=True,
23 help="Query file, containing different SMARTS in each line.",
24 )
25 parser.add_argument(
26 "-o", "--outfile", required=True, help="Path to the output file."
27 )
21 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") 28 parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
22 parser.add_argument("--n-times", dest="n_times", type=int, 29 parser.add_argument(
23 default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") 30 "--n-times",
24 parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) 31 dest="n_times",
25 parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", 32 type=int,
26 default=False, help="Invert the matching, print non-matching molecules.") 33 default=0,
27 parser.add_argument("--only-name", dest="only_name", action="store_true", 34 help="Print a molecule only if the pattern occurs # times inside the molecule.",
28 default=False, help="Only print the name of the molecules.") 35 )
29 parser.add_argument("--full-match", dest="full_match", action="store_true", 36 parser.add_argument(
30 default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") 37 "-p", "--processors", type=int, default=multiprocessing.cpu_count()
31 parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", 38 )
32 default=False, help="Print the number of matches.") 39 parser.add_argument(
40 "--invert-matches",
41 dest="invert_matches",
42 action="store_true",
43 default=False,
44 help="Invert the matching, print non-matching molecules.",
45 )
46 parser.add_argument(
47 "--only-name",
48 dest="only_name",
49 action="store_true",
50 default=False,
51 help="Only print the name of the molecules.",
52 )
53 parser.add_argument(
54 "--full-match",
55 dest="full_match",
56 action="store_true",
57 default=False,
58 help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.",
59 )
60 parser.add_argument(
61 "--number-of-matches",
62 dest="number_of_matches",
63 action="store_true",
64 default=False,
65 help="Print the number of matches.",
66 )
33 return parser.parse_args() 67 return parser.parse_args()
34 68
35 69
36 results = list() 70 results = list()
37 71
40 results.append(res) 74 results.append(res)
41 75
42 76
43 def mp_helper(query, args): 77 def mp_helper(query, args):
44 """ 78 """
45 Helper function for multiprocessing. 79 Helper function for multiprocessing.
46 That function is a wrapper around obgrep. 80 That function is a wrapper around obgrep.
47 """ 81 """
48 82
49 cmd_list = [] 83 cmd_list = []
50 if args.invert_matches: 84 if args.invert_matches:
51 cmd_list.append('-v') 85 cmd_list.append("-v")
52 if args.only_name: 86 if args.only_name:
53 cmd_list.append('-n') 87 cmd_list.append("-n")
54 if args.full_match: 88 if args.full_match:
55 cmd_list.append('-f') 89 cmd_list.append("-f")
56 if args.number_of_matches: 90 if args.number_of_matches:
57 cmd_list.append('-c') 91 cmd_list.append("-c")
58 if args.n_times: 92 if args.n_times:
59 cmd_list.append('-t %s' % str(args.n_times)) 93 cmd_list.append("-t %s" % str(args.n_times))
60 94
61 tmp = tempfile.NamedTemporaryFile(delete=False) 95 tmp = tempfile.NamedTemporaryFile(delete=False)
62 cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) 96 cmd = 'obgrep %s "%s" %s' % (" ".join(cmd_list), query, args.infile)
63 child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) 97 child = subprocess.Popen(
98 shlex.split(cmd), stdout=open(tmp.name, "w+"), stderr=subprocess.PIPE
99 )
64 100
65 stdout, stderr = child.communicate() 101 stdout, stderr = child.communicate()
66 return (tmp.name, query) 102 return (tmp.name, query)
67 103
68 104
78 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback) 114 pool.apply_async(mp_helper, args=(query.strip(), args), callback=mp_callback)
79 # mp_callback(mp_helper(query.strip(), args)) 115 # mp_callback(mp_helper(query.strip(), args))
80 pool.close() 116 pool.close()
81 pool.join() 117 pool.join()
82 118
83 out_handle = open(args.outfile, 'wb') 119 out_handle = open(args.outfile, "wb")
84 for result_file, query in results: 120 for result_file, query in results:
85 res_handle = open(result_file, 'rb') 121 res_handle = open(result_file, "rb")
86 shutil.copyfileobj(res_handle, out_handle) 122 shutil.copyfileobj(res_handle, out_handle)
87 res_handle.close() 123 res_handle.close()
88 os.remove(result_file) 124 os.remove(result_file)
89 out_handle.close() 125 out_handle.close()
90 126
91 os.remove(temp_link) 127 os.remove(temp_link)
92 128
93 129
94 def __main__(): 130 def __main__():
95 """ 131 """
96 Multiprocessing obgrep search. 132 Multiprocessing obgrep search.
97 """ 133 """
98 args = parse_command_line() 134 args = parse_command_line()
99 obgrep(args) 135 obgrep(args)
100 136
101 137