comparison subsearch.py @ 5:8302ab092300 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author bgruening
date Thu, 15 Aug 2024 11:01:11 +0000
parents 49242402887b
children
comparison
equal deleted inserted replaced
4:2c5c7da26e08 5:8302ab092300
11 import subprocess 11 import subprocess
12 import sys 12 import sys
13 import tempfile 13 import tempfile
14 14
15 from openbabel import openbabel, pybel 15 from openbabel import openbabel, pybel
16
16 openbabel.obErrorLog.StopLogging() 17 openbabel.obErrorLog.StopLogging()
17 18
18 19
def parse_command_line():
    """
    Parse the command-line options of the substructure search tool.

    Reads ``sys.argv`` through argparse. ``--infile``, ``--fastsearch-index``
    and ``--outfile`` are required; the remaining options fall back to the
    defaults declared below.

    Returns:
        argparse.Namespace with the attributes ``infile``, ``iformat``,
        ``fastsearch_index``, ``outfile``, ``oformat``, ``max_candidates``
        and ``processors``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Molecule file.")
    parser.add_argument("--iformat", help="Input format.")
    parser.add_argument(
        "--fastsearch-index",
        dest="fastsearch_index",
        required=True,
        help="Path to the openbabel fastsearch index.",
    )
    parser.add_argument(
        "-o", "--outfile", required=True, help="Path to the output file."
    )
    parser.add_argument("--oformat", default="smi", help="Output file format")
    parser.add_argument(
        "--max-candidates",
        dest="max_candidates",
        type=int,
        default=4000,
        help="The maximum number of candidates.",
    )
    parser.add_argument(
        "-p",
        "--processors",
        type=int,
        # Default to one worker per CPU reported by the operating system.
        default=multiprocessing.cpu_count(),
        help="Number of worker processes (default: number of CPUs).",
    )
    return parser.parse_args()
32 45
33 46
34 results = list() 47 results = list()
35 48
38 results.append(res) 51 results.append(res)
39 52
40 53
41 def mp_helper(query, args): 54 def mp_helper(query, args):
42 """ 55 """
43 Helper function for multiprocessing. 56 Helper function for multiprocessing.
44 That function is a wrapper around the following command: 57 That function is a wrapper around the following command:
45 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 58 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999
46 """ 59 """
47 60
48 if args.oformat == 'names': 61 if args.oformat == "names":
49 opts = '-osmi -xt' 62 opts = "-osmi -xt"
50 else: 63 else:
51 opts = '-o%s' % args.oformat 64 opts = "-o%s" % args.oformat
52 65
53 tmp = tempfile.NamedTemporaryFile(delete=False) 66 tmp = tempfile.NamedTemporaryFile(delete=False)
54 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) 67 cmd = "obabel -ifs %s -O %s %s -s%s -al %s" % (
68 args.fastsearch_index,
69 tmp.name,
70 opts,
71 query,
72 args.max_candidates,
73 )
55 74
56 child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) 75 child = subprocess.Popen(
76 cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
77 )
57 78
58 stdout, stderr = child.communicate() 79 stdout, stderr = child.communicate()
59 return_code = child.returncode 80 return_code = child.returncode
60 81
61 if return_code: 82 if return_code:
71 92
def get_smiles_or_smarts(args):
    """
    Yield stripped SMILES or SMARTS query strings read from ``args.infile``.

    For the ``smi``, ``text`` and ``tabular`` input formats the first
    tab-separated column of every line is used as the query. Any other
    format (e.g. inchi or sdf) is read with pybel and each molecule is
    written out as SMILES, of which the part before the first tab is used.

    Empty queries (blank lines, or lines with an empty first column) are
    skipped so that no search is started with an empty pattern.
    """
    if args.iformat in ("smi", "text", "tabular"):
        with open(args.infile) as text_file:
            for line in text_file:
                query = line.split("\t")[0].strip()
                if query:
                    yield query
    else:
        # inchi or sdf files
        for mol in pybel.readfile(args.iformat, args.infile):
            query = mol.write("smiles").split("\t")[0].strip()
            if query:
                yield query
84 105
85 106
def substructure_search(args):
    """
    Run one fastsearch query per input molecule in parallel and merge the hits.

    Every query produced by ``get_smiles_or_smarts(args)`` is submitted to a
    pool of ``args.processors`` worker processes running ``mp_helper``, with
    ``mp_callback`` registered as the completion callback. After all workers
    have finished, each entry of the module-level ``results`` list is
    unpacked as a ``(result_file, query)`` pair, its content is appended to
    ``args.outfile`` and the per-query result file is deleted.

    With ``args.oformat == "names"`` the output is text: every result line is
    stripped and followed by a tab and the query that produced it. For all
    other formats the result files are concatenated byte-for-byte.
    """
    pool = multiprocessing.Pool(args.processors)
    for query in get_smiles_or_smarts(args):
        pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
    # No further tasks will be submitted; wait for all queued searches.
    pool.close()
    pool.join()

    if args.oformat == "names":
        with open(args.outfile, "w") as out_handle:
            for result_file, query in results:
                with open(result_file) as res_handle:
                    for line in res_handle:
                        out_handle.write("%s\t%s\n" % (line.strip(), query))
                os.remove(result_file)
    else:
        # Binary mode so that any output format is copied unchanged.
        with open(args.outfile, "wb") as out_handle:
            for result_file, query in results:
                with open(result_file, "rb") as res_handle:
                    shutil.copyfileobj(res_handle, out_handle)
                os.remove(result_file)
110 131
111 132
def __main__():
    """
    Entry point of the multiprocessing Open Babel substructure search:
    parse the command line and run the search with the parsed options.
    """
    substructure_search(parse_command_line())
118 139
119 140