Mercurial > repos > bgruening > openbabel_remduplicates
comparison subsearch.py @ 15:c5de6c19eb06 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author | bgruening |
---|---|
date | Thu, 15 Aug 2024 11:00:46 +0000 |
parents | 12aca74f07d7 |
children |
comparison
equal
deleted
inserted
replaced
14:b2569e22b40c | 15:c5de6c19eb06 |
---|---|
11 import subprocess | 11 import subprocess |
12 import sys | 12 import sys |
13 import tempfile | 13 import tempfile |
14 | 14 |
15 from openbabel import openbabel, pybel | 15 from openbabel import openbabel, pybel |
16 | |
16 openbabel.obErrorLog.StopLogging() | 17 openbabel.obErrorLog.StopLogging() |
17 | 18 |
18 | 19 |
19 def parse_command_line(): | 20 def parse_command_line(): |
20 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() |
21 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') | 22 parser.add_argument("-i", "--infile", required=True, help="Molecule file.") |
22 parser.add_argument('--iformat', help='Input format.') | 23 parser.add_argument("--iformat", help="Input format.") |
23 parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, | 24 parser.add_argument( |
24 help='Path to the openbabel fastsearch index.') | 25 "--fastsearch-index", |
25 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') | 26 dest="fastsearch_index", |
26 parser.add_argument('--oformat', default='smi', help='Output file format') | 27 required=True, |
27 parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, | 28 help="Path to the openbabel fastsearch index.", |
28 help="The maximum number of candidates.") | 29 ) |
29 parser.add_argument('-p', '--processors', type=int, | 30 parser.add_argument( |
30 default=multiprocessing.cpu_count()) | 31 "-o", "--outfile", required=True, help="Path to the output file." |
32 ) | |
33 parser.add_argument("--oformat", default="smi", help="Output file format") | |
34 parser.add_argument( | |
35 "--max-candidates", | |
36 dest="max_candidates", | |
37 type=int, | |
38 default=4000, | |
39 help="The maximum number of candidates.", | |
40 ) | |
41 parser.add_argument( | |
42 "-p", "--processors", type=int, default=multiprocessing.cpu_count() | |
43 ) | |
31 return parser.parse_args() | 44 return parser.parse_args() |
32 | 45 |
33 | 46 |
34 results = list() | 47 results = list() |
35 | 48 |
38 results.append(res) | 51 results.append(res) |
39 | 52 |
40 | 53 |
41 def mp_helper(query, args): | 54 def mp_helper(query, args): |
42 """ | 55 """ |
43 Helper function for multiprocessing. | 56 Helper function for multiprocessing. |
44 That function is a wrapper around the following command: | 57 That function is a wrapper around the following command: |
45 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 | 58 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 |
46 """ | 59 """ |
47 | 60 |
48 if args.oformat == 'names': | 61 if args.oformat == "names": |
49 opts = '-osmi -xt' | 62 opts = "-osmi -xt" |
50 else: | 63 else: |
51 opts = '-o%s' % args.oformat | 64 opts = "-o%s" % args.oformat |
52 | 65 |
53 tmp = tempfile.NamedTemporaryFile(delete=False) | 66 tmp = tempfile.NamedTemporaryFile(delete=False) |
54 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) | 67 cmd = "obabel -ifs %s -O %s %s -s%s -al %s" % ( |
68 args.fastsearch_index, | |
69 tmp.name, | |
70 opts, | |
71 query, | |
72 args.max_candidates, | |
73 ) | |
55 | 74 |
56 child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 75 child = subprocess.Popen( |
76 cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE | |
77 ) | |
57 | 78 |
58 stdout, stderr = child.communicate() | 79 stdout, stderr = child.communicate() |
59 return_code = child.returncode | 80 return_code = child.returncode |
60 | 81 |
61 if return_code: | 82 if return_code: |
71 | 92 |
72 def get_smiles_or_smarts(args): | 93 def get_smiles_or_smarts(args): |
73 """ | 94 """ |
74 Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. | 95 Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. |
75 """ | 96 """ |
76 if args.iformat in ['smi', 'text', 'tabular']: | 97 if args.iformat in ["smi", "text", "tabular"]: |
77 with open(args.infile) as text_file: | 98 with open(args.infile) as text_file: |
78 for line in text_file: | 99 for line in text_file: |
79 yield line.split('\t')[0].strip() | 100 yield line.split("\t")[0].strip() |
80 else: | 101 else: |
81 # inchi or sdf files | 102 # inchi or sdf files |
82 for mol in pybel.readfile(args.iformat, args.infile): | 103 for mol in pybel.readfile(args.iformat, args.infile): |
83 yield mol.write('smiles').split('\t')[0] | 104 yield mol.write("smiles").split("\t")[0] |
84 | 105 |
85 | 106 |
86 def substructure_search(args): | 107 def substructure_search(args): |
87 pool = multiprocessing.Pool(args.processors) | 108 pool = multiprocessing.Pool(args.processors) |
88 for query in get_smiles_or_smarts(args): | 109 for query in get_smiles_or_smarts(args): |
89 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) | 110 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) |
90 # mp_callback(mp_helper(query, args)) | 111 # mp_callback(mp_helper(query, args)) |
91 pool.close() | 112 pool.close() |
92 pool.join() | 113 pool.join() |
93 | 114 |
94 if args.oformat == 'names': | 115 if args.oformat == "names": |
95 out_handle = open(args.outfile, 'w') | 116 out_handle = open(args.outfile, "w") |
96 for result_file, query in results: | 117 for result_file, query in results: |
97 with open(result_file) as res_handle: | 118 with open(result_file) as res_handle: |
98 for line in res_handle: | 119 for line in res_handle: |
99 out_handle.write('%s\t%s\n' % (line.strip(), query)) | 120 out_handle.write("%s\t%s\n" % (line.strip(), query)) |
100 os.remove(result_file) | 121 os.remove(result_file) |
101 out_handle.close() | 122 out_handle.close() |
102 else: | 123 else: |
103 out_handle = open(args.outfile, 'wb') | 124 out_handle = open(args.outfile, "wb") |
104 for result_file, query in results: | 125 for result_file, query in results: |
105 res_handle = open(result_file, 'rb') | 126 res_handle = open(result_file, "rb") |
106 shutil.copyfileobj(res_handle, out_handle) | 127 shutil.copyfileobj(res_handle, out_handle) |
107 res_handle.close() | 128 res_handle.close() |
108 os.remove(result_file) | 129 os.remove(result_file) |
109 out_handle.close() | 130 out_handle.close() |
110 | 131 |
111 | 132 |
112 def __main__(): | 133 def __main__(): |
113 """ | 134 """ |
114 Multiprocessing Open Babel Substructure Search. | 135 Multiprocessing Open Babel Substructure Search. |
115 """ | 136 """ |
116 args = parse_command_line() | 137 args = parse_command_line() |
117 substructure_search(args) | 138 substructure_search(args) |
118 | 139 |
119 | 140 |