comparison subsearch.py @ 13:12aca74f07d7 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
author bgruening
date Mon, 19 Oct 2020 14:47:33 +0000
parents 50ca8845e7f5
children c5de6c19eb06
comparison
equal deleted inserted replaced
12:50ca8845e7f5 13:12aca74f07d7
2 """ 2 """
3 Input: Molecules in SDF, SMILES ... 3 Input: Molecules in SDF, SMILES ...
4 Output: Molecules filtered with specified substructures. 4 Output: Molecules filtered with specified substructures.
5 Copyright 2013, Bjoern Gruening and Xavier Lucas 5 Copyright 2013, Bjoern Gruening and Xavier Lucas
6 """ 6 """
7 import sys, os
8 import argparse 7 import argparse
9 import multiprocessing 8 import multiprocessing
9 import os
10 import shutil
11 import subprocess
12 import sys
10 import tempfile 13 import tempfile
11 import subprocess
12 import shutil
13 14
14 from openbabel import openbabel, pybel 15 from openbabel import openbabel, pybel
15 openbabel.obErrorLog.StopLogging() 16 openbabel.obErrorLog.StopLogging()
17
16 18
17 def parse_command_line(): 19 def parse_command_line():
18 parser = argparse.ArgumentParser() 20 parser = argparse.ArgumentParser()
19 parser.add_argument('-i', '--infile', required=True, help='Molecule file.') 21 parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
20 parser.add_argument('--iformat', help='Input format.') 22 parser.add_argument('--iformat', help='Input format.')
21 parser.add_argument('--fastsearch-index', dest="fastsearch_index", 23 parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True,
22 required=True, help='Path to the openbabel fastsearch index.') 24 help='Path to the openbabel fastsearch index.')
23 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') 25 parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
24 parser.add_argument('--oformat', 26 parser.add_argument('--oformat', default='smi', help='Output file format')
25 default='smi', help='Output file format') 27 parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000,
26 parser.add_argument("--max-candidates", dest="max_candidates", type=int, 28 help="The maximum number of candidates.")
27 default=4000, help="The maximum number of candidates.") 29 parser.add_argument('-p', '--processors', type=int,
28 parser.add_argument('-p', '--processors', type=int, 30 default=multiprocessing.cpu_count())
29 default=multiprocessing.cpu_count())
30 return parser.parse_args() 31 return parser.parse_args()
31 32
33
32 results = list() 34 results = list()
35
36
33 def mp_callback(res): 37 def mp_callback(res):
34 results.append(res) 38 results.append(res)
35 39
36 def mp_helper( query, args ): 40
41 def mp_helper(query, args):
37 """ 42 """
38 Helper function for multiprocessing. 43 Helper function for multiprocessing.
39 That function is a wrapper around the following command: 44 That function is a wrapper around the following command:
40 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 45 obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999
41 """ 46 """
46 opts = '-o%s' % args.oformat 51 opts = '-o%s' % args.oformat
47 52
48 tmp = tempfile.NamedTemporaryFile(delete=False) 53 tmp = tempfile.NamedTemporaryFile(delete=False)
49 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) 54 cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates)
50 55
51 child = subprocess.Popen(cmd.split(), 56 child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
52 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
53 57
54 stdout, stderr = child.communicate() 58 stdout, stderr = child.communicate()
55 return_code = child.returncode 59 return_code = child.returncode
56 60
57 if return_code: 61 if return_code:
63 sys.stdout.write(stdout) 67 sys.stdout.write(stdout)
64 sys.stdout.write(stderr) 68 sys.stdout.write(stderr)
65 return (tmp.name, query) 69 return (tmp.name, query)
66 70
67 71
68 def get_smiles_or_smarts( args ): 72 def get_smiles_or_smarts(args):
69 """ 73 """
70 Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats. 74 Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats.
71 """ 75 """
72 if args.iformat in ['smi', 'text', 'tabular']: 76 if args.iformat in ['smi', 'text', 'tabular']:
73 with open( args.infile ) as text_file: 77 with open(args.infile) as text_file:
74 for line in text_file: 78 for line in text_file:
75 yield line.split('\t')[0].strip() 79 yield line.split('\t')[0].strip()
76 else: 80 else:
77 # inchi or sdf files 81 # inchi or sdf files
78 for mol in pybel.readfile( args.iformat, args.infile ): 82 for mol in pybel.readfile(args.iformat, args.infile):
79 yield mol.write('smiles').split('\t')[0] 83 yield mol.write('smiles').split('\t')[0]
80 84
81 def substructure_search( args ):
82 85
83 pool = multiprocessing.Pool( args.processors ) 86 def substructure_search(args):
84 for query in get_smiles_or_smarts( args ): 87 pool = multiprocessing.Pool(args.processors)
88 for query in get_smiles_or_smarts(args):
85 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) 89 pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
86 #mp_callback( mp_helper(query, args) ) 90 # mp_callback(mp_helper(query, args))
87 pool.close() 91 pool.close()
88 pool.join() 92 pool.join()
89 93
90 if args.oformat == 'names': 94 if args.oformat == 'names':
91 out_handle = open( args.outfile, 'w' ) 95 out_handle = open(args.outfile, 'w')
92 for result_file, query in results: 96 for result_file, query in results:
93 with open(result_file) as res_handle: 97 with open(result_file) as res_handle:
94 for line in res_handle: 98 for line in res_handle:
95 out_handle.write('%s\t%s\n' % ( line.strip(), query )) 99 out_handle.write('%s\t%s\n' % (line.strip(), query))
96 os.remove( result_file ) 100 os.remove(result_file)
97 out_handle.close() 101 out_handle.close()
98 else: 102 else:
99 out_handle = open( args.outfile, 'wb' ) 103 out_handle = open(args.outfile, 'wb')
100 for result_file, query in results: 104 for result_file, query in results:
101 res_handle = open(result_file,'rb') 105 res_handle = open(result_file, 'rb')
102 shutil.copyfileobj( res_handle, out_handle ) 106 shutil.copyfileobj(res_handle, out_handle)
103 res_handle.close() 107 res_handle.close()
104 os.remove( result_file ) 108 os.remove(result_file)
105 out_handle.close() 109 out_handle.close()
106 110
107 111
108 def __main__(): 112 def __main__():
109 """ 113 """
110 Multiprocessing Open Babel Substructure Search. 114 Multiprocessing Open Babel Substructure Search.
111 """ 115 """
112 args = parse_command_line() 116 args = parse_command_line()
113 substructure_search( args ) 117 substructure_search(args)
114 118
115 if __name__ == "__main__" : 119
120 if __name__ == "__main__":
116 __main__() 121 __main__()