Mercurial > repos > bgruening > openbabel_structure_distance_finder
diff subsearch.py @ 0:c066b5accacf draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4"
author | bgruening |
---|---|
date | Wed, 25 Mar 2020 16:47:13 -0400 |
parents | |
children | 4c9d6b47045c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/subsearch.py Wed Mar 25 16:47:13 2020 -0400 @@ -0,0 +1,116 @@ +#!/usr/bin/env python +""" + Input: Molecules in SDF, SMILES ... + Output: Moleculs filtered with specified substructures. + Copyright 2013, Bjoern Gruening and Xavier Lucas +""" +import sys, os +import argparse +import openbabel +openbabel.obErrorLog.StopLogging() +import pybel +import multiprocessing +import tempfile +import subprocess +import shutil + +def parse_command_line(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--infile', required=True, help='Molecule file.') + parser.add_argument('--iformat', help='Input format.') + parser.add_argument('--fastsearch-index', dest="fastsearch_index", + required=True, help='Path to the openbabel fastsearch index.') + parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') + parser.add_argument('--oformat', + default='smi', help='Output file format') + parser.add_argument("--max-candidates", dest="max_candidates", type=int, + default=4000, help="The maximum number of candidates.") + parser.add_argument('-p', '--processors', type=int, + default=multiprocessing.cpu_count()) + return parser.parse_args() + +results = list() +def mp_callback(res): + results.append(res) + +def mp_helper( query, args ): + """ + Helper function for multiprocessing. + That function is a wrapper around the following command: + obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 + """ + + if args.oformat == 'names': + opts = '-osmi -xt' + else: + opts = '-o%s' % args.oformat + + tmp = tempfile.NamedTemporaryFile(delete=False) + cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) + + child = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + stdout, stderr = child.communicate() + return_code = child.returncode + + if return_code: + sys.stdout.write(stdout) + sys.stderr.write(stderr) + sys.stderr.write("Return error code %i from command:\n" % return_code) + sys.stderr.write("%s\n" % cmd) + else: + sys.stdout.write(stdout) + sys.stdout.write(stderr) + return (tmp.name, query) + + +def get_smiles_or_smarts( args ): + """ + Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. + """ + if args.iformat in ['smi', 'text', 'tabular']: + with open( args.infile ) as text_file: + for line in text_file: + yield line.split('\t')[0].strip() + else: + # inchi or sdf files + for mol in pybel.readfile( args.iformat, args.infile ): + yield mol.write('smiles').split('\t')[0] + +def substructure_search( args ): + + pool = multiprocessing.Pool( args.processors ) + for query in get_smiles_or_smarts( args ): + pool.apply_async(mp_helper, args=(query, args), callback=mp_callback) + #mp_callback( mp_helper(query, args) ) + pool.close() + pool.join() + + if args.oformat == 'names': + out_handle = open( args.outfile, 'w' ) + for result_file, query in results: + with open(result_file) as res_handle: + for line in res_handle: + out_handle.write('%s\t%s\n' % ( line.strip(), query )) + os.remove( result_file ) + out_handle.close() + else: + out_handle = open( args.outfile, 'wb' ) + for result_file, query in results: + res_handle = open(result_file,'rb') + shutil.copyfileobj( res_handle, out_handle ) + res_handle.close() + os.remove( result_file ) + out_handle.close() + + +def __main__(): + """ + Multiprocessing Open Babel Substructure Search. + """ + args = parse_command_line() + substructure_search( args ) + +if __name__ == "__main__" : + __main__()