annotate subsearch/subsearch.py @ 2:125da3a296ca draft default tip

Uploaded
author bgruening
date Wed, 15 Jul 2015 12:13:08 -0400
parents 527ecd2fc500
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
2 """
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
3 Input: Molecules in SDF, SMILES ...
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
4 Output: Molecules filtered with specified substructures.
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
5 Copyright 2013, Bjoern Gruening and Xavier Lucas
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
6 """
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
7 import sys, os
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
8 import argparse
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
9 import openbabel
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
10 openbabel.obErrorLog.StopLogging()
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
11 import pybel
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
12 import multiprocessing
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
13 import tempfile
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
14 import subprocess
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
15 import shutil
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
16
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def parse_command_line():
    """
    Declare the command-line options and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` produced by the parser, carrying the
    attributes ``infile``, ``iformat``, ``fastsearch_index``, ``outfile``,
    ``oformat``, ``max_candidates`` and ``processors``.
    """
    # One entry per option: (flags, keyword arguments for add_argument).
    # The order is kept as-is because it defines the order in --help.
    option_table = (
        (('-i', '--infile'),
         dict(required=True, help='Molecule file.')),
        (('--iformat',),
         dict(help='Input format.')),
        (('--fastsearch-index',),
         dict(dest="fastsearch_index", required=True,
              help='Path to the openbabel fastsearch index.')),
        (('-o', '--outfile'),
         dict(required=True, help='Path to the output file.')),
        (('--oformat',),
         dict(default='smi', help='Output file format')),
        (("--max-candidates",),
         dict(dest="max_candidates", type=int, default=4000,
              help="The maximum number of candidates.")),
        # Defaults to one worker per CPU reported by multiprocessing.
        (('-p', '--processors'),
         dict(type=int, default=multiprocessing.cpu_count())),
    )

    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
31
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
# Values handed to mp_callback, in the order the callback is invoked.
results = []


def mp_callback(res):
    """
    Callback for ``Pool.apply_async``: store the value of one finished job.

    ``res`` is appended unchanged to the module-level ``results`` list.
    """
    results.append(res)
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
35
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def mp_helper( query, args ):
    """
    Helper function for multiprocessing: run one fastsearch query.

    That function is a wrapper around the following command:
        obabel -ifs index.fs -O outfile -o<format> -s<query> -al <max_candidates>

    query -- SMILES or SMARTS pattern to search for.
    args  -- parsed command-line options; ``oformat``, ``fastsearch_index``
             and ``max_candidates`` are read.

    Returns ``(result_path, query)`` when obabel exits with code 0. The file
    at ``result_path`` is a temporary file that the caller has to delete.
    Returns ``None`` when obabel exits with a non-zero code; the temporary
    file is removed in that case. An ``OSError`` raised while starting obabel
    (e.g. executable not found) is re-raised after the temporary file has
    been removed.
    """
    if args.oformat == 'names':
        # 'names' is not passed to obabel as a format: the SMILES writer is
        # used with its 't' output option (molecule name only, according to
        # the Open Babel SMILES format documentation).
        opts = ['-osmi', '-xt']
    else:
        opts = ['-o%s' % args.oformat]

    # Only the path is needed, obabel writes the file itself. Close our own
    # handle immediately so that no descriptor is leaked per query.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    tmp.close()

    # Build the argument vector directly instead of splitting a formatted
    # string: an index path or a query containing whitespace would otherwise
    # be torn into several arguments.
    cmd = ['obabel', '-ifs', args.fastsearch_index, '-O', tmp.name]
    cmd.extend(opts)
    cmd.extend(['-s%s' % query, '-al', str(args.max_candidates)])

    try:
        # universal_newlines=True yields text instead of bytes, so the output
        # can be passed to sys.stdout/sys.stderr on Python 2 and Python 3.
        child = subprocess.Popen(cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            universal_newlines=True)
        stdout, stderr = child.communicate()
    except OSError:
        # obabel could not be started: do not leave the empty file behind.
        os.remove(tmp.name)
        raise

    return_code = child.returncode

    if return_code:
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % ' '.join(cmd))
        # Nobody will consume the output of a failed query.
        os.remove(tmp.name)
        return None

    sys.stdout.write(stdout)
    # NOTE(review): on success the child's stderr is deliberately forwarded
    # to stdout - presumably so that informational obabel messages are not
    # reported as errors by the calling framework; confirm.
    sys.stdout.write(stderr)
    return (tmp.name, query)
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
66
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
67
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def get_smiles_or_smarts( args ):
    """
    Wrapper to retrieve stripped SMILES or SMARTS strings from different
    input formats.

    args -- parsed command-line options; ``iformat`` and ``infile`` are read.

    For the text based formats ('smi', 'text', 'tabular') the first
    tab-separated column of every line of ``infile`` is yielded. Any other
    format is read with ``pybel.readfile`` and every molecule is converted
    to SMILES. Surrounding whitespace is removed and empty queries (e.g.
    blank lines) are skipped.
    """
    if args.iformat in ['smi', 'text', 'tabular']:
        with open( args.infile ) as text_file:
            for line in text_file:
                query = line.split('\t')[0].strip()
                # A blank line would otherwise become an empty search pattern.
                if query:
                    yield query
    else:
        # inchi or sdf files
        for mol in pybel.readfile( args.iformat, args.infile ):
            # Keep only the first tab-separated field of the written SMILES
            # record and drop any trailing newline.
            query = mol.write('smiles').split('\t')[0].strip()
            if query:
                yield query
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
80
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def substructure_search( args ):
    """
    Run one fastsearch per query in a process pool and merge all hits into
    ``args.outfile``.

    args -- parsed command-line options; ``processors``, ``oformat`` and
            ``outfile`` are read here, the remaining attributes are used by
            ``get_smiles_or_smarts`` and ``mp_helper``.

    Results are merged in the order in which the pool delivers them to
    ``mp_callback``. With the output format 'names' every hit line is written
    followed by a tab and the query that matched; otherwise the per-query
    result files are concatenated byte for byte. Every per-query result file
    is deleted after it has been merged.
    """
    pool = multiprocessing.Pool( args.processors )
    for query in get_smiles_or_smarts( args ):
        # For debugging, the serial equivalent is:
        #     mp_callback( mp_helper(query, args) )
        pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
    pool.close()
    pool.join()

    # mp_helper returns None for a failed query (the error has already been
    # reported on stderr); unpacking such an entry would raise a TypeError.
    finished = [res for res in results if res is not None]

    if args.oformat == 'names':
        with open( args.outfile, 'w' ) as out_handle:
            for result_file, query in finished:
                try:
                    with open(result_file) as res_handle:
                        for line in res_handle:
                            out_handle.write('%s\t%s\n' % ( line.strip(), query ))
                finally:
                    # Remove the temporary file even if merging it failed.
                    os.remove( result_file )
    else:
        with open( args.outfile, 'wb' ) as out_handle:
            for result_file, query in finished:
                try:
                    with open(result_file, 'rb') as res_handle:
                        shutil.copyfileobj( res_handle, out_handle )
                finally:
                    # Remove the temporary file even if merging it failed.
                    os.remove( result_file )
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
106
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
107
527ecd2fc500 Uploaded
bgruening
parents:
diff changeset
def __main__():
    """
    Multiprocessing Open Babel Substructure Search.

    Parses the command line and hands the options to the search.
    """
    substructure_search( parse_command_line() )


# Run the search only when executed as a script, not on import.
if __name__ == "__main__":
    __main__()