comparison multi_obgrep.py @ 0:7133973beaf0 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
author bgruening
date Sat, 20 May 2017 08:40:10 -0400
parents
children afd14e10a318
comparison
equal deleted inserted replaced
-1:000000000000 0:7133973beaf0
1 #!/usr/bin/env python
2 """
3 Input: Molecules in SDF, SMILES ...
4 Output: Molecule file filtered with obgrep.
5 Copyright 2013, Bjoern Gruening and Xavier Lucas
6 """
7 import sys, os
8 import argparse
9 import openbabel
10 openbabel.obErrorLog.StopLogging()
11 import pybel
12 import multiprocessing
13 import tempfile
14 import subprocess
15 import shutil
16 import shlex
17
def parse_command_line():
    """
    Define the command-line interface of the multi-query obgrep wrapper
    and return the options parsed from sys.argv.
    """
    cli = argparse.ArgumentParser()

    # Mandatory file arguments.
    cli.add_argument('-i', '--infile', required=True, help='Molecule file.')
    cli.add_argument('-q', '--query', required=True,
                     help='Query file, containing different SMARTS in each line.')
    cli.add_argument('-o', '--outfile', required=True,
                     help='Path to the output file.')

    # Optional settings that carry a value.
    cli.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
    cli.add_argument("--n-times", dest="n_times", type=int, default=0,
                     help="Print a molecule only if the pattern occurs # times inside the molecule.")
    cli.add_argument('-p', '--processors', type=int,
                     default=multiprocessing.cpu_count())

    # Plain on/off switches: (option string, destination, help text).
    switches = (
        ("--invert-matches", "invert_matches",
         "Invert the matching, print non-matching molecules."),
        ("--only-name", "only_name",
         "Only print the name of the molecules."),
        ("--full-match", "full_match",
         "Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern."),
        ("--number-of-matches", "number_of_matches",
         "Print the number of matches."),
    )
    for option, destination, description in switches:
        cli.add_argument(option, dest=destination, action="store_true",
                         default=False, help=description)

    return cli.parse_args()
36
# Module-level accumulator filled by mp_callback().
results = []


def mp_callback(res):
    """Append *res* to the module-level ``results`` list (usable as a pool callback)."""
    results.append(res)
40
def mp_helper(query, args):
    """
    Run obgrep for a single query; meant to be used as a multiprocessing worker.

    query: the pattern handed to obgrep as one command-line argument.
    args:  parsed options; the attributes invert_matches, only_name,
           full_match, number_of_matches, n_times and infile are read.

    Returns a tuple (path, query), where path names a temporary file that
    holds obgrep's standard output. The caller has to delete that file.
    If obgrep cannot be started, the temporary file is removed and the
    exception is re-raised.
    """
    # Build the argument vector directly. Formatting a shell-like string and
    # re-splitting it would break on queries containing quotes and on input
    # paths containing whitespace.
    cmd = ['obgrep']
    option_flags = (
        (args.invert_matches, '-v'),
        (args.only_name, '-n'),
        (args.full_match, '-f'),
        (args.number_of_matches, '-c'),
    )
    cmd.extend(flag for enabled, flag in option_flags if enabled)
    if args.n_times:
        cmd.extend(['-t', str(args.n_times)])
    cmd.extend([query, args.infile])

    # delete=False: the file has to outlive this function so the parent
    # process can collect the output.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        child = subprocess.Popen(cmd, stdout=tmp, stderr=subprocess.PIPE)
        # stderr is drained and discarded; the exit status is not inspected.
        child.communicate()
    except Exception:
        tmp.close()
        os.remove(tmp.name)
        raise
    tmp.close()
    return (tmp.name, query)
66
67
def obgrep(args):
    """
    Run every query from args.query against args.infile in parallel and
    concatenate the per-query outputs into args.outfile.

    args: parsed options; infile, iformat, query, outfile and processors
          are read here, the remaining attributes are consumed by mp_helper.

    Results are written in the order the queries appear in the query file.
    Blank lines in the query file are skipped. An exception raised by a
    worker is re-raised here instead of being dropped silently.
    """
    original_infile = args.infile
    temp_link = None
    result_files = []
    try:
        if args.iformat:
            # Expose the input under a name ending in the requested format;
            # presumably obgrep derives the input format from the file
            # extension. Only the unique name of the temporary file is
            # needed, so it is closed (and thereby removed) right away.
            temp_file = tempfile.NamedTemporaryFile()
            temp_link = "%s.%s" % (temp_file.name, args.iformat)
            temp_file.close()
            # The link lives in the temp directory, so a relative target
            # would dangle: always link to the absolute path.
            os.symlink(os.path.abspath(args.infile), temp_link)
            args.infile = temp_link

        pool = multiprocessing.Pool(args.processors)
        try:
            with open(args.query) as query_handle:
                queries = [line.strip() for line in query_handle]
            pending = [pool.apply_async(mp_helper, args=(query, args))
                       for query in queries if query]
            pool.close()
            pool.join()
        finally:
            # No-op after a clean close()/join(); stops the workers otherwise.
            pool.terminate()

        # get() re-raises any exception from the worker; iterating in
        # submission order keeps the output order deterministic.
        for job in pending:
            result_files.append(job.get()[0])

        with open(args.outfile, 'wb') as out_handle:
            for result_file in result_files:
                with open(result_file, 'rb') as res_handle:
                    shutil.copyfileobj(res_handle, out_handle)
    finally:
        for result_file in result_files:
            os.remove(result_file)
        if temp_link is not None and os.path.lexists(temp_link):
            os.remove(temp_link)
        # Do not leave args pointing at the link that was just removed.
        args.infile = original_infile
92
def __main__():
    """
    Entry point: parse the command line and run the parallel obgrep search.
    """
    obgrep(parse_command_line())


if __name__ == "__main__":
    __main__()