Mercurial > repos > xuebing > sharplabtool
comparison tools/metag_tools/megablast_wrapper.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 run megablast for metagenomics data | |
4 | |
5 usage: %prog [options] | |
6 -d, --db_build=d: The database to use | |
7 -i, --input=i: Input FASTQ candidate file | |
8 -w, --word_size=w: Size of best perfect match | |
9 -c, --identity_cutoff=c: Report hits at or above this identity | |
10 -e, --eval_cutoff=e: Expectation value cutoff | |
11 -f, --filter_query=f: Filter out low complexity regions | |
12 -x, --index_dir=x: Data index directory | |
13 -o, --output=o: Output file | |
14 | |
15 usage: %prog db_build input_file word_size identity_cutoff eval_cutoff filter_query index_dir output_file | |
16 """ | |
17 | |
18 import os, subprocess, sys, tempfile | |
19 from galaxy import eggs | |
20 import pkg_resources; pkg_resources.require( "bx-python" ) | |
21 from bx.cookbook import doc_optparse | |
22 | |
23 assert sys.version_info[:2] >= ( 2, 4 ) | |
24 | |
25 def stop_err( msg ): | |
26 sys.stderr.write( "%s\n" % msg ) | |
27 sys.exit() | |
28 | |
29 def __main__(): | |
30 #Parse Command Line | |
31 options, args = doc_optparse.parse( __doc__ ) | |
32 query_filename = options.input.strip() | |
33 output_filename = options.output.strip() | |
34 mega_word_size = options.word_size # -W | |
35 mega_iden_cutoff = options.identity_cutoff # -p | |
36 mega_evalue_cutoff = options.eval_cutoff # -e | |
37 mega_temp_output = tempfile.NamedTemporaryFile().name | |
38 GALAXY_DATA_INDEX_DIR = options.index_dir | |
39 DB_LOC = "%s/blastdb.loc" % GALAXY_DATA_INDEX_DIR | |
40 | |
41 # megablast parameters | |
42 try: | |
43 int( mega_word_size ) | |
44 except: | |
45 stop_err( 'Invalid value for word size' ) | |
46 try: | |
47 float( mega_iden_cutoff ) | |
48 except: | |
49 stop_err( 'Invalid value for identity cut-off' ) | |
50 try: | |
51 float( mega_evalue_cutoff ) | |
52 except: | |
53 stop_err( 'Invalid value for Expectation value' ) | |
54 | |
55 if not os.path.exists( os.path.split( options.db_build )[0] ): | |
56 stop_err( 'Cannot locate the target database directory. Please check your location file.' ) | |
57 | |
58 # arguments for megablast | |
59 megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null" \ | |
60 % ( options.db_build, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, options.filter_query ) | |
61 | |
62 print megablast_command | |
63 | |
64 tmp = tempfile.NamedTemporaryFile().name | |
65 try: | |
66 tmp_stderr = open( tmp, 'wb' ) | |
67 proc = subprocess.Popen( args=megablast_command, shell=True, stderr=tmp_stderr.fileno() ) | |
68 returncode = proc.wait() | |
69 tmp_stderr.close() | |
70 # get stderr, allowing for case where it's very large | |
71 tmp_stderr = open( tmp, 'rb' ) | |
72 stderr = '' | |
73 buffsize = 1048576 | |
74 try: | |
75 while True: | |
76 stderr += tmp_stderr.read( buffsize ) | |
77 if not stderr or len( stderr ) % buffsize != 0: | |
78 break | |
79 except OverflowError: | |
80 pass | |
81 tmp_stderr.close() | |
82 if returncode != 0: | |
83 raise Exception, stderr | |
84 if os.path.exists( tmp ): | |
85 os.unlink( tmp ) | |
86 except Exception, e: | |
87 if os.path.exists( mega_temp_output ): | |
88 os.unlink( mega_temp_output ) | |
89 if os.path.exists( tmp ): | |
90 os.unlink( tmp ) | |
91 stop_err( 'Error indexing reference sequence. ' + str( e ) ) | |
92 | |
93 output = open( output_filename, 'w' ) | |
94 invalid_lines = 0 | |
95 for i, line in enumerate( file( mega_temp_output ) ): | |
96 line = line.rstrip( '\r\n' ) | |
97 fields = line.split() | |
98 try: | |
99 # get gi and length of that gi seq | |
100 gi, gi_len = fields[1].split( '_' ) | |
101 # convert the last column (causing problem in filter tool) to float | |
102 fields[-1] = float( fields[-1] ) | |
103 new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] ) | |
104 except: | |
105 new_line = line | |
106 invalid_lines += 1 | |
107 output.write( "%s\n" % new_line ) | |
108 output.close() | |
109 | |
110 if os.path.exists( mega_temp_output ): | |
111 os.unlink( mega_temp_output ) #remove the tempfile that we just reformatted the contents of | |
112 | |
113 if invalid_lines: | |
114 print "Unable to parse %d lines. Keep the default format." % invalid_lines | |
115 | |
116 # megablast generates a file called error.log, if empty, delete it, if not, show the contents | |
117 if os.path.exists( './error.log' ): | |
118 for i, line in enumerate( file( './error.log' ) ): | |
119 line = line.rstrip( '\r\n' ) | |
120 print line | |
121 os.remove( './error.log' ) | |
122 | |
123 if __name__ == "__main__" : __main__() |