Mercurial > repos > xuebing > sharplabtool
comparison tools/metag_tools/shrimp_color_wrapper.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 SHRiMP wrapper : Color space | |
5 """ | |
6 | |
7 import os, sys, tempfile, os.path, re | |
8 | |
9 assert sys.version_info[:2] >= (2.4) | |
10 | |
11 def stop_err( msg ): | |
12 | |
13 sys.stderr.write( "%s\n" % msg ) | |
14 sys.exit() | |
15 | |
16 | |
17 def __main__(): | |
18 | |
19 # SHRiMP path | |
20 shrimp = 'rmapper-cs' | |
21 | |
22 # I/O | |
23 input_target_file = sys.argv[1] # fasta | |
24 input_query_file = sys.argv[2] | |
25 shrimp_outfile = sys.argv[3] # shrimp output | |
26 | |
27 # SHRiMP parameters | |
28 spaced_seed = '1111001111' | |
29 seed_matches_per_window = '2' | |
30 seed_hit_taboo_length = '4' | |
31 seed_generation_taboo_length = '0' | |
32 seed_window_length = '115.0' | |
33 max_hits_per_read = '100' | |
34 max_read_length = '1000' | |
35 kmer = '-1' | |
36 sw_match_value = '100' | |
37 sw_mismatch_value = '-150' | |
38 sw_gap_open_ref = '-400' | |
39 sw_gap_open_query = '-400' | |
40 sw_gap_ext_ref = '-70' | |
41 sw_gap_ext_query = '-70' | |
42 sw_crossover_penalty = '-140' | |
43 sw_full_hit_threshold = '68.0' | |
44 sw_vector_hit_threshold = '60.0' | |
45 | |
46 # TODO: put the threshold on each of these parameters | |
47 if len(sys.argv) > 4: | |
48 | |
49 try: | |
50 if sys.argv[4].isdigit(): | |
51 spaced_seed = sys.argv[4] | |
52 else: | |
53 stop_err('Error in assigning parameter: Spaced seed.') | |
54 except: | |
55 stop_err('Spaced seed must be a combination of 1s and 0s.') | |
56 | |
57 seed_matches_per_window = sys.argv[5] | |
58 seed_hit_taboo_length = sys.argv[6] | |
59 seed_generation_taboo_length = sys.argv[7] | |
60 seed_window_length = sys.argv[8] | |
61 max_hits_per_read = sys.argv[9] | |
62 max_read_length = sys.argv[10] | |
63 kmer = sys.argv[11] | |
64 sw_match_value = sys.argv[12] | |
65 sw_mismatch_value = sys.argv[13] | |
66 sw_gap_open_ref = sys.argv[14] | |
67 sw_gap_open_query = sys.argv[15] | |
68 sw_gap_ext_ref = sys.argv[16] | |
69 sw_gap_ext_query = sys.argv[17] | |
70 sw_crossover_penalty = sys.argv[18] | |
71 sw_full_hit_threshold = sys.argv[19] | |
72 sw_vector_hit_threshold = sys.argv[20] | |
73 | |
74 # temp file for shrimp log file | |
75 shrimp_log = tempfile.NamedTemporaryFile().name | |
76 | |
77 # SHRiMP command | |
78 command = ' '.join([shrimp, '-s', spaced_seed, '-n', seed_matches_per_window, '-t', seed_hit_taboo_length, '-9', seed_generation_taboo_length, '-w', seed_window_length, '-o', max_hits_per_read, '-r', max_read_length, '-d', kmer, '-m', sw_match_value, '-i', sw_mismatch_value, '-g', sw_gap_open_ref, '-q', sw_gap_open_query, '-e', sw_gap_ext_ref, '-f', sw_gap_ext_query, '-x', sw_crossover_penalty, '-h', sw_full_hit_threshold, '-v', sw_vector_hit_threshold, input_query_file, input_target_file, '>', shrimp_outfile, '2>', shrimp_log]) | |
79 | |
80 try: | |
81 os.system(command) | |
82 except Exception, e: | |
83 if os.path.exists(query_fasta): os.remove(query_fasta) | |
84 if os.path.exists(query_qual): os.remove(query_qual) | |
85 stop_err(str(e)) | |
86 | |
87 # check SHRiMP output: count number of lines | |
88 num_hits = 0 | |
89 if shrimp_outfile: | |
90 for i, line in enumerate(file(shrimp_outfile)): | |
91 line = line.rstrip('\r\n') | |
92 if not line or line.startswith('#'): continue | |
93 try: | |
94 fields = line.split() | |
95 num_hits += 1 | |
96 except Exception, e: | |
97 stop_err(str(e)) | |
98 | |
99 if num_hits == 0: # no hits generated | |
100 err_msg = '' | |
101 if shrimp_log: | |
102 for i, line in enumerate(file(shrimp_log)): | |
103 if line.startswith('error'): # deal with memory error: | |
104 err_msg += line # error: realloc failed: Cannot allocate memory | |
105 if re.search('Reads Matched', line): # deal with zero hits | |
106 if int(line[8:].split()[2]) == 0: | |
107 err_msg = 'Zero hits found.\n' | |
108 stop_err('SHRiMP Failed due to:\n' + err_msg) | |
109 | |
110 | |
111 # remove temp. files | |
112 if os.path.exists(shrimp_log): os.remove(shrimp_log) | |
113 | |
114 | |
115 if __name__ == '__main__': __main__() | |
116 |