Mercurial > repos > xuebing > sharplabtool
comparison tools/metag_tools/mapping_to_ucsc.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from galaxy import eggs | |
4 import sys, tempfile, os | |
5 | |
6 assert sys.version_info[:2] >= (2.4) | |
7 | |
8 def stop_err(msg): | |
9 sys.stderr.write(msg) | |
10 sys.exit() | |
11 | |
12 def main(): | |
13 | |
14 out_fname = sys.argv[1] | |
15 in_fname = sys.argv[2] | |
16 chr_col = int(sys.argv[3])-1 | |
17 coord_col = int(sys.argv[4])-1 | |
18 track_type = sys.argv[5] | |
19 if track_type == 'coverage' or track_type == 'both': | |
20 coverage_col = int(sys.argv[6])-1 | |
21 cname = sys.argv[7] | |
22 cdescription = sys.argv[8] | |
23 ccolor = sys.argv[9].replace('-',',') | |
24 cvisibility = sys.argv[10] | |
25 if track_type == 'snp' or track_type == 'both': | |
26 if track_type == 'both': | |
27 j = 5 | |
28 else: | |
29 j = 0 | |
30 #sname = sys.argv[7+j] | |
31 sdescription = sys.argv[6+j] | |
32 svisibility = sys.argv[7+j] | |
33 #ref_col = int(sys.argv[10+j])-1 | |
34 read_col = int(sys.argv[8+j])-1 | |
35 | |
36 | |
37 # Sort the input file based on chromosome (alphabetically) and start co-ordinates (numerically) | |
38 sorted_infile = tempfile.NamedTemporaryFile() | |
39 try: | |
40 os.system("sort -k %d,%d -k %dn -o %s %s" %(chr_col+1,chr_col+1,coord_col+1,sorted_infile.name,in_fname)) | |
41 except Exception, exc: | |
42 stop_err( 'Initialization error -> %s' %str(exc) ) | |
43 | |
44 #generate chr list | |
45 sorted_infile.seek(0) | |
46 chr_vals = [] | |
47 for line in file( sorted_infile.name ): | |
48 line = line.strip() | |
49 if not(line): | |
50 continue | |
51 try: | |
52 fields = line.split('\t') | |
53 chr = fields[chr_col] | |
54 if chr not in chr_vals: | |
55 chr_vals.append(chr) | |
56 except: | |
57 pass | |
58 if not(chr_vals): | |
59 stop_err("Skipped all lines as invalid.") | |
60 | |
61 if track_type == 'coverage' or track_type == 'both': | |
62 if track_type == 'coverage': | |
63 fout = open( out_fname, "w" ) | |
64 else: | |
65 fout = tempfile.NamedTemporaryFile() | |
66 fout.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | |
67 % ( cname, cdescription, ccolor, cvisibility )) | |
68 if track_type == 'snp' or track_type == 'both': | |
69 fout_a = tempfile.NamedTemporaryFile() | |
70 fout_t = tempfile.NamedTemporaryFile() | |
71 fout_g = tempfile.NamedTemporaryFile() | |
72 fout_c = tempfile.NamedTemporaryFile() | |
73 fout_ref = tempfile.NamedTemporaryFile() | |
74 | |
75 fout_a.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | |
76 % ( "Track A", sdescription, '255,0,0', svisibility )) | |
77 fout_t.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | |
78 % ( "Track T", sdescription, '0,255,0', svisibility )) | |
79 fout_g.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | |
80 % ( "Track G", sdescription, '0,0,255', svisibility )) | |
81 fout_c.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | |
82 % ( "Track C", sdescription, '255,0,255', svisibility )) | |
83 | |
84 | |
85 sorted_infile.seek(0) | |
86 for line in file( sorted_infile.name ): | |
87 line = line.strip() | |
88 if not(line): | |
89 continue | |
90 try: | |
91 fields = line.split('\t') | |
92 chr = fields[chr_col] | |
93 start = int(fields[coord_col]) | |
94 assert start > 0 | |
95 except: | |
96 continue | |
97 try: | |
98 ind = chr_vals.index(chr) #encountered chr for the 1st time | |
99 del chr_vals[ind] | |
100 prev_start = '' | |
101 header = "variableStep chrom=%s\n" %(chr) | |
102 if track_type == 'coverage' or track_type == 'both': | |
103 coverage = int(fields[coverage_col]) | |
104 line1 = "%s\t%s\n" %(start,coverage) | |
105 fout.write("%s%s" %(header,line1)) | |
106 if track_type == 'snp' or track_type == 'both': | |
107 a = t = g = c = 0 | |
108 fout_a.write("%s" %(header)) | |
109 fout_t.write("%s" %(header)) | |
110 fout_g.write("%s" %(header)) | |
111 fout_c.write("%s" %(header)) | |
112 try: | |
113 #ref_nt = fields[ref_col].capitalize() | |
114 read_nt = fields[read_col].capitalize() | |
115 try: | |
116 nt_ind = ['A','T','G','C'].index(read_nt) | |
117 if nt_ind == 0: | |
118 a+=1 | |
119 elif nt_ind == 1: | |
120 t+=1 | |
121 elif nt_ind == 2: | |
122 g+=1 | |
123 else: | |
124 c+=1 | |
125 except ValueError: | |
126 pass | |
127 except: | |
128 pass | |
129 prev_start = start | |
130 except ValueError: | |
131 if start != prev_start: | |
132 if track_type == 'coverage' or track_type == 'both': | |
133 coverage = int(fields[coverage_col]) | |
134 fout.write("%s\t%s\n" %(start,coverage)) | |
135 if track_type == 'snp' or track_type == 'both': | |
136 if a: | |
137 fout_a.write("%s\t%s\n" %(prev_start,a)) | |
138 if t: | |
139 fout_t.write("%s\t%s\n" %(prev_start,t)) | |
140 if g: | |
141 fout_g.write("%s\t%s\n" %(prev_start,g)) | |
142 if c: | |
143 fout_c.write("%s\t%s\n" %(prev_start,c)) | |
144 a = t = g = c = 0 | |
145 try: | |
146 #ref_nt = fields[ref_col].capitalize() | |
147 read_nt = fields[read_col].capitalize() | |
148 try: | |
149 nt_ind = ['A','T','G','C'].index(read_nt) | |
150 if nt_ind == 0: | |
151 a+=1 | |
152 elif nt_ind == 1: | |
153 t+=1 | |
154 elif nt_ind == 2: | |
155 g+=1 | |
156 else: | |
157 c+=1 | |
158 except ValueError: | |
159 pass | |
160 except: | |
161 pass | |
162 prev_start = start | |
163 else: | |
164 if track_type == 'snp' or track_type == 'both': | |
165 try: | |
166 #ref_nt = fields[ref_col].capitalize() | |
167 read_nt = fields[read_col].capitalize() | |
168 try: | |
169 nt_ind = ['A','T','G','C'].index(read_nt) | |
170 if nt_ind == 0: | |
171 a+=1 | |
172 elif nt_ind == 1: | |
173 t+=1 | |
174 elif nt_ind == 2: | |
175 g+=1 | |
176 else: | |
177 c+=1 | |
178 except ValueError: | |
179 pass | |
180 except: | |
181 pass | |
182 | |
183 if track_type == 'snp' or track_type == 'both': | |
184 if a: | |
185 fout_a.write("%s\t%s\n" %(prev_start,a)) | |
186 if t: | |
187 fout_t.write("%s\t%s\n" %(prev_start,t)) | |
188 if g: | |
189 fout_g.write("%s\t%s\n" %(prev_start,g)) | |
190 if c: | |
191 fout_c.write("%s\t%s\n" %(prev_start,c)) | |
192 | |
193 fout_a.seek(0) | |
194 fout_g.seek(0) | |
195 fout_t.seek(0) | |
196 fout_c.seek(0) | |
197 | |
198 if track_type == 'snp': | |
199 os.system("cat %s %s %s %s >> %s" %(fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname)) | |
200 elif track_type == 'both': | |
201 fout.seek(0) | |
202 os.system("cat %s %s %s %s %s | cat > %s" %(fout.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname)) | |
203 if __name__ == "__main__": | |
204 main() |