comparison tools/metag_tools/mapping_to_ucsc.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2
3 from galaxy import eggs
4 import sys, tempfile, os
5
6 assert sys.version_info[:2] >= (2.4)
7
8 def stop_err(msg):
9 sys.stderr.write(msg)
10 sys.exit()
11
12 def main():
13
14 out_fname = sys.argv[1]
15 in_fname = sys.argv[2]
16 chr_col = int(sys.argv[3])-1
17 coord_col = int(sys.argv[4])-1
18 track_type = sys.argv[5]
19 if track_type == 'coverage' or track_type == 'both':
20 coverage_col = int(sys.argv[6])-1
21 cname = sys.argv[7]
22 cdescription = sys.argv[8]
23 ccolor = sys.argv[9].replace('-',',')
24 cvisibility = sys.argv[10]
25 if track_type == 'snp' or track_type == 'both':
26 if track_type == 'both':
27 j = 5
28 else:
29 j = 0
30 #sname = sys.argv[7+j]
31 sdescription = sys.argv[6+j]
32 svisibility = sys.argv[7+j]
33 #ref_col = int(sys.argv[10+j])-1
34 read_col = int(sys.argv[8+j])-1
35
36
37 # Sort the input file based on chromosome (alphabetically) and start co-ordinates (numerically)
38 sorted_infile = tempfile.NamedTemporaryFile()
39 try:
40 os.system("sort -k %d,%d -k %dn -o %s %s" %(chr_col+1,chr_col+1,coord_col+1,sorted_infile.name,in_fname))
41 except Exception, exc:
42 stop_err( 'Initialization error -> %s' %str(exc) )
43
44 #generate chr list
45 sorted_infile.seek(0)
46 chr_vals = []
47 for line in file( sorted_infile.name ):
48 line = line.strip()
49 if not(line):
50 continue
51 try:
52 fields = line.split('\t')
53 chr = fields[chr_col]
54 if chr not in chr_vals:
55 chr_vals.append(chr)
56 except:
57 pass
58 if not(chr_vals):
59 stop_err("Skipped all lines as invalid.")
60
61 if track_type == 'coverage' or track_type == 'both':
62 if track_type == 'coverage':
63 fout = open( out_fname, "w" )
64 else:
65 fout = tempfile.NamedTemporaryFile()
66 fout.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
67 % ( cname, cdescription, ccolor, cvisibility ))
68 if track_type == 'snp' or track_type == 'both':
69 fout_a = tempfile.NamedTemporaryFile()
70 fout_t = tempfile.NamedTemporaryFile()
71 fout_g = tempfile.NamedTemporaryFile()
72 fout_c = tempfile.NamedTemporaryFile()
73 fout_ref = tempfile.NamedTemporaryFile()
74
75 fout_a.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
76 % ( "Track A", sdescription, '255,0,0', svisibility ))
77 fout_t.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
78 % ( "Track T", sdescription, '0,255,0', svisibility ))
79 fout_g.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
80 % ( "Track G", sdescription, '0,0,255', svisibility ))
81 fout_c.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \
82 % ( "Track C", sdescription, '255,0,255', svisibility ))
83
84
85 sorted_infile.seek(0)
86 for line in file( sorted_infile.name ):
87 line = line.strip()
88 if not(line):
89 continue
90 try:
91 fields = line.split('\t')
92 chr = fields[chr_col]
93 start = int(fields[coord_col])
94 assert start > 0
95 except:
96 continue
97 try:
98 ind = chr_vals.index(chr) #encountered chr for the 1st time
99 del chr_vals[ind]
100 prev_start = ''
101 header = "variableStep chrom=%s\n" %(chr)
102 if track_type == 'coverage' or track_type == 'both':
103 coverage = int(fields[coverage_col])
104 line1 = "%s\t%s\n" %(start,coverage)
105 fout.write("%s%s" %(header,line1))
106 if track_type == 'snp' or track_type == 'both':
107 a = t = g = c = 0
108 fout_a.write("%s" %(header))
109 fout_t.write("%s" %(header))
110 fout_g.write("%s" %(header))
111 fout_c.write("%s" %(header))
112 try:
113 #ref_nt = fields[ref_col].capitalize()
114 read_nt = fields[read_col].capitalize()
115 try:
116 nt_ind = ['A','T','G','C'].index(read_nt)
117 if nt_ind == 0:
118 a+=1
119 elif nt_ind == 1:
120 t+=1
121 elif nt_ind == 2:
122 g+=1
123 else:
124 c+=1
125 except ValueError:
126 pass
127 except:
128 pass
129 prev_start = start
130 except ValueError:
131 if start != prev_start:
132 if track_type == 'coverage' or track_type == 'both':
133 coverage = int(fields[coverage_col])
134 fout.write("%s\t%s\n" %(start,coverage))
135 if track_type == 'snp' or track_type == 'both':
136 if a:
137 fout_a.write("%s\t%s\n" %(prev_start,a))
138 if t:
139 fout_t.write("%s\t%s\n" %(prev_start,t))
140 if g:
141 fout_g.write("%s\t%s\n" %(prev_start,g))
142 if c:
143 fout_c.write("%s\t%s\n" %(prev_start,c))
144 a = t = g = c = 0
145 try:
146 #ref_nt = fields[ref_col].capitalize()
147 read_nt = fields[read_col].capitalize()
148 try:
149 nt_ind = ['A','T','G','C'].index(read_nt)
150 if nt_ind == 0:
151 a+=1
152 elif nt_ind == 1:
153 t+=1
154 elif nt_ind == 2:
155 g+=1
156 else:
157 c+=1
158 except ValueError:
159 pass
160 except:
161 pass
162 prev_start = start
163 else:
164 if track_type == 'snp' or track_type == 'both':
165 try:
166 #ref_nt = fields[ref_col].capitalize()
167 read_nt = fields[read_col].capitalize()
168 try:
169 nt_ind = ['A','T','G','C'].index(read_nt)
170 if nt_ind == 0:
171 a+=1
172 elif nt_ind == 1:
173 t+=1
174 elif nt_ind == 2:
175 g+=1
176 else:
177 c+=1
178 except ValueError:
179 pass
180 except:
181 pass
182
183 if track_type == 'snp' or track_type == 'both':
184 if a:
185 fout_a.write("%s\t%s\n" %(prev_start,a))
186 if t:
187 fout_t.write("%s\t%s\n" %(prev_start,t))
188 if g:
189 fout_g.write("%s\t%s\n" %(prev_start,g))
190 if c:
191 fout_c.write("%s\t%s\n" %(prev_start,c))
192
193 fout_a.seek(0)
194 fout_g.seek(0)
195 fout_t.seek(0)
196 fout_c.seek(0)
197
198 if track_type == 'snp':
199 os.system("cat %s %s %s %s >> %s" %(fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))
200 elif track_type == 'both':
201 fout.seek(0)
202 os.system("cat %s %s %s %s %s | cat > %s" %(fout.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))
203 if __name__ == "__main__":
204 main()