comparison dpmix_plot.py @ 0:2c498d40ecde

Uploaded
author miller-lab
date Mon, 09 Apr 2012 12:03:06 -0400
parents
children 22fe0154fa54
comparison
equal deleted inserted replaced
-1:000000000000 0:2c498d40ecde
1 #!/usr/bin/env python
2
3 import os
4 import sys
5 import math
6 import matplotlib as mpl
7 mpl.use('PDF')
8 import matplotlib.pyplot as plt
9 from matplotlib.path import Path
10 import matplotlib.patches as patches
11
12 ################################################################################
13
def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
    """Load chromosome lengths for a genome build from the Galaxy data index.

    Reads ``<galaxy_data_index_dir>/shared/ucsc/chrom/<dbkey>.len``.  Every
    line that splits into exactly two whitespace-separated fields is taken as
    ``<chromosome name> <integer length>``; lines with any other number of
    fields are skipped.

    The lookup is best-effort: if the file cannot be opened or read, or a
    length field is not an integer, reading stops and whatever was collected
    up to that point (possibly nothing) is returned.

    Args:
        dbkey: genome build identifier used to form the ``.len`` file name.
        galaxy_data_index_dir: root directory of the data index.

    Returns:
        dict mapping chromosome name (str) to length (int).
    """
    chrom_len_path = os.path.join(
        galaxy_data_index_dir, 'shared/ucsc/chrom', '{0}.len'.format(dbkey))

    chrom_len = {}

    try:
        with open(chrom_len_path) as fh:
            for line in fh:
                # split() with no argument already discards the line ending.
                elems = line.split()
                if len(elems) == 2:
                    chrom_len[elems[0]] = int(elems[1])
    except (IOError, OSError, ValueError):
        # Only expected failures (missing/unreadable file, non-integer length)
        # are tolerated; KeyboardInterrupt, SystemExit and programming errors
        # are no longer swallowed.
        pass

    return chrom_len
34
def parse_input_file(input_file):
    """Parse the whitespace-delimited interval file that drives the plot.

    Every non-blank line must provide at least five fields::

        <chrom> <begin> <end> <state> <individual>

    ``begin``, ``end`` and ``state`` are converted with ``int``; any fields
    after the fifth are ignored.

    Args:
        input_file: path of the file to read.

    Returns:
        A 4-tuple ``(chroms, individuals, data, chrom_len)``:

        * ``chroms`` - chromosome names in order of first appearance.
        * ``individuals`` - individual names in order of first appearance.
        * ``data`` - ``data[chrom][individual]`` is the list of
          ``(begin, end, state)`` tuples in file order.
        * ``chrom_len`` - largest ``end`` seen per chromosome (never below 0).

    Raises:
        ValueError: a line has fewer than four fields or a non-integer
            begin/end/state.
        IndexError: a line has exactly four fields (no individual).
    """
    chroms = []
    individuals = []
    data = {}
    chrom_len = {}
    # Set mirror of `individuals` so the first-seen test is O(1) per line.
    seen_individuals = set()

    with open(input_file) as fh:
        for line in fh:
            elems = line.split()
            if not elems:
                continue

            chrom = elems[0]
            p1, p2, state = map(int, elems[1:4])
            individual = elems[4]

            # `data` gains a key exactly when `chroms` gains an entry, so it
            # doubles as the membership index for chromosomes.
            if chrom not in data:
                chroms.append(chrom)
                data[chrom] = {}

            if individual not in seen_individuals:
                seen_individuals.add(individual)
                individuals.append(individual)

            data[chrom].setdefault(individual, []).append((p1, p2, state))

            if p2 > chrom_len.setdefault(chrom, 0):
                chrom_len[chrom] = p2

    return chroms, individuals, data, chrom_len
64
def check_chroms(chroms, chrom_len, dbkey):
    """Abort unless a length is known for every chromosome.

    Writes one line to stderr for each entry of ``chroms`` that is not a key
    of ``chrom_len`` and, if there was at least one, exits with status 1.

    Args:
        chroms: iterable of chromosome names to verify.
        chrom_len: dict of known chromosome lengths, keyed by name.
        dbkey: genome build identifier, used only in the message.

    Raises:
        SystemExit: with code 1 when any chromosome has no known length.
    """
    missing = [chrom for chrom in chroms if chrom not in chrom_len]

    for chrom in missing:
        # sys.stderr.write works on Python 2 and 3; the former
        # `print >> sys.stderr` statement raises TypeError on Python 3.
        sys.stderr.write(
            "Can't find length for {0} chromosome {1}\n".format(dbkey, chrom))

    if missing:
        sys.exit(1)
73
def check_data(data, chrom_len, dbkey):
    """Validate every interval and abort if any is malformed.

    For each ``(begin, end, state)`` tuple in ``data[chrom][individual]`` two
    independent checks are made, each reporting to stderr:

    * ``begin`` must be strictly less than ``end``;
    * the interval must lie within ``[0, chrom_len[chrom]]``.

    All intervals are checked before exiting, so every bad line is reported.

    Args:
        data: nested dict ``chrom -> individual -> [(begin, end, state), ...]``.
        chrom_len: dict of chromosome lengths; must have a key for every
            chromosome in ``data``.
        dbkey: genome build identifier, used only in the message.

    Raises:
        SystemExit: with code 1 when at least one interval failed a check.
        KeyError: a chromosome in ``data`` has no entry in ``chrom_len``.
    """
    error = False
    for chrom in data:
        chrom_beg = 0
        chrom_end = chrom_len[chrom]
        for individual in data[chrom]:
            for p1, p2, state in data[chrom][individual]:
                if p1 >= p2:
                    # The individual was passed to format() before but had no
                    # placeholder, so it never appeared in the message.
                    sys.stderr.write(
                        "Bad data line: begin >= end: {0} {1} {2} {3} {4}\n".format(
                            chrom, p1, p2, state, individual))
                    error = True
                if p1 < chrom_beg or p2 > chrom_end:
                    sys.stderr.write(
                        "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6} {7}\n".format(
                            dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual))
                    error = True
    if error:
        sys.exit(1)
89
def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):
    """Build a borderless filled rectangle patch.

    The rectangle spans ``p1``..``p2`` horizontally and ``bottom``..``top``
    vertically, and is traced left-bottom, left-top, right-top, right-bottom.

    Args:
        p1: x coordinate of the left edge.
        p2: x coordinate of the right edge.
        color: face colour handed to ``PathPatch``.
        bottom: y coordinate of the lower edge.
        top: y coordinate of the upper edge.

    Returns:
        A ``matplotlib.patches.PathPatch`` with line width 0.
    """
    corners = [(p1, bottom), (p1, top), (p2, top), (p2, bottom)]

    # CLOSEPOLY still occupies a vertex slot; its coordinates are ignored.
    outline = Path(
        corners + [(0.0, 0.0)],
        [Path.MOVETO] + [Path.LINETO] * 3 + [Path.CLOSEPOLY],
    )
    return patches.PathPatch(outline, facecolor=color, lw=0)
109
def make_split_rectangle(p1, p2, top_color, bottom_color):
    """Build two stacked half-height rectangles covering ``p1``..``p2``.

    Returns:
        A two-element list: the lower half (y 0.0-0.5, ``bottom_color``)
        followed by the upper half (y 0.5-1.0, ``top_color``).
    """
    return [
        make_rectangle(p1, p2, bottom_color, top=0.5),
        make_rectangle(p1, p2, top_color, bottom=0.5),
    ]
114
def make_state_rectangle(p1, p2, state, chrom, individual):
    """Return the patch list that draws one interval for its state.

    * state 0 - one full-height rectangle in ``'r'``;
    * state 1 - a split rectangle, ``'r'`` on top and ``'g'`` below;
    * state 2 - one full-height rectangle in ``'g'``.

    Args:
        p1: interval start (left edge).
        p2: interval end (right edge).
        state: integer state code, one of 0, 1 or 2.
        chrom: chromosome name, used only in the error message.
        individual: individual name, used only in the error message.

    Returns:
        A list of patches to add to the axes.

    Raises:
        SystemExit: with code 1 when ``state`` is not 0, 1 or 2.
    """
    if state == 0:
        return [make_rectangle(p1, p2, 'r')]
    if state == 1:
        return make_split_rectangle(p1, p2, 'r', 'g')
    if state == 2:
        return [make_rectangle(p1, p2, 'g')]

    # The message echoes the offending data line; the individual used to be
    # passed to format() without a placeholder and was silently dropped.
    sys.stderr.write(
        "Unknown state: {0}: {1} {2} {3} {4} {5}\n".format(
            state, chrom, p1, p2, state, individual))
    sys.exit(1)
125
def nicenum(num, round=False):
    """Snap ``num`` to 1, 2, 5 or 10 times a power of ten.

    ``num`` is written as ``f * 10**exp`` with ``1 <= f < 10`` and ``f`` is
    replaced by a "nice" multiplier:

    * ``round=True``  - nearest-style: f < 1.5 -> 1, < 3 -> 2, < 7 -> 5,
      otherwise 10;
    * ``round=False`` - ceiling-style: f <= 1 -> 1, <= 2 -> 2, <= 5 -> 5,
      otherwise 10.

    Args:
        num: value to snap; 0 is returned as 0.0.
        round: selects the nearest-style table instead of the ceiling one.

    Returns:
        The nice value as a float.
    """
    if num == 0:
        return 0.0

    exp = int(math.floor(math.log10(num)))
    fraction = num / math.pow(10, exp)

    # (upper limit, nice multiplier) pairs, scanned in ascending order.
    if round:
        nice = next(
            (mult for limit, mult in ((1.5, 1.0), (3.0, 2.0), (7.0, 5.0))
             if fraction < limit),
            10.0)
    else:
        nice = next(
            (mult for limit, mult in ((1.0, 1.0), (2.0, 2.0), (5.0, 5.0))
             if fraction <= limit),
            10.0)

    return nice * pow(10, exp)
153
def tick_foo(beg, end, loose=False):
    """Compute tick positions and labels for the range ``beg``..``end``.

    The tick spacing is a "nice" number (see ``nicenum``) chosen so that
    roughly ten ticks cover the range.  Positions are generated from the lower
    bound upwards and truncated to ``int``; the first generated position (the
    lower bound itself) is dropped.  Labels are the positions divided by
    ``10**digits``, where ``digits`` is the order of magnitude of the spacing,
    truncated to ``int`` and rendered as strings.

    Args:
        beg: start of the range.
        end: end of the range.
        loose: when true, the bounds are first widened outwards to multiples
            of the tick spacing; otherwise ``beg`` and ``end`` are used as is.

    Returns:
        ``(vals, labels)`` - parallel lists of int positions and str labels.
        Both are empty when ``end <= beg``.
    """
    # An empty or inverted range has no ticks.  Without this guard the spacing
    # would be 0 (or undefined) and math.log10 would raise a domain error.
    if end <= beg:
        return [], []

    ntick = 10

    span = nicenum(end - beg, round=False)
    step = nicenum(span / (ntick - 1), round=True)
    digits = int(math.floor(math.log10(step)))

    if loose:
        graph_min = math.floor(beg / step) * step
        graph_max = math.ceil(end / step) * step
        # Half a step of slack so accumulated float error cannot drop the
        # final tick.
        stop = graph_max + (0.5 * step)
    else:
        graph_min = beg
        stop = end

    vals = []
    x = graph_min
    while x <= stop:
        vals.append(int(x))
        x += step

    # Skip the tick sitting on the lower bound.
    vals = vals[1:]

    scale = math.pow(10, digits)
    labels = ['{0}'.format(int(val / scale)) for val in vals]

    return vals, labels
193
194 ################################################################################
195
def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir):
    """Draw one track per individual for every chromosome and save the figure.

    The intervals are read with ``parse_input_file``.  Chromosome lengths
    inferred from the data are replaced by the values from
    ``build_chrom_len_dict`` wherever that lookup has the chromosome, and the
    intervals are then validated with ``check_data`` (which exits on bad
    data).

    The figure is laid out top to bottom: for each chromosome a title row,
    then one row per individual that has data on it.  Each row has a name
    column on the left and a track whose x axis runs from 0 to the chromosome
    length; only the last track of a chromosome shows x tick labels.  Interval
    patches come from ``make_state_rectangle``.

    Args:
        input_dbkey: genome build identifier.
        input_file: path of the interval file to plot.
        output_file: path handed to ``plt.savefig``.
        galaxy_data_index_dir: root of the data index holding ``.len`` files.
    """
    fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
    chroms, individuals, data, chrom_len = parse_input_file(input_file)

    # Lengths from the data index take precedence over those seen in the data.
    for name in [c for c in chrom_len if c in fs_chrom_len]:
        chrom_len[name] = fs_chrom_len[name]

    # check_chroms(chroms, chrom_len, input_dbkey) is currently disabled.
    check_data(data, chrom_len, input_dbkey)

    ## units below are inches
    top_space = 0.10
    chrom_space = 0.25
    chrom_height = 0.25
    ind_space = 0.10
    ind_height = 0.25

    # First pass: add up the vertical space every row will need.
    total_height = 0.0
    for index, chrom in enumerate(chroms):
        if index == 0:
            total_height += (top_space + chrom_height)
        else:
            # Later chromosomes get extra separation from the block above.
            total_height += (top_space + chrom_space + chrom_height)

        shown = sum(1 for person in individuals if person in data[chrom])
        total_height += shown * (ind_space + ind_height)

    width = 7.5
    height = math.ceil(total_height)

    fig = plt.figure(figsize=(width, height))

    # `bottom` is the lower edge of the row being placed, as a fraction of the
    # figure height; it walks downwards from the top of the figure.
    bottom = 1.0
    for_webb = False

    for index, chrom in enumerate(chroms):
        length = chrom_len[chrom]
        vals, labels = tick_foo(0, length)

        if index == 0:
            bottom -= (top_space + chrom_height)/height
        else:
            bottom -= (top_space + chrom_space + chrom_height)/height

        if not for_webb:
            # [left, bottom, width, height]
            title_ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height])
            title_ax.axis('off')
            title_ax.text(0.5, 0.5, chrom, fontsize=14, ha='center')

        present = [person for person in individuals if person in data[chrom]]
        last_rank = len(present)

        for rank, individual in enumerate(present, 1):
            bottom -= (ind_space + ind_height)/height

            if not for_webb:
                # [left, bottom, width, height]
                name_ax = fig.add_axes([0.0, bottom, 0.09, ind_height/height])
                name_ax.axis('off')
                name_ax.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')

            # [left, bottom, width, height]
            track_ax = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False)
            track_ax.set_xlim(0, length)
            track_ax.set_ylim(0, 1)

            if rank != last_rank or for_webb:
                track_ax.axis('off')
            else:
                # Only the last track of a chromosome carries the x scale.
                track_ax.tick_params(top=False, left=False, right=False, labelleft=False)
                track_ax.set_xticks(vals)
                track_ax.set_xticklabels(labels)

            for p1, p2, state in sorted(data[chrom][individual]):
                for patch in make_state_rectangle(p1, p2, state, chrom, individual):
                    track_ax.add_patch(patch)

    plt.savefig(output_file)
288
289 ################################################################################
290
if __name__ == '__main__':
    # Four positional arguments are required; any further ones are ignored.
    # Report a usage line instead of failing with a bare unpacking traceback.
    if len(sys.argv) < 5:
        sys.stderr.write(
            'usage: {0} <dbkey> <input_file> <output_file> <galaxy_data_index_dir>\n'.format(
                sys.argv[0]))
        sys.exit(1)

    input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]
    make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
    sys.exit(0)
295