0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import math
|
|
6 import matplotlib as mpl
|
|
7 mpl.use('PDF')
|
|
8 import matplotlib.pyplot as plt
|
|
9 from matplotlib.path import Path
|
|
10 import matplotlib.patches as patches
|
|
11
|
|
12 ################################################################################
|
|
13
|
|
14 def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
|
|
15 chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom')
|
|
16 chrom_len_file = '{0}.len'.format(dbkey)
|
|
17 chrom_len_path = os.path.join(chrom_len_root, chrom_len_file)
|
|
18
|
|
19 chrom_len = {}
|
|
20
|
|
21 try:
|
|
22 with open(chrom_len_path) as fh:
|
|
23 for line in fh:
|
|
24 line = line.rstrip('\r\n')
|
|
25 elems = line.split()
|
|
26 if len(elems) == 2:
|
|
27 chrom = elems[0]
|
|
28 length = int(elems[1])
|
|
29 chrom_len[chrom] = length
|
|
30 except:
|
|
31 pass
|
|
32
|
|
33 return chrom_len
|
|
34
|
|
35 def parse_input_file(input_file):
|
|
36 chroms = []
|
|
37 individuals = []
|
|
38 data = {}
|
|
39 chrom_len = {}
|
|
40
|
|
41 with open(input_file) as fh:
|
|
42 for line in fh:
|
|
43 line = line.strip()
|
|
44 if line:
|
|
45 elems = line.split()
|
|
46 chrom = elems[0]
|
|
47 p1, p2, state = map(int, elems[1:4])
|
|
48 id = elems[4]
|
|
49
|
|
50 if chrom not in chroms:
|
|
51 chroms.append(chrom)
|
|
52
|
|
53 if id not in individuals:
|
|
54 individuals.append(id)
|
|
55
|
|
56 data.setdefault(chrom, {})
|
|
57 data[chrom].setdefault(id, [])
|
|
58 data[chrom][id].append((p1, p2, state))
|
|
59
|
|
60 if p2 > chrom_len.setdefault(chrom, 0):
|
|
61 chrom_len[chrom] = p2
|
|
62
|
|
63 return chroms, individuals, data, chrom_len
|
|
64
|
|
65 def check_chroms(chroms, chrom_len, dbkey):
|
|
66 error = 0
|
|
67 for chrom in chroms:
|
|
68 if chrom not in chrom_len:
|
|
69 print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom)
|
|
70 error = 1
|
|
71 if error:
|
|
72 sys.exit(1)
|
|
73
|
|
74 def check_data(data, chrom_len, dbkey):
|
|
75 error = 0
|
|
76 for chrom in data:
|
|
77 chrom_beg = 0
|
|
78 chrom_end = chrom_len[chrom]
|
|
79 for individual in data[chrom]:
|
|
80 for p1, p2, state in data[chrom][individual]:
|
|
81 if p1 >= p2:
|
|
82 print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual)
|
|
83 error = 1
|
|
84 if p1 < chrom_beg or p2 > chrom_end:
|
|
85 print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual)
|
|
86 error = 1
|
|
87 if error:
|
|
88 sys.exit(1)
|
|
89
|
|
90 def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):
|
|
91 verts = [
|
|
92 (p1, bottom), # left, bottom
|
|
93 (p1, top), # left, top
|
|
94 (p2, top), # right, top
|
|
95 (p2, bottom), # right, bottom
|
|
96 (0.0, 0.0) # ignored
|
|
97 ]
|
|
98
|
|
99 codes = [
|
|
100 Path.MOVETO,
|
|
101 Path.LINETO,
|
|
102 Path.LINETO,
|
|
103 Path.LINETO,
|
|
104 Path.CLOSEPOLY
|
|
105 ]
|
|
106
|
|
107 path = Path(verts, codes)
|
|
108 return patches.PathPatch(path, facecolor=color, lw=0)
|
|
109
|
|
110 def make_split_rectangle(p1, p2, top_color, bottom_color):
|
|
111 patch1 = make_rectangle(p1, p2, bottom_color, top=0.5)
|
|
112 patch2 = make_rectangle(p1, p2, top_color, bottom=0.5)
|
|
113 return [patch1, patch2]
|
|
114
|
|
115 def make_state_rectangle(p1, p2, state, chrom, individual):
|
|
116 if state == 0:
|
|
117 return [ make_rectangle(p1, p2, 'r') ]
|
|
118 elif state == 1:
|
|
119 return make_split_rectangle(p1, p2, 'r', 'g')
|
|
120 elif state == 2:
|
|
121 return [ make_rectangle(p1, p2, 'g') ]
|
|
122 else:
|
|
123 print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual)
|
|
124 sys.exit(1)
|
|
125
|
|
126 def nicenum(num, round=False):
|
|
127 if num == 0:
|
|
128 return 0.0
|
|
129
|
|
130 exp = int(math.floor(math.log10(num)))
|
|
131 f = num / math.pow(10, exp)
|
|
132
|
|
133 if round:
|
|
134 if f < 1.5:
|
|
135 nf = 1.0
|
|
136 elif f < 3.0:
|
|
137 nf = 2.0
|
|
138 elif f < 7.0:
|
|
139 nf = 5.0
|
|
140 else:
|
|
141 nf = 10.0
|
|
142 else:
|
|
143 if f <= 1.0:
|
|
144 nf = 1.0
|
|
145 elif f <= 2.0:
|
|
146 nf = 2.0
|
|
147 elif f <= 5.0:
|
|
148 nf = 5.0
|
|
149 else:
|
|
150 nf = 10.0
|
|
151
|
|
152 return nf * pow(10, exp)
|
|
153
|
|
154 def tick_foo(beg, end, loose=False):
|
|
155 ntick = 10
|
|
156
|
|
157 range = nicenum(end - beg, round=False)
|
|
158 d = nicenum(range/(ntick - 1), round=True)
|
|
159 digits = int(math.floor(math.log10(d)))
|
|
160
|
|
161 if loose:
|
|
162 graph_min = math.floor(beg/d) * d
|
|
163 graph_max = math.ceil(end/d) * d
|
|
164 else:
|
|
165 graph_min = beg
|
|
166 graph_max = end
|
|
167
|
|
168 nfrac = max([-1 * digits, 0])
|
|
169 vals = []
|
|
170
|
|
171 stop = graph_max
|
|
172 if loose:
|
|
173 stop = graph_max + (0.5 * d)
|
|
174
|
|
175 x = graph_min
|
|
176 while x <= stop:
|
|
177 vals.append(int(x))
|
|
178 x += d
|
|
179
|
|
180 vals = vals[1:]
|
|
181
|
|
182 # if not loose:
|
|
183 # if vals[-1] < graph_max:
|
|
184 # vals.append(int(graph_max))
|
|
185
|
|
186 labels = []
|
|
187 for val in vals:
|
|
188 labels.append('{0}'.format(int(val/math.pow(10, digits))))
|
|
189
|
|
190 # labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits)))
|
|
191
|
|
192 return vals, labels
|
|
193
|
|
194 ################################################################################
|
|
195
|
|
196 def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir):
|
|
197 fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
|
|
198 chroms, individuals, data, chrom_len = parse_input_file(input_file)
|
|
199
|
|
200 for chrom in chrom_len.keys():
|
|
201 if chrom in fs_chrom_len:
|
|
202 chrom_len[chrom] = fs_chrom_len[chrom]
|
|
203
|
|
204 #check_chroms(chroms, chrom_len, input_dbkey)
|
|
205 check_data(data, chrom_len, input_dbkey)
|
|
206
|
|
207 ## units below are inches
|
|
208 top_space = 0.10
|
|
209 chrom_space = 0.25
|
|
210 chrom_height = 0.25
|
|
211 ind_space = 0.10
|
|
212 ind_height = 0.25
|
|
213
|
|
214 total_height = 0.0
|
|
215 at_top = True
|
|
216 for chrom in chroms:
|
|
217 if at_top:
|
|
218 total_height += (top_space + chrom_height)
|
|
219 at_top = False
|
|
220 else:
|
|
221 total_height += (top_space + chrom_space + chrom_height)
|
|
222
|
|
223 individual_count = 0
|
|
224 for individual in individuals:
|
|
225 if individual in data[chrom]:
|
|
226 individual_count += 1
|
|
227 total_height += individual_count * (ind_space + ind_height)
|
|
228
|
|
229 width = 7.5
|
|
230 height = math.ceil(total_height)
|
|
231
|
|
232 bottom = 1.0
|
|
233
|
|
234 fig = plt.figure(figsize=(width, height))
|
|
235
|
|
236 at_top = True
|
|
237 for_webb = False
|
|
238
|
|
239 for chrom in chroms:
|
|
240 length = chrom_len[chrom]
|
|
241 vals, labels = tick_foo(0, length)
|
|
242
|
|
243 if at_top:
|
|
244 bottom -= (top_space + chrom_height)/height
|
|
245 at_top = False
|
|
246 else:
|
|
247 bottom -= (top_space + chrom_space + chrom_height)/height
|
|
248
|
|
249 if not for_webb:
|
|
250 ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height])
|
|
251 plt.axis('off')
|
|
252 plt.text(0.5, 0.5, chrom, fontsize=14, ha='center')
|
|
253
|
|
254 individual_count = 0
|
|
255 for individual in individuals:
|
|
256 if individual in data[chrom]:
|
|
257 individual_count += 1
|
|
258
|
|
259 i = 0
|
|
260 for individual in individuals:
|
|
261 if individual in data[chrom]:
|
|
262 i += 1
|
|
263
|
|
264 bottom -= (ind_space + ind_height)/height
|
|
265 if not for_webb:
|
|
266 # [left, bottom, width, height]
|
|
267 ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height])
|
|
268 plt.axis('off')
|
|
269 plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')
|
|
270 # [left, bottom, width, height]
|
|
271 ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False)
|
|
272 ax2.set_xlim(0, length)
|
|
273 ax2.set_ylim(0, 1)
|
|
274 if i != individual_count:
|
|
275 plt.axis('off')
|
|
276 else:
|
|
277 if not for_webb:
|
|
278 ax2.tick_params(top=False, left=False, right=False, labelleft=False)
|
|
279 ax2.set_xticks(vals)
|
|
280 ax2.set_xticklabels(labels)
|
|
281 else:
|
|
282 plt.axis('off')
|
|
283 for p1, p2, state in sorted(data[chrom][individual]):
|
|
284 for patch in make_state_rectangle(p1, p2, state, chrom, individual):
|
|
285 ax2.add_patch(patch)
|
|
286
|
|
287 plt.savefig(output_file)
|
|
288
|
|
289 ################################################################################
|
|
290
|
|
291 if __name__ == '__main__':
|
|
292 input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]
|
|
293 make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
|
|
294 sys.exit(0)
|
|
295
|