Mercurial > repos > sagun98 > micropita_v2
comparison src/breadcrumbs/hclust/hclust.py @ 0:0de566f21448 draft default tip
v2
author | sagun98 |
---|---|
date | Thu, 03 Jun 2021 18:13:32 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0de566f21448 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import numpy as np | |
5 import matplotlib | |
6 matplotlib.use('Agg') | |
7 import scipy | |
8 import pylab | |
9 import scipy.cluster.hierarchy as sch | |
10 import scipy.spatial.distance as dis | |
11 from scipy import stats | |
12 | |
13 # User defined color maps (in addition to matplotlib ones) | |
14 bbcyr = {'red': ( (0.0, 0.0, 0.0), | |
15 (0.25, 0.0, 0.0), | |
16 (0.50, 0.0, 0.0), | |
17 (0.75, 1.0, 1.0), | |
18 (1.0, 1.0, 1.0)), | |
19 'green': ( (0.0, 0.0, 0.0), | |
20 (0.25, 0.0, 0.0), | |
21 (0.50, 1.0, 1.0), | |
22 (0.75, 1.0, 1.0), | |
23 (1.0, 0.0, 1.0)), | |
24 'blue': ( (0.0, 0.0, 0.0), | |
25 (0.25, 1.0, 1.0), | |
26 (0.50, 1.0, 1.0), | |
27 (0.75, 0.0, 0.0), | |
28 (1.0, 0.0, 1.0))} | |
29 | |
30 bbcry = {'red': ( (0.0, 0.0, 0.0), | |
31 (0.25, 0.0, 0.0), | |
32 (0.50, 0.0, 0.0), | |
33 (0.75, 1.0, 1.0), | |
34 (1.0, 1.0, 1.0)), | |
35 'green': ( (0.0, 0.0, 0.0), | |
36 (0.25, 0.0, 0.0), | |
37 (0.50, 1.0, 1.0), | |
38 (0.75, 0.0, 0.0), | |
39 (1.0, 1.0, 1.0)), | |
40 'blue': ( (0.0, 0.0, 0.0), | |
41 (0.25, 1.0, 1.0), | |
42 (0.50, 1.0, 1.0), | |
43 (0.75, 0.0, 0.0), | |
44 (1.0, 0.0, 1.0))} | |
45 my_colormaps = [ ('bbcyr',bbcyr), | |
46 ('bbcry',bbcry)] | |
47 | |
48 | |
49 | |
50 def read_params(args): | |
51 import argparse as ap | |
52 import textwrap | |
53 | |
54 p = ap.ArgumentParser( description= "TBA" ) | |
55 | |
56 p.add_argument( '--in', '--inp', metavar='INPUT_FILE', type=str, | |
57 nargs='?', default=sys.stdin, | |
58 help= "the input archive " ) | |
59 | |
60 p.add_argument( '--out', metavar='OUTPUT_FILE', type=str, | |
61 nargs = '?', default=None, | |
62 help= " the output file, image on screen" | |
63 " if not specified. " ) | |
64 | |
65 p.add_argument( '-m', metavar='method', type=str, | |
66 choices=[ "single","complete","average", | |
67 "weighted","centroid","median", | |
68 "ward" ], | |
69 default="average" ) | |
70 | |
71 dist_funcs = [ "euclidean","minkowski","cityblock","seuclidean", | |
72 "sqeuclidean","cosine","correlation","hamming", | |
73 "jaccard","chebyshev","canberra","braycurtis", | |
74 "mahalanobis","yule","matching","dice", | |
75 "kulsinski","rogerstanimoto","russellrao","sokalmichener", | |
76 "sokalsneath","wminkowski","ward"] | |
77 p.add_argument( '-d', metavar='distance function', type=str, | |
78 choices=dist_funcs, | |
79 default="euclidean" ) | |
80 p.add_argument( '-f', metavar='distance function for features', type=str, | |
81 choices=dist_funcs, | |
82 default="d" ) | |
83 | |
84 p.add_argument( '--dmf', metavar='distance matrix for features', type=str, | |
85 default = None ) | |
86 p.add_argument( '--dms', metavar='distance matrix for samples', type=str, | |
87 default = None ) | |
88 | |
89 | |
90 p.add_argument( '-l', metavar='sample label', type=str, | |
91 default = None ) | |
92 | |
93 p.add_argument( '-s', metavar='scale norm', type=str, | |
94 default = 'lin', choices = ['log','lin']) | |
95 | |
96 p.add_argument( '-x', metavar='x cell width', type=float, | |
97 default = 0.1) | |
98 p.add_argument( '-y', metavar='y cell width', type=float, | |
99 default = 0.1 ) | |
100 | |
101 p.add_argument( '--minv', metavar='min value', type=float, | |
102 default = 0.0 ) | |
103 p.add_argument( '--maxv', metavar='max value', type=float, | |
104 default = None ) | |
105 | |
106 p.add_argument( '--xstart', metavar='x coordinate of the top left cell ' | |
107 'of the values', | |
108 type=int, default=1 ) | |
109 p.add_argument( '--ystart', metavar='y coordinate of the top left cell ' | |
110 'of the values', | |
111 type=int, default=1 ) | |
112 p.add_argument( '--xstop', metavar='x coordinate of the bottom right cell ' | |
113 'of the values (default None = last row)', | |
114 type=int, default=None ) | |
115 p.add_argument( '--ystop', metavar='y coordinate of the bottom right cell ' | |
116 'of the values (default None = last column)', | |
117 type=int, default=None ) | |
118 | |
119 p.add_argument( '--perc', metavar='percentile for ordering and rows selection', type=int, default=None ) | |
120 p.add_argument( '--top', metavar='selection of the top N rows', type=int, default=None ) | |
121 p.add_argument( '--norm', metavar='whether to normalize columns (default 0)', type=int, default=0 ) | |
122 | |
123 p.add_argument( '--sdend_h', metavar='height of the sample dendrogram', type=float, | |
124 default = 0.1 ) | |
125 p.add_argument( '--fdend_w', metavar='width of the feature dendrogram', type=float, | |
126 default = 0.1 ) | |
127 p.add_argument( '--cm_h', metavar='height of the colormap', type=float, | |
128 default = 0.03 ) | |
129 p.add_argument( '--cm_ticks', metavar='label for ticks of the colormap', type=str, | |
130 default = None ) | |
131 | |
132 p.add_argument( '--font_size', metavar='label_font_size', type=int, | |
133 default = 7 ) | |
134 p.add_argument( '--feat_dend_col_th', metavar='Color threshold for feature dendrogram', type=float, | |
135 default = None ) | |
136 p.add_argument( '--sample_dend_col_th', metavar='Color threshold for sample dendrogram', type=float, | |
137 default = None ) | |
138 p.add_argument( '--clust_ncols', metavar='Number of colors for clusters', type=int, | |
139 default = 7 ) | |
140 p.add_argument( '--clust_line_w', metavar='Cluster line width', type=float, | |
141 default = 1.0 ) | |
142 p.add_argument( '--label_cols', metavar='Label colors', type=str, | |
143 default = None ) | |
144 p.add_argument( '--label2cols', metavar='Label to colors mapping file', type=str, | |
145 default = None ) | |
146 p.add_argument( '--sdend_out', metavar='File for storing the samples dendrogram in PhyloXML format', type=str, | |
147 default = None ) | |
148 p.add_argument( '--fdend_out', metavar='File for storing the features dendrogram in PhyloXML format', type=str, | |
149 default = None ) | |
150 | |
151 | |
152 p.add_argument( '--pad_inches', metavar='Proportion of figure to be left blank around the plot', type=float, | |
153 default = 0.1 ) | |
154 | |
155 | |
156 p.add_argument( '--flabel', metavar='Whether to show the labels for the features', type=int, | |
157 default = 0 ) | |
158 p.add_argument( '--slabel', metavar='Whether to show the labels for the samples', type=int, | |
159 default = 0 ) | |
160 | |
161 p.add_argument( '--legend', metavar='Whether to show the samples to label legend', type=int, | |
162 default = 0 ) | |
163 p.add_argument( '--legend_font_size', metavar='Legend font size', type=int, | |
164 default = 7 ) | |
165 p.add_argument( '--legend_ncol', metavar='Number of columns for the legend', type=int, | |
166 default = 3 ) | |
167 p.add_argument( '--grid', metavar='Whether to show the grid (only black for now)', type=int, | |
168 default = 0 ) | |
169 | |
170 col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu', | |
171 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired', | |
172 'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr', | |
173 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn', | |
174 'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu', | |
175 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone', | |
176 'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth', | |
177 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow', | |
178 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray', | |
179 'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow', | |
180 'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in my_colormaps] | |
181 p.add_argument( '-c', metavar='colormap', type=str, | |
182 choices = col_maps, default = 'jet' ) | |
183 | |
184 return vars(p.parse_args()) | |
185 | |
186 # Predefined colors for dendrograms brances and class labels | |
187 colors = [ "#B22222","#006400","#0000CD","#9400D3","#696969","#8B4513", | |
188 "#FF1493","#FF8C00","#3CB371","#00Bfff","#CDC9C9","#FFD700", | |
189 "#2F4F4F","#FF0000","#ADFF2F","#B03060" ] | |
190 | |
191 def samples2classes_panel(fig, samples, s2l, idx1, idx2, height, xsize, cols, legendon, fontsize, label2cols, legend_ncol ): | |
192 from matplotlib.patches import Rectangle | |
193 samples2labels = dict([(l[0],l[1]) | |
194 for l in [ll.strip().split('\t') | |
195 for ll in open(s2l)] if len(l) > 1]) | |
196 | |
197 if label2cols: | |
198 labels2colors = dict([(l[0],l[1]) for l in [ll.strip().split('\t') for ll in open(label2cols)]]) | |
199 else: | |
200 cs = cols if cols else colors | |
201 labels2colors = dict([(l,cs[i%len(cs)]) for i,l in enumerate(set(samples2labels.values()))]) | |
202 ax1 = fig.add_axes([0.,1.0,1.0,height],frameon=False) | |
203 ax1.set_xticks([]) | |
204 ax1.set_yticks([]) | |
205 ax1.set_ylim( [0.0, height] ) | |
206 ax1.set_xlim( [0.0, xsize] ) | |
207 step = xsize / float(len(samples)) | |
208 labels = set() | |
209 added_labels = set() | |
210 for i,ind in enumerate(idx2): | |
211 if not samples[ind] in samples2labels or \ | |
212 not samples2labels[samples[ind]] in labels2colors: | |
213 fc, ll = "k", None | |
214 else: | |
215 ll = samples2labels[samples[ind]] | |
216 ll = None if ll in added_labels else ll | |
217 added_labels.add( ll ) | |
218 fc = labels2colors[samples2labels[samples[ind]]] | |
219 | |
220 rect = Rectangle( [float(i)*step, 0.0], step, height, | |
221 facecolor = fc, | |
222 label = ll, | |
223 edgecolor='b', lw = 0.0) | |
224 labels.add( ll ) | |
225 ax1.add_patch(rect) | |
226 ax1.autoscale_view() | |
227 | |
228 if legendon: | |
229 ax1.legend( loc = 2, ncol = legend_ncol, bbox_to_anchor=(1.01, 3.), | |
230 borderpad = 0.0, labelspacing = 0.0, | |
231 handlelength = 0.5, handletextpad = 0.3, | |
232 borderaxespad = 0.0, columnspacing = 0.3, | |
233 prop = {'size':fontsize}, frameon = False) | |
234 | |
235 def samples_dend_panel( fig, Z, Z2, ystart, ylen, lw ): | |
236 ax2 = fig.add_axes([0.0,1.0+ystart,1.0,ylen], frameon=False) | |
237 Z2['color_list'] = [c.replace('b','k') for c in Z2['color_list']] | |
238 mh = max(Z[:,2]) | |
239 sch._plot_dendrogram( Z2['icoord'], Z2['dcoord'], Z2['ivl'], | |
240 Z.shape[0] + 1, Z.shape[0] + 1, | |
241 mh, 'top', no_labels=True, | |
242 color_list=Z2['color_list'] ) | |
243 for coll in ax2.collections: | |
244 coll._linewidths = (lw,) | |
245 ax2.set_xticks([]) | |
246 ax2.set_yticks([]) | |
247 ax2.set_xticklabels([]) | |
248 | |
249 def features_dend_panel( fig, Z, Z2, width, lw ): | |
250 ax1 = fig.add_axes([-width,0.0,width,1.0], frameon=False) | |
251 Z2['color_list'] = [c.replace('b','k').replace('x','b') for c in Z2['color_list']] | |
252 mh = max(Z[:,2]) | |
253 sch._plot_dendrogram(Z2['icoord'], Z2['dcoord'], Z2['ivl'], Z.shape[0] + 1, Z.shape[0] + 1, mh, 'right', no_labels=True, color_list=Z2['color_list']) | |
254 for coll in ax1.collections: | |
255 coll._linewidths = (lw,) | |
256 ax1.set_xticks([]) | |
257 ax1.set_yticks([]) | |
258 ax1.set_xticklabels([]) | |
259 | |
260 | |
261 def add_cmap( cmapdict, name ): | |
262 my_cmap = matplotlib.colors.LinearSegmentedColormap(name,cmapdict,256) | |
263 pylab.register_cmap(name=name,cmap=my_cmap) | |
264 | |
265 def init_fig(xsize,ysize,ncol): | |
266 fig = pylab.figure(figsize=(xsize,ysize)) | |
267 sch._link_line_colors = colors[:ncol] | |
268 return fig | |
269 | |
270 def heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, cols, rows, label_font_size, cb_offset, cb_l, flabelson, slabelson, cm_ticks, gridon, bar_offset ): | |
271 cm = pylab.get_cmap(cm_name) | |
272 bottom_col = [ cm._segmentdata['red'][0][1], | |
273 cm._segmentdata['green'][0][1], | |
274 cm._segmentdata['blue'][0][1] ] | |
275 axmatrix = fig.add_axes( [0.0,0.0,1.0,1.0], | |
276 axisbg=bottom_col) | |
277 if any([c < 0.95 for c in bottom_col]): | |
278 axmatrix.spines['right'].set_color('none') | |
279 axmatrix.spines['left'].set_color('none') | |
280 axmatrix.spines['top'].set_color('none') | |
281 axmatrix.spines['bottom'].set_color('none') | |
282 norm_f = matplotlib.colors.LogNorm if scale == 'log' else matplotlib.colors.Normalize | |
283 im = axmatrix.matshow( D, norm = norm_f( vmin=minv if minv > 0.0 else None, | |
284 vmax=maxv), | |
285 aspect='auto', origin='lower', cmap=cm, vmax=maxv) | |
286 | |
287 axmatrix2 = axmatrix.twinx() | |
288 axmatrix3 = axmatrix.twiny() | |
289 | |
290 axmatrix.set_xticks([]) | |
291 axmatrix2.set_xticks([]) | |
292 axmatrix3.set_xticks([]) | |
293 axmatrix.set_yticks([]) | |
294 axmatrix2.set_yticks([]) | |
295 axmatrix3.set_yticks([]) | |
296 | |
297 axmatrix.set_xticklabels([]) | |
298 axmatrix2.set_xticklabels([]) | |
299 axmatrix3.set_xticklabels([]) | |
300 axmatrix.set_yticklabels([]) | |
301 axmatrix2.set_yticklabels([]) | |
302 axmatrix3.set_yticklabels([]) | |
303 | |
304 if any([c < 0.95 for c in bottom_col]): | |
305 axmatrix2.spines['right'].set_color('none') | |
306 axmatrix2.spines['left'].set_color('none') | |
307 axmatrix2.spines['top'].set_color('none') | |
308 axmatrix2.spines['bottom'].set_color('none') | |
309 if any([c < 0.95 for c in bottom_col]): | |
310 axmatrix3.spines['right'].set_color('none') | |
311 axmatrix3.spines['left'].set_color('none') | |
312 axmatrix3.spines['top'].set_color('none') | |
313 axmatrix3.spines['bottom'].set_color('none') | |
314 if flabelson: | |
315 axmatrix2.set_yticks(np.arange(len(rows))+0.5) | |
316 axmatrix2.set_yticklabels([rows[r] for r in idx1],size=label_font_size,va='center') | |
317 if slabelson: | |
318 axmatrix.set_xticks(np.arange(len(cols))) | |
319 axmatrix.set_xticklabels([cols[r] for r in idx2],size=label_font_size,rotation=90,va='top',ha='center') | |
320 axmatrix.tick_params(length=0) | |
321 axmatrix2.tick_params(length=0) | |
322 axmatrix3.tick_params(length=0) | |
323 axmatrix2.set_ylim(0,len(rows)) | |
324 | |
325 if gridon: | |
326 axmatrix.set_yticks(np.arange(len(idx1)-1)+0.5) | |
327 axmatrix.set_xticks(np.arange(len(idx2))+0.5) | |
328 axmatrix.grid( True ) | |
329 ticklines = axmatrix.get_xticklines() | |
330 ticklines.extend( axmatrix.get_yticklines() ) | |
331 #gridlines = axmatrix.get_xgridlines() | |
332 #gridlines.extend( axmatrix.get_ygridlines() ) | |
333 | |
334 for line in ticklines: | |
335 line.set_linewidth(3) | |
336 | |
337 if cb_l > 0.0: | |
338 axcolor = fig.add_axes([0.0,1.0+bar_offset*1.25,1.0,cb_l]) | |
339 cbar = fig.colorbar(im, cax=axcolor, orientation='horizontal') | |
340 cbar.ax.tick_params(labelsize=label_font_size) | |
341 if cm_ticks: | |
342 cbar.ax.set_xticklabels( cm_ticks.split(":") ) | |
343 | |
344 | |
345 def read_table( fin, xstart,xstop,ystart,ystop, percentile = None, top = None, norm = False ): | |
346 mat = [l.rstrip().split('\t') for l in open( fin )] | |
347 | |
348 if fin.endswith(".biom"): | |
349 sample_labels = mat[1][1:-1] | |
350 m = [(mm[-1]+"; OTU"+mm[0],np.array([float(f) for f in mm[1:-1]])) for mm in mat[2:]] | |
351 #feat_labels = [m[-1].replace(";","_").replace(" ","")+m[0] for m in mat[2:]] | |
352 #print len(feat_labels) | |
353 #print len(sample_labels) | |
354 else: | |
355 sample_labels = mat[0][xstart:xstop] | |
356 m = [(mm[xstart-1],np.array([float(f) for f in mm[xstart:xstop]])) for mm in mat[ystart:ystop]] | |
357 | |
358 if norm: | |
359 msums = [0.0 for l in m[0][1]] | |
360 for mm in m: | |
361 for i,v in enumerate(mm[1]): | |
362 msums[i] += v | |
363 | |
364 if top and not percentile: | |
365 percentile = 90 | |
366 | |
367 if percentile: | |
368 m = sorted(m,key=lambda x:-stats.scoreatpercentile(x[1],percentile)) | |
369 if top: | |
370 if fin.endswith(".biom"): | |
371 #feat_labels = [mm[-1].replace(";","_").replace(" ","")+mm[0] for mm in m[:top]] | |
372 feat_labels = [mm[0] for mm in m[:top]] | |
373 else: | |
374 feat_labels = [mm[0] for mm in m[:top]] | |
375 if norm: | |
376 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m[:top]] | |
377 else: | |
378 m = [mm[1] for mm in m[:top]] | |
379 else: | |
380 if fin.endswith(".biom"): | |
381 feat_labels = [mm[0] for mm in m] | |
382 else: | |
383 feat_labels = [mm[0] for mm in m] | |
384 if norm: | |
385 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m] | |
386 else: | |
387 m = [mm[1] for mm in m] | |
388 #m = [mm[1] for mm in m] | |
389 | |
390 D = np.matrix( np.array( m ) ) | |
391 | |
392 return D, feat_labels, sample_labels | |
393 | |
394 def read_dm( fin, n ): | |
395 mat = [[float(f) for f in l.strip().split('\t')] for l in open( fin )] | |
396 nc = sum([len(r) for r in mat]) | |
397 | |
398 if nc == n*n: | |
399 dm = [] | |
400 for i in range(n): | |
401 dm += mat[i][i+1:] | |
402 return np.array(dm) | |
403 if nc == (n*n-n)/2: | |
404 dm = [] | |
405 for i in range(n): | |
406 dm += mat[i] | |
407 return np.array(dm) | |
408 sys.stderr.write( "Error in reading the distance matrix\n" ) | |
409 sys.exit() | |
410 | |
411 | |
412 def exp_newick( inp, labels, outfile, tree_format = 'phyloxml' ): | |
413 n_leaves = int(inp[-1][-1]) | |
414 from Bio import Phylo | |
415 import collections | |
416 from Bio.Phylo.BaseTree import Tree as BTree | |
417 from Bio.Phylo.BaseTree import Clade as BClade | |
418 tree = BTree() | |
419 tree.root = BClade() | |
420 | |
421 subclades = {} | |
422 sb_cbl = {} | |
423 | |
424 for i,(fr,to,bl,nsub) in enumerate( inp ): | |
425 if fr < n_leaves: | |
426 fr_c = BClade(branch_length=-1.0,name=labels[int(fr)]) | |
427 subclades[fr] = fr_c | |
428 sb_cbl[fr] = bl | |
429 if to < n_leaves: | |
430 to_c = BClade(branch_length=-1.0,name=labels[int(to)]) | |
431 subclades[to] = to_c | |
432 sb_cbl[to] = bl | |
433 for i,(fr,to,bl,nsub) in enumerate( inp ): | |
434 fr_c = subclades[fr] | |
435 to_c = subclades[to] | |
436 cur_c = BClade(branch_length=bl) | |
437 cur_c.clades.append( fr_c ) | |
438 cur_c.clades.append( to_c ) | |
439 subclades[i+n_leaves] = cur_c | |
440 | |
441 def reset_rec( clade, fath_bl ): | |
442 if clade.branch_length < 0: | |
443 clade.branch_length = fath_bl | |
444 return | |
445 for c in clade.clades: | |
446 reset_rec( c, clade.branch_length ) | |
447 clade.branch_length = fath_bl-clade.branch_length | |
448 | |
449 tree.root = cur_c | |
450 reset_rec( tree.root, 0.0 ) | |
451 tree.root.branch_length = 0.0 | |
452 Phylo.write(tree, outfile, tree_format ) | |
453 | |
454 def hclust( fin, fout, | |
455 method = "average", | |
456 dist_func = "euclidean", | |
457 feat_dist_func = "d", | |
458 xcw = 0.1, | |
459 ycw = 0.1, | |
460 scale = 'lin', | |
461 minv = 0.0, | |
462 maxv = None, | |
463 xstart = 1, | |
464 ystart = 1, | |
465 xstop = None, | |
466 ystop = None, | |
467 percentile = None, | |
468 top = None, | |
469 norm = False, | |
470 cm_name = 'jet', | |
471 s2l = None, | |
472 label_font_size = 7, | |
473 feat_dend_col_th = None, | |
474 sample_dend_col_th = None, | |
475 clust_ncols = 7, | |
476 clust_line_w = 1.0, | |
477 label_cols = None, | |
478 sdend_h = 0.1, | |
479 fdend_w = 0.1, | |
480 cm_h = 0.03, | |
481 dmf = None, | |
482 dms = None, | |
483 legendon = False, | |
484 label2cols = None, | |
485 flabelon = True, | |
486 slabelon = True, | |
487 cm_ticks = None, | |
488 legend_ncol = 3, | |
489 pad_inches = None, | |
490 legend_font_size = 7, | |
491 gridon = 0, | |
492 sdend_out = None, | |
493 fdend_out= None): | |
494 | |
495 if label_cols and label_cols.count("-"): | |
496 label_cols = label_cols.split("-") | |
497 | |
498 for n,c in my_colormaps: | |
499 add_cmap( c, n ) | |
500 | |
501 if feat_dist_func == 'd': | |
502 feat_dist_func = dist_func | |
503 | |
504 D, feat_labels, sample_labels = read_table(fin,xstart,xstop,ystart,ystop,percentile,top,norm) | |
505 | |
506 ylen,xlen = D[:].shape | |
507 Dt = D.transpose() | |
508 | |
509 size_cx, size_cy = xcw, ycw | |
510 | |
511 xsize, ysize = max(xlen*size_cx,2.0), max(ylen*size_cy,2.0) | |
512 ydend_offset = 0.025*8.0/ysize if s2l else 0.0 | |
513 | |
514 fig = init_fig(xsize,ysize,clust_ncols) | |
515 | |
516 nfeats, nsamples = len(D), len(Dt) | |
517 | |
518 if dmf: | |
519 p1 = read_dm( dmf, nfeats ) | |
520 Y1 = sch.linkage( p1, method=method ) | |
521 else: | |
522 p1 = dis.pdist( D, feat_dist_func ) | |
523 Y1 = sch.linkage( p1, method=method ) # , metric=feat_dist_func ) | |
524 #Y1 = sch.linkage( D, method=method, metric=feat_dist_func ) | |
525 Z1 = sch.dendrogram(Y1, no_plot=True, color_threshold=feat_dend_col_th) | |
526 | |
527 if fdend_out: | |
528 exp_newick( Y1, feat_labels, fdend_out ) | |
529 | |
530 if dms: | |
531 p2 = read_dm( dms, nsamples ) | |
532 Y2 = sch.linkage( p2, method=method ) | |
533 else: | |
534 p2 = dis.pdist( Dt, dist_func ) | |
535 Y2 = sch.linkage( p2, method=method ) # , metric=dist_func ) | |
536 #Y2 = sch.linkage( Dt, method=method, metric=dist_func ) | |
537 Z2 = sch.dendrogram(Y2, no_plot=True, color_threshold=sample_dend_col_th) | |
538 | |
539 if sdend_out: | |
540 exp_newick( Y2, sample_labels, sdend_out ) | |
541 | |
542 if fdend_w > 0.0: | |
543 features_dend_panel(fig, Y1, Z1, fdend_w*8.0/xsize, clust_line_w ) | |
544 if sdend_h > 0.0: | |
545 samples_dend_panel(fig, Y2, Z2, ydend_offset, sdend_h*8.0/ysize, clust_line_w) | |
546 | |
547 idx1, idx2 = Z1['leaves'], Z2['leaves'] | |
548 D = D[idx1,:][:,idx2] | |
549 | |
550 if s2l: | |
551 samples2classes_panel( fig, sample_labels, s2l, idx1, idx2, 0.025*8.0/ysize, xsize, label_cols, legendon, legend_font_size, label2cols, legend_ncol ) | |
552 heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, sample_labels, feat_labels, label_font_size, -cm_h*8.0/ysize, cm_h*0.8*8.0/ysize, flabelon, slabelon, cm_ticks, gridon, ydend_offset+sdend_h*8.0/ysize ) | |
553 | |
554 fig.savefig( fout, bbox_inches='tight', | |
555 pad_inches = pad_inches, | |
556 dpi=300) if fout else pylab.show() | |
557 | |
558 if __name__ == '__main__': | |
559 pars = read_params( sys.argv ) | |
560 | |
561 hclust( fin = pars['in'], | |
562 fout = pars['out'], | |
563 method = pars['m'], | |
564 dist_func = pars['d'], | |
565 feat_dist_func = pars['f'], | |
566 xcw = pars['x'], | |
567 ycw = pars['y'], | |
568 scale = pars['s'], | |
569 minv = pars['minv'], | |
570 maxv = pars['maxv'], | |
571 xstart = pars['xstart'], | |
572 ystart = pars['ystart'], | |
573 xstop = pars['xstop'], | |
574 ystop = pars['ystop'], | |
575 percentile = pars['perc'], | |
576 top = pars['top'], | |
577 norm = pars['norm'], | |
578 cm_name = pars['c'], | |
579 s2l = pars['l'], | |
580 label_font_size = pars['font_size'], | |
581 feat_dend_col_th = pars['feat_dend_col_th'], | |
582 sample_dend_col_th = pars['sample_dend_col_th'], | |
583 clust_ncols = pars['clust_ncols'], | |
584 clust_line_w = pars['clust_line_w'], | |
585 label_cols = pars['label_cols'], | |
586 sdend_h = pars['sdend_h'], | |
587 fdend_w = pars['fdend_w'], | |
588 cm_h = pars['cm_h'], | |
589 dmf = pars['dmf'], | |
590 dms = pars['dms'], | |
591 legendon = pars['legend'], | |
592 label2cols = pars['label2cols'], | |
593 flabelon = pars['flabel'], | |
594 slabelon = pars['slabel'], | |
595 cm_ticks = pars['cm_ticks'], | |
596 legend_ncol = pars['legend_ncol'], | |
597 pad_inches = pars['pad_inches'], | |
598 legend_font_size = pars['legend_font_size'], | |
599 gridon = pars['grid'], | |
600 sdend_out = pars['sdend_out'], | |
601 fdend_out = pars['fdend_out'], | |
602 ) | |
603 |