0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4 import numpy as np
|
|
5 import matplotlib
|
|
6 matplotlib.use('Agg')
|
|
7 import scipy
|
|
8 import pylab
|
|
9 import scipy.cluster.hierarchy as sch
|
|
10 import scipy.spatial.distance as dis
|
|
11 from scipy import stats
|
|
12
|
|
13 # User defined color maps (in addition to matplotlib ones)
|
|
14 bbcyr = {'red': ( (0.0, 0.0, 0.0),
|
|
15 (0.25, 0.0, 0.0),
|
|
16 (0.50, 0.0, 0.0),
|
|
17 (0.75, 1.0, 1.0),
|
|
18 (1.0, 1.0, 1.0)),
|
|
19 'green': ( (0.0, 0.0, 0.0),
|
|
20 (0.25, 0.0, 0.0),
|
|
21 (0.50, 1.0, 1.0),
|
|
22 (0.75, 1.0, 1.0),
|
|
23 (1.0, 0.0, 1.0)),
|
|
24 'blue': ( (0.0, 0.0, 0.0),
|
|
25 (0.25, 1.0, 1.0),
|
|
26 (0.50, 1.0, 1.0),
|
|
27 (0.75, 0.0, 0.0),
|
|
28 (1.0, 0.0, 1.0))}
|
|
29
|
|
30 bbcry = {'red': ( (0.0, 0.0, 0.0),
|
|
31 (0.25, 0.0, 0.0),
|
|
32 (0.50, 0.0, 0.0),
|
|
33 (0.75, 1.0, 1.0),
|
|
34 (1.0, 1.0, 1.0)),
|
|
35 'green': ( (0.0, 0.0, 0.0),
|
|
36 (0.25, 0.0, 0.0),
|
|
37 (0.50, 1.0, 1.0),
|
|
38 (0.75, 0.0, 0.0),
|
|
39 (1.0, 1.0, 1.0)),
|
|
40 'blue': ( (0.0, 0.0, 0.0),
|
|
41 (0.25, 1.0, 1.0),
|
|
42 (0.50, 1.0, 1.0),
|
|
43 (0.75, 0.0, 0.0),
|
|
44 (1.0, 0.0, 1.0))}
|
|
45 my_colormaps = [ ('bbcyr',bbcyr),
|
|
46 ('bbcry',bbcry)]
|
|
47
|
|
48
|
|
49
|
|
50 def read_params(args):
|
|
51 import argparse as ap
|
|
52 import textwrap
|
|
53
|
|
54 p = ap.ArgumentParser( description= "TBA" )
|
|
55
|
|
56 p.add_argument( '--in', '--inp', metavar='INPUT_FILE', type=str,
|
|
57 nargs='?', default=sys.stdin,
|
|
58 help= "the input archive " )
|
|
59
|
|
60 p.add_argument( '--out', metavar='OUTPUT_FILE', type=str,
|
|
61 nargs = '?', default=None,
|
|
62 help= " the output file, image on screen"
|
|
63 " if not specified. " )
|
|
64
|
|
65 p.add_argument( '-m', metavar='method', type=str,
|
|
66 choices=[ "single","complete","average",
|
|
67 "weighted","centroid","median",
|
|
68 "ward" ],
|
|
69 default="average" )
|
|
70
|
|
71 dist_funcs = [ "euclidean","minkowski","cityblock","seuclidean",
|
|
72 "sqeuclidean","cosine","correlation","hamming",
|
|
73 "jaccard","chebyshev","canberra","braycurtis",
|
|
74 "mahalanobis","yule","matching","dice",
|
|
75 "kulsinski","rogerstanimoto","russellrao","sokalmichener",
|
|
76 "sokalsneath","wminkowski","ward"]
|
|
77 p.add_argument( '-d', metavar='distance function', type=str,
|
|
78 choices=dist_funcs,
|
|
79 default="euclidean" )
|
|
80 p.add_argument( '-f', metavar='distance function for features', type=str,
|
|
81 choices=dist_funcs,
|
|
82 default="d" )
|
|
83
|
|
84 p.add_argument( '--dmf', metavar='distance matrix for features', type=str,
|
|
85 default = None )
|
|
86 p.add_argument( '--dms', metavar='distance matrix for samples', type=str,
|
|
87 default = None )
|
|
88
|
|
89
|
|
90 p.add_argument( '-l', metavar='sample label', type=str,
|
|
91 default = None )
|
|
92
|
|
93 p.add_argument( '-s', metavar='scale norm', type=str,
|
|
94 default = 'lin', choices = ['log','lin'])
|
|
95
|
|
96 p.add_argument( '-x', metavar='x cell width', type=float,
|
|
97 default = 0.1)
|
|
98 p.add_argument( '-y', metavar='y cell width', type=float,
|
|
99 default = 0.1 )
|
|
100
|
|
101 p.add_argument( '--minv', metavar='min value', type=float,
|
|
102 default = 0.0 )
|
|
103 p.add_argument( '--maxv', metavar='max value', type=float,
|
|
104 default = None )
|
|
105
|
|
106 p.add_argument( '--xstart', metavar='x coordinate of the top left cell '
|
|
107 'of the values',
|
|
108 type=int, default=1 )
|
|
109 p.add_argument( '--ystart', metavar='y coordinate of the top left cell '
|
|
110 'of the values',
|
|
111 type=int, default=1 )
|
|
112 p.add_argument( '--xstop', metavar='x coordinate of the bottom right cell '
|
|
113 'of the values (default None = last row)',
|
|
114 type=int, default=None )
|
|
115 p.add_argument( '--ystop', metavar='y coordinate of the bottom right cell '
|
|
116 'of the values (default None = last column)',
|
|
117 type=int, default=None )
|
|
118
|
|
119 p.add_argument( '--perc', metavar='percentile for ordering and rows selection', type=int, default=None )
|
|
120 p.add_argument( '--top', metavar='selection of the top N rows', type=int, default=None )
|
|
121 p.add_argument( '--norm', metavar='whether to normalize columns (default 0)', type=int, default=0 )
|
|
122
|
|
123 p.add_argument( '--sdend_h', metavar='height of the sample dendrogram', type=float,
|
|
124 default = 0.1 )
|
|
125 p.add_argument( '--fdend_w', metavar='width of the feature dendrogram', type=float,
|
|
126 default = 0.1 )
|
|
127 p.add_argument( '--cm_h', metavar='height of the colormap', type=float,
|
|
128 default = 0.03 )
|
|
129 p.add_argument( '--cm_ticks', metavar='label for ticks of the colormap', type=str,
|
|
130 default = None )
|
|
131
|
|
132 p.add_argument( '--font_size', metavar='label_font_size', type=int,
|
|
133 default = 7 )
|
|
134 p.add_argument( '--feat_dend_col_th', metavar='Color threshold for feature dendrogram', type=float,
|
|
135 default = None )
|
|
136 p.add_argument( '--sample_dend_col_th', metavar='Color threshold for sample dendrogram', type=float,
|
|
137 default = None )
|
|
138 p.add_argument( '--clust_ncols', metavar='Number of colors for clusters', type=int,
|
|
139 default = 7 )
|
|
140 p.add_argument( '--clust_line_w', metavar='Cluster line width', type=float,
|
|
141 default = 1.0 )
|
|
142 p.add_argument( '--label_cols', metavar='Label colors', type=str,
|
|
143 default = None )
|
|
144 p.add_argument( '--label2cols', metavar='Label to colors mapping file', type=str,
|
|
145 default = None )
|
|
146 p.add_argument( '--sdend_out', metavar='File for storing the samples dendrogram in PhyloXML format', type=str,
|
|
147 default = None )
|
|
148 p.add_argument( '--fdend_out', metavar='File for storing the features dendrogram in PhyloXML format', type=str,
|
|
149 default = None )
|
|
150
|
|
151
|
|
152 p.add_argument( '--pad_inches', metavar='Proportion of figure to be left blank around the plot', type=float,
|
|
153 default = 0.1 )
|
|
154
|
|
155
|
|
156 p.add_argument( '--flabel', metavar='Whether to show the labels for the features', type=int,
|
|
157 default = 0 )
|
|
158 p.add_argument( '--slabel', metavar='Whether to show the labels for the samples', type=int,
|
|
159 default = 0 )
|
|
160
|
|
161 p.add_argument( '--legend', metavar='Whether to show the samples to label legend', type=int,
|
|
162 default = 0 )
|
|
163 p.add_argument( '--legend_font_size', metavar='Legend font size', type=int,
|
|
164 default = 7 )
|
|
165 p.add_argument( '--legend_ncol', metavar='Number of columns for the legend', type=int,
|
|
166 default = 3 )
|
|
167 p.add_argument( '--grid', metavar='Whether to show the grid (only black for now)', type=int,
|
|
168 default = 0 )
|
|
169
|
|
170 col_maps = ['Accent', 'Blues', 'BrBG', 'BuGn', 'BuPu', 'Dark2', 'GnBu',
|
|
171 'Greens', 'Greys', 'OrRd', 'Oranges', 'PRGn', 'Paired',
|
|
172 'Pastel1', 'Pastel2', 'PiYG', 'PuBu', 'PuBuGn', 'PuOr',
|
|
173 'PuRd', 'Purples', 'RdBu', 'RdGy', 'RdPu', 'RdYlBu', 'RdYlGn',
|
|
174 'Reds', 'Set1', 'Set2', 'Set3', 'Spectral', 'YlGn', 'YlGnBu',
|
|
175 'YlOrBr', 'YlOrRd', 'afmhot', 'autumn', 'binary', 'bone',
|
|
176 'brg', 'bwr', 'cool', 'copper', 'flag', 'gist_earth',
|
|
177 'gist_gray', 'gist_heat', 'gist_ncar', 'gist_rainbow',
|
|
178 'gist_stern', 'gist_yarg', 'gnuplot', 'gnuplot2', 'gray',
|
|
179 'hot', 'hsv', 'jet', 'ocean', 'pink', 'prism', 'rainbow',
|
|
180 'seismic', 'spectral', 'spring', 'summer', 'terrain', 'winter'] + [n for n,c in my_colormaps]
|
|
181 p.add_argument( '-c', metavar='colormap', type=str,
|
|
182 choices = col_maps, default = 'jet' )
|
|
183
|
|
184 return vars(p.parse_args())
|
|
185
|
|
186 # Predefined colors for dendrograms brances and class labels
|
|
187 colors = [ "#B22222","#006400","#0000CD","#9400D3","#696969","#8B4513",
|
|
188 "#FF1493","#FF8C00","#3CB371","#00Bfff","#CDC9C9","#FFD700",
|
|
189 "#2F4F4F","#FF0000","#ADFF2F","#B03060" ]
|
|
190
|
|
191 def samples2classes_panel(fig, samples, s2l, idx1, idx2, height, xsize, cols, legendon, fontsize, label2cols, legend_ncol ):
|
|
192 from matplotlib.patches import Rectangle
|
|
193 samples2labels = dict([(l[0],l[1])
|
|
194 for l in [ll.strip().split('\t')
|
|
195 for ll in open(s2l)] if len(l) > 1])
|
|
196
|
|
197 if label2cols:
|
|
198 labels2colors = dict([(l[0],l[1]) for l in [ll.strip().split('\t') for ll in open(label2cols)]])
|
|
199 else:
|
|
200 cs = cols if cols else colors
|
|
201 labels2colors = dict([(l,cs[i%len(cs)]) for i,l in enumerate(set(samples2labels.values()))])
|
|
202 ax1 = fig.add_axes([0.,1.0,1.0,height],frameon=False)
|
|
203 ax1.set_xticks([])
|
|
204 ax1.set_yticks([])
|
|
205 ax1.set_ylim( [0.0, height] )
|
|
206 ax1.set_xlim( [0.0, xsize] )
|
|
207 step = xsize / float(len(samples))
|
|
208 labels = set()
|
|
209 added_labels = set()
|
|
210 for i,ind in enumerate(idx2):
|
|
211 if not samples[ind] in samples2labels or \
|
|
212 not samples2labels[samples[ind]] in labels2colors:
|
|
213 fc, ll = "k", None
|
|
214 else:
|
|
215 ll = samples2labels[samples[ind]]
|
|
216 ll = None if ll in added_labels else ll
|
|
217 added_labels.add( ll )
|
|
218 fc = labels2colors[samples2labels[samples[ind]]]
|
|
219
|
|
220 rect = Rectangle( [float(i)*step, 0.0], step, height,
|
|
221 facecolor = fc,
|
|
222 label = ll,
|
|
223 edgecolor='b', lw = 0.0)
|
|
224 labels.add( ll )
|
|
225 ax1.add_patch(rect)
|
|
226 ax1.autoscale_view()
|
|
227
|
|
228 if legendon:
|
|
229 ax1.legend( loc = 2, ncol = legend_ncol, bbox_to_anchor=(1.01, 3.),
|
|
230 borderpad = 0.0, labelspacing = 0.0,
|
|
231 handlelength = 0.5, handletextpad = 0.3,
|
|
232 borderaxespad = 0.0, columnspacing = 0.3,
|
|
233 prop = {'size':fontsize}, frameon = False)
|
|
234
|
|
235 def samples_dend_panel( fig, Z, Z2, ystart, ylen, lw ):
|
|
236 ax2 = fig.add_axes([0.0,1.0+ystart,1.0,ylen], frameon=False)
|
|
237 Z2['color_list'] = [c.replace('b','k') for c in Z2['color_list']]
|
|
238 mh = max(Z[:,2])
|
|
239 sch._plot_dendrogram( Z2['icoord'], Z2['dcoord'], Z2['ivl'],
|
|
240 Z.shape[0] + 1, Z.shape[0] + 1,
|
|
241 mh, 'top', no_labels=True,
|
|
242 color_list=Z2['color_list'] )
|
|
243 for coll in ax2.collections:
|
|
244 coll._linewidths = (lw,)
|
|
245 ax2.set_xticks([])
|
|
246 ax2.set_yticks([])
|
|
247 ax2.set_xticklabels([])
|
|
248
|
|
249 def features_dend_panel( fig, Z, Z2, width, lw ):
|
|
250 ax1 = fig.add_axes([-width,0.0,width,1.0], frameon=False)
|
|
251 Z2['color_list'] = [c.replace('b','k').replace('x','b') for c in Z2['color_list']]
|
|
252 mh = max(Z[:,2])
|
|
253 sch._plot_dendrogram(Z2['icoord'], Z2['dcoord'], Z2['ivl'], Z.shape[0] + 1, Z.shape[0] + 1, mh, 'right', no_labels=True, color_list=Z2['color_list'])
|
|
254 for coll in ax1.collections:
|
|
255 coll._linewidths = (lw,)
|
|
256 ax1.set_xticks([])
|
|
257 ax1.set_yticks([])
|
|
258 ax1.set_xticklabels([])
|
|
259
|
|
260
|
|
261 def add_cmap( cmapdict, name ):
|
|
262 my_cmap = matplotlib.colors.LinearSegmentedColormap(name,cmapdict,256)
|
|
263 pylab.register_cmap(name=name,cmap=my_cmap)
|
|
264
|
|
265 def init_fig(xsize,ysize,ncol):
|
|
266 fig = pylab.figure(figsize=(xsize,ysize))
|
|
267 sch._link_line_colors = colors[:ncol]
|
|
268 return fig
|
|
269
|
|
270 def heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, cols, rows, label_font_size, cb_offset, cb_l, flabelson, slabelson, cm_ticks, gridon, bar_offset ):
|
|
271 cm = pylab.get_cmap(cm_name)
|
|
272 bottom_col = [ cm._segmentdata['red'][0][1],
|
|
273 cm._segmentdata['green'][0][1],
|
|
274 cm._segmentdata['blue'][0][1] ]
|
|
275 axmatrix = fig.add_axes( [0.0,0.0,1.0,1.0],
|
|
276 axisbg=bottom_col)
|
|
277 if any([c < 0.95 for c in bottom_col]):
|
|
278 axmatrix.spines['right'].set_color('none')
|
|
279 axmatrix.spines['left'].set_color('none')
|
|
280 axmatrix.spines['top'].set_color('none')
|
|
281 axmatrix.spines['bottom'].set_color('none')
|
|
282 norm_f = matplotlib.colors.LogNorm if scale == 'log' else matplotlib.colors.Normalize
|
|
283 im = axmatrix.matshow( D, norm = norm_f( vmin=minv if minv > 0.0 else None,
|
|
284 vmax=maxv),
|
|
285 aspect='auto', origin='lower', cmap=cm, vmax=maxv)
|
|
286
|
|
287 axmatrix2 = axmatrix.twinx()
|
|
288 axmatrix3 = axmatrix.twiny()
|
|
289
|
|
290 axmatrix.set_xticks([])
|
|
291 axmatrix2.set_xticks([])
|
|
292 axmatrix3.set_xticks([])
|
|
293 axmatrix.set_yticks([])
|
|
294 axmatrix2.set_yticks([])
|
|
295 axmatrix3.set_yticks([])
|
|
296
|
|
297 axmatrix.set_xticklabels([])
|
|
298 axmatrix2.set_xticklabels([])
|
|
299 axmatrix3.set_xticklabels([])
|
|
300 axmatrix.set_yticklabels([])
|
|
301 axmatrix2.set_yticklabels([])
|
|
302 axmatrix3.set_yticklabels([])
|
|
303
|
|
304 if any([c < 0.95 for c in bottom_col]):
|
|
305 axmatrix2.spines['right'].set_color('none')
|
|
306 axmatrix2.spines['left'].set_color('none')
|
|
307 axmatrix2.spines['top'].set_color('none')
|
|
308 axmatrix2.spines['bottom'].set_color('none')
|
|
309 if any([c < 0.95 for c in bottom_col]):
|
|
310 axmatrix3.spines['right'].set_color('none')
|
|
311 axmatrix3.spines['left'].set_color('none')
|
|
312 axmatrix3.spines['top'].set_color('none')
|
|
313 axmatrix3.spines['bottom'].set_color('none')
|
|
314 if flabelson:
|
|
315 axmatrix2.set_yticks(np.arange(len(rows))+0.5)
|
|
316 axmatrix2.set_yticklabels([rows[r] for r in idx1],size=label_font_size,va='center')
|
|
317 if slabelson:
|
|
318 axmatrix.set_xticks(np.arange(len(cols)))
|
|
319 axmatrix.set_xticklabels([cols[r] for r in idx2],size=label_font_size,rotation=90,va='top',ha='center')
|
|
320 axmatrix.tick_params(length=0)
|
|
321 axmatrix2.tick_params(length=0)
|
|
322 axmatrix3.tick_params(length=0)
|
|
323 axmatrix2.set_ylim(0,len(rows))
|
|
324
|
|
325 if gridon:
|
|
326 axmatrix.set_yticks(np.arange(len(idx1)-1)+0.5)
|
|
327 axmatrix.set_xticks(np.arange(len(idx2))+0.5)
|
|
328 axmatrix.grid( True )
|
|
329 ticklines = axmatrix.get_xticklines()
|
|
330 ticklines.extend( axmatrix.get_yticklines() )
|
|
331 #gridlines = axmatrix.get_xgridlines()
|
|
332 #gridlines.extend( axmatrix.get_ygridlines() )
|
|
333
|
|
334 for line in ticklines:
|
|
335 line.set_linewidth(3)
|
|
336
|
|
337 if cb_l > 0.0:
|
|
338 axcolor = fig.add_axes([0.0,1.0+bar_offset*1.25,1.0,cb_l])
|
|
339 cbar = fig.colorbar(im, cax=axcolor, orientation='horizontal')
|
|
340 cbar.ax.tick_params(labelsize=label_font_size)
|
|
341 if cm_ticks:
|
|
342 cbar.ax.set_xticklabels( cm_ticks.split(":") )
|
|
343
|
|
344
|
|
345 def read_table( fin, xstart,xstop,ystart,ystop, percentile = None, top = None, norm = False ):
|
|
346 mat = [l.rstrip().split('\t') for l in open( fin )]
|
|
347
|
|
348 if fin.endswith(".biom"):
|
|
349 sample_labels = mat[1][1:-1]
|
|
350 m = [(mm[-1]+"; OTU"+mm[0],np.array([float(f) for f in mm[1:-1]])) for mm in mat[2:]]
|
|
351 #feat_labels = [m[-1].replace(";","_").replace(" ","")+m[0] for m in mat[2:]]
|
|
352 #print len(feat_labels)
|
|
353 #print len(sample_labels)
|
|
354 else:
|
|
355 sample_labels = mat[0][xstart:xstop]
|
|
356 m = [(mm[xstart-1],np.array([float(f) for f in mm[xstart:xstop]])) for mm in mat[ystart:ystop]]
|
|
357
|
|
358 if norm:
|
|
359 msums = [0.0 for l in m[0][1]]
|
|
360 for mm in m:
|
|
361 for i,v in enumerate(mm[1]):
|
|
362 msums[i] += v
|
|
363
|
|
364 if top and not percentile:
|
|
365 percentile = 90
|
|
366
|
|
367 if percentile:
|
|
368 m = sorted(m,key=lambda x:-stats.scoreatpercentile(x[1],percentile))
|
|
369 if top:
|
|
370 if fin.endswith(".biom"):
|
|
371 #feat_labels = [mm[-1].replace(";","_").replace(" ","")+mm[0] for mm in m[:top]]
|
|
372 feat_labels = [mm[0] for mm in m[:top]]
|
|
373 else:
|
|
374 feat_labels = [mm[0] for mm in m[:top]]
|
|
375 if norm:
|
|
376 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m[:top]]
|
|
377 else:
|
|
378 m = [mm[1] for mm in m[:top]]
|
|
379 else:
|
|
380 if fin.endswith(".biom"):
|
|
381 feat_labels = [mm[0] for mm in m]
|
|
382 else:
|
|
383 feat_labels = [mm[0] for mm in m]
|
|
384 if norm:
|
|
385 m = [np.array([n/v for n,v in zip(mm[1],msums)]) for mm in m]
|
|
386 else:
|
|
387 m = [mm[1] for mm in m]
|
|
388 #m = [mm[1] for mm in m]
|
|
389
|
|
390 D = np.matrix( np.array( m ) )
|
|
391
|
|
392 return D, feat_labels, sample_labels
|
|
393
|
|
394 def read_dm( fin, n ):
|
|
395 mat = [[float(f) for f in l.strip().split('\t')] for l in open( fin )]
|
|
396 nc = sum([len(r) for r in mat])
|
|
397
|
|
398 if nc == n*n:
|
|
399 dm = []
|
|
400 for i in range(n):
|
|
401 dm += mat[i][i+1:]
|
|
402 return np.array(dm)
|
|
403 if nc == (n*n-n)/2:
|
|
404 dm = []
|
|
405 for i in range(n):
|
|
406 dm += mat[i]
|
|
407 return np.array(dm)
|
|
408 sys.stderr.write( "Error in reading the distance matrix\n" )
|
|
409 sys.exit()
|
|
410
|
|
411
|
|
412 def exp_newick( inp, labels, outfile, tree_format = 'phyloxml' ):
|
|
413 n_leaves = int(inp[-1][-1])
|
|
414 from Bio import Phylo
|
|
415 import collections
|
|
416 from Bio.Phylo.BaseTree import Tree as BTree
|
|
417 from Bio.Phylo.BaseTree import Clade as BClade
|
|
418 tree = BTree()
|
|
419 tree.root = BClade()
|
|
420
|
|
421 subclades = {}
|
|
422 sb_cbl = {}
|
|
423
|
|
424 for i,(fr,to,bl,nsub) in enumerate( inp ):
|
|
425 if fr < n_leaves:
|
|
426 fr_c = BClade(branch_length=-1.0,name=labels[int(fr)])
|
|
427 subclades[fr] = fr_c
|
|
428 sb_cbl[fr] = bl
|
|
429 if to < n_leaves:
|
|
430 to_c = BClade(branch_length=-1.0,name=labels[int(to)])
|
|
431 subclades[to] = to_c
|
|
432 sb_cbl[to] = bl
|
|
433 for i,(fr,to,bl,nsub) in enumerate( inp ):
|
|
434 fr_c = subclades[fr]
|
|
435 to_c = subclades[to]
|
|
436 cur_c = BClade(branch_length=bl)
|
|
437 cur_c.clades.append( fr_c )
|
|
438 cur_c.clades.append( to_c )
|
|
439 subclades[i+n_leaves] = cur_c
|
|
440
|
|
441 def reset_rec( clade, fath_bl ):
|
|
442 if clade.branch_length < 0:
|
|
443 clade.branch_length = fath_bl
|
|
444 return
|
|
445 for c in clade.clades:
|
|
446 reset_rec( c, clade.branch_length )
|
|
447 clade.branch_length = fath_bl-clade.branch_length
|
|
448
|
|
449 tree.root = cur_c
|
|
450 reset_rec( tree.root, 0.0 )
|
|
451 tree.root.branch_length = 0.0
|
|
452 Phylo.write(tree, outfile, tree_format )
|
|
453
|
|
454 def hclust( fin, fout,
|
|
455 method = "average",
|
|
456 dist_func = "euclidean",
|
|
457 feat_dist_func = "d",
|
|
458 xcw = 0.1,
|
|
459 ycw = 0.1,
|
|
460 scale = 'lin',
|
|
461 minv = 0.0,
|
|
462 maxv = None,
|
|
463 xstart = 1,
|
|
464 ystart = 1,
|
|
465 xstop = None,
|
|
466 ystop = None,
|
|
467 percentile = None,
|
|
468 top = None,
|
|
469 norm = False,
|
|
470 cm_name = 'jet',
|
|
471 s2l = None,
|
|
472 label_font_size = 7,
|
|
473 feat_dend_col_th = None,
|
|
474 sample_dend_col_th = None,
|
|
475 clust_ncols = 7,
|
|
476 clust_line_w = 1.0,
|
|
477 label_cols = None,
|
|
478 sdend_h = 0.1,
|
|
479 fdend_w = 0.1,
|
|
480 cm_h = 0.03,
|
|
481 dmf = None,
|
|
482 dms = None,
|
|
483 legendon = False,
|
|
484 label2cols = None,
|
|
485 flabelon = True,
|
|
486 slabelon = True,
|
|
487 cm_ticks = None,
|
|
488 legend_ncol = 3,
|
|
489 pad_inches = None,
|
|
490 legend_font_size = 7,
|
|
491 gridon = 0,
|
|
492 sdend_out = None,
|
|
493 fdend_out= None):
|
|
494
|
|
495 if label_cols and label_cols.count("-"):
|
|
496 label_cols = label_cols.split("-")
|
|
497
|
|
498 for n,c in my_colormaps:
|
|
499 add_cmap( c, n )
|
|
500
|
|
501 if feat_dist_func == 'd':
|
|
502 feat_dist_func = dist_func
|
|
503
|
|
504 D, feat_labels, sample_labels = read_table(fin,xstart,xstop,ystart,ystop,percentile,top,norm)
|
|
505
|
|
506 ylen,xlen = D[:].shape
|
|
507 Dt = D.transpose()
|
|
508
|
|
509 size_cx, size_cy = xcw, ycw
|
|
510
|
|
511 xsize, ysize = max(xlen*size_cx,2.0), max(ylen*size_cy,2.0)
|
|
512 ydend_offset = 0.025*8.0/ysize if s2l else 0.0
|
|
513
|
|
514 fig = init_fig(xsize,ysize,clust_ncols)
|
|
515
|
|
516 nfeats, nsamples = len(D), len(Dt)
|
|
517
|
|
518 if dmf:
|
|
519 p1 = read_dm( dmf, nfeats )
|
|
520 Y1 = sch.linkage( p1, method=method )
|
|
521 else:
|
|
522 p1 = dis.pdist( D, feat_dist_func )
|
|
523 Y1 = sch.linkage( p1, method=method ) # , metric=feat_dist_func )
|
|
524 #Y1 = sch.linkage( D, method=method, metric=feat_dist_func )
|
|
525 Z1 = sch.dendrogram(Y1, no_plot=True, color_threshold=feat_dend_col_th)
|
|
526
|
|
527 if fdend_out:
|
|
528 exp_newick( Y1, feat_labels, fdend_out )
|
|
529
|
|
530 if dms:
|
|
531 p2 = read_dm( dms, nsamples )
|
|
532 Y2 = sch.linkage( p2, method=method )
|
|
533 else:
|
|
534 p2 = dis.pdist( Dt, dist_func )
|
|
535 Y2 = sch.linkage( p2, method=method ) # , metric=dist_func )
|
|
536 #Y2 = sch.linkage( Dt, method=method, metric=dist_func )
|
|
537 Z2 = sch.dendrogram(Y2, no_plot=True, color_threshold=sample_dend_col_th)
|
|
538
|
|
539 if sdend_out:
|
|
540 exp_newick( Y2, sample_labels, sdend_out )
|
|
541
|
|
542 if fdend_w > 0.0:
|
|
543 features_dend_panel(fig, Y1, Z1, fdend_w*8.0/xsize, clust_line_w )
|
|
544 if sdend_h > 0.0:
|
|
545 samples_dend_panel(fig, Y2, Z2, ydend_offset, sdend_h*8.0/ysize, clust_line_w)
|
|
546
|
|
547 idx1, idx2 = Z1['leaves'], Z2['leaves']
|
|
548 D = D[idx1,:][:,idx2]
|
|
549
|
|
550 if s2l:
|
|
551 samples2classes_panel( fig, sample_labels, s2l, idx1, idx2, 0.025*8.0/ysize, xsize, label_cols, legendon, legend_font_size, label2cols, legend_ncol )
|
|
552 heatmap_panel( fig, D, minv, maxv, idx1, idx2, cm_name, scale, sample_labels, feat_labels, label_font_size, -cm_h*8.0/ysize, cm_h*0.8*8.0/ysize, flabelon, slabelon, cm_ticks, gridon, ydend_offset+sdend_h*8.0/ysize )
|
|
553
|
|
554 fig.savefig( fout, bbox_inches='tight',
|
|
555 pad_inches = pad_inches,
|
|
556 dpi=300) if fout else pylab.show()
|
|
557
|
|
558 if __name__ == '__main__':
|
|
559 pars = read_params( sys.argv )
|
|
560
|
|
561 hclust( fin = pars['in'],
|
|
562 fout = pars['out'],
|
|
563 method = pars['m'],
|
|
564 dist_func = pars['d'],
|
|
565 feat_dist_func = pars['f'],
|
|
566 xcw = pars['x'],
|
|
567 ycw = pars['y'],
|
|
568 scale = pars['s'],
|
|
569 minv = pars['minv'],
|
|
570 maxv = pars['maxv'],
|
|
571 xstart = pars['xstart'],
|
|
572 ystart = pars['ystart'],
|
|
573 xstop = pars['xstop'],
|
|
574 ystop = pars['ystop'],
|
|
575 percentile = pars['perc'],
|
|
576 top = pars['top'],
|
|
577 norm = pars['norm'],
|
|
578 cm_name = pars['c'],
|
|
579 s2l = pars['l'],
|
|
580 label_font_size = pars['font_size'],
|
|
581 feat_dend_col_th = pars['feat_dend_col_th'],
|
|
582 sample_dend_col_th = pars['sample_dend_col_th'],
|
|
583 clust_ncols = pars['clust_ncols'],
|
|
584 clust_line_w = pars['clust_line_w'],
|
|
585 label_cols = pars['label_cols'],
|
|
586 sdend_h = pars['sdend_h'],
|
|
587 fdend_w = pars['fdend_w'],
|
|
588 cm_h = pars['cm_h'],
|
|
589 dmf = pars['dmf'],
|
|
590 dms = pars['dms'],
|
|
591 legendon = pars['legend'],
|
|
592 label2cols = pars['label2cols'],
|
|
593 flabelon = pars['flabel'],
|
|
594 slabelon = pars['slabel'],
|
|
595 cm_ticks = pars['cm_ticks'],
|
|
596 legend_ncol = pars['legend_ncol'],
|
|
597 pad_inches = pars['pad_inches'],
|
|
598 legend_font_size = pars['legend_font_size'],
|
|
599 gridon = pars['grid'],
|
|
600 sdend_out = pars['sdend_out'],
|
|
601 fdend_out = pars['fdend_out'],
|
|
602 )
|
|
603
|