0
|
1 #coding: utf-8
|
|
2 from __future__ import print_function
|
|
3
|
|
4 import csv, json, copy, re, argparse, os, urllib2
|
|
5
|
|
6 import numpy, scipy, fastcluster, sklearn
|
|
7 import scipy.cluster.hierarchy as hcluster
|
|
8 from sklearn import preprocessing
|
|
9 from scipy import spatial
|
|
10
|
|
# Linkage methods accepted by fastcluster / scipy.cluster.hierarchy.
LINKAGES = ["single", "complete", "average", "centroid", "ward", "median", "weighted"]
# Linkages that must be computed from the raw observation matrix rather than
# from a precomputed condensed distance vector.
RAW_LINKAGES = ["ward", "centroid"]
# Valid distance metrics grouped by the type of input data.
DISTANCES = {"numeric": ["braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "euclidean", "mahalanobis", "minkowski", "seuclidean", "sqeuclidean"],
             "binary": ["dice","hamming","jaccard","kulsinski","matching","rogerstanimoto","russellrao","sokalmichener","sokalsneath","yule"]}
|
|
15
|
|
class Dendrogram():
    """Class which handles the generation of cluster heatmap format of clustered data.
    As an input it takes a Cluster instance with clustered data."""

    def __init__(self, clustering):
        # Keep a reference to the whole Cluster object (needed later for the
        # column clustering) and copy out the fields used to build the heatmap.
        self.cluster_object = clustering
        self.data_type = clustering.data_type
        self.axis = clustering.clustering_axis
        self.clustering = clustering.clustering
        self.tree = hcluster.to_tree(self.clustering)
        self.data = clustering.data
        self.data_names = clustering.data_names
        self.header = clustering.header
        self.dendrogram = False

    def __get_cluster_heatmap__(self, write_data):
        """Translates the row linkage matrix into the inchlib 'nodes' dictionary.
        When write_data is False, leaf feature vectors are written as empty lists."""
        root, nodes = hcluster.to_tree(self.clustering, rd=True)
        node_id2node = {}
        dendrogram = {"nodes":{}}

        for node in nodes:
            node_id = node.id
            if node.count == 1:
                node_id2node[node_id] = {"count":1, "distance":0}
            else:
                node_left_child = node.get_left().id
                node_right_child = node.get_right().id
                node_id2node[node_id] = {"count":node.count, "distance":round(node.dist, 3), "left_child": node_left_child, "right_child": node_right_child}

        # Derive parent links from the child links.
        for n in node_id2node:
            node = node_id2node[n]
            if node["count"] != 1:
                node_id2node[node["left_child"]]["parent"] = n
                node_id2node[node["right_child"]]["parent"] = n

        # Attach object names and (optionally) feature vectors to the leaves.
        # NOTE: the original also re-assigned the parent's left/right child
        # pointer to the very same id (a provable no-op, since parent links
        # were derived from those pointers above) and would raise KeyError on
        # a single-leaf tree; that branch was removed.
        for n in node_id2node:
            node = node_id2node[n]

            if node["count"] == 1:
                data = self.data[n]
                node["objects"] = [self.data_names[n]]

                if not write_data:
                    data = []

                node["features"] = data
                dendrogram["nodes"][n] = node

        for n in node_id2node:
            if node_id2node[n]["count"] != 1:
                dendrogram["nodes"][n] = node_id2node[n]

        return dendrogram

    def __get_column_dendrogram__(self):
        """Translates the column linkage matrix into the inchlib 'nodes' dictionary."""
        root, nodes = hcluster.to_tree(self.cluster_object.column_clustering, rd=True)
        node_id2node = {}
        dendrogram = {"nodes":{}}

        for node in nodes:
            node_id = node.id
            if node.count == 1:
                node_id2node[node_id] = {"count":1, "distance":0}
            else:
                node_left_child = node.get_left().id
                node_right_child = node.get_right().id
                node_id2node[node_id] = {"count":node.count, "distance":round(node.dist, 3), "left_child": node_left_child, "right_child": node_right_child}

        for n in node_id2node:
            node = node_id2node[n]
            if node["count"] != 1:
                node_id2node[node["left_child"]]["parent"] = n
                node_id2node[node["right_child"]]["parent"] = n

        for n in node_id2node:
            if not n in dendrogram["nodes"]:
                dendrogram["nodes"][n] = node_id2node[n]

        return dendrogram

    def create_cluster_heatmap(self, compress=False, compressed_value="median", write_data=True):
        """Creates cluster heatmap representation in inchlib format. By setting compress parameter to True you can
        cut the dendrogram in a distance to decrease the row size of the heatmap to specified count.
        When compressing the type of the resulted value of merged rows is given by the compressed_value parameter (median, mean).
        When the metadata are nominal (text values) the most frequent is the result after compression.
        By setting write_data to False the data features won't be present in the resulting format."""
        self.dendrogram = {"data": self.__get_cluster_heatmap__(write_data)}

        self.compress = compress
        self.compressed_value = compressed_value
        self.compress_cluster_treshold = 0
        if self.compress and self.compress >= 0:
            self.compress_cluster_treshold = self.__get_distance_treshold__(compress)
            print("Distance treshold for compression:", self.compress_cluster_treshold)
            if self.compress_cluster_treshold >= 0:
                self.__compress_data__()
            else:
                self.compress = False

        if self.header and write_data:
            self.dendrogram["data"]["feature_names"] = [h for h in self.header]
        elif self.header and not write_data:
            self.dendrogram["data"]["feature_names"] = []

        if self.axis == "both" and len(self.cluster_object.column_clustering):
            # (the original also built an unused second column tree here; removed)
            self.dendrogram["column_dendrogram"] = self.__get_column_dendrogram__()
        return

    def __compress_data__(self):
        """Collapses leaves whose merge distance lies below the computed
        compression treshold into single compressed rows."""
        to_remove = set()

        compressed_value2fnc = {
            "median": lambda values: [round(numpy.median(value), 3) for value in values],
            "mean": lambda values: [round(numpy.average(value), 3) for value in values],
        }

        for n in self.dendrogram["data"]["nodes"]:
            node = self.dendrogram["data"]["nodes"][n]

            if node["count"] == 1:
                objects = node["objects"]
                data = node["features"]
                node_id = n

                # Climb towards the root while the parent merge distance stays
                # below the treshold; every visited node is collapsed.
                while self.dendrogram["data"]["nodes"][node["parent"]]["distance"] <= self.compress_cluster_treshold:
                    to_remove.add(node_id)
                    node_id = node["parent"]
                    node = self.dendrogram["data"]["nodes"][node_id]

                if node["count"] != 1:
                    if not "objects" in self.dendrogram["data"]["nodes"][node_id]:
                        self.dendrogram["data"]["nodes"][node_id]["objects"] = []
                        self.dendrogram["data"]["nodes"][node_id]["features"] = []

                    self.dendrogram["data"]["nodes"][node_id]["objects"].extend(objects)

                    if data:
                        self.dendrogram["data"]["nodes"][node_id]["features"].append(data)

        for node in to_remove:
            self.dendrogram["data"]["nodes"].pop(node)

        for k in self.dendrogram["data"]["nodes"]:
            node = self.dendrogram["data"]["nodes"][k]
            if "objects" in node and node["count"] != 1:
                # Turn the collapsed internal node into a pseudo-leaf and merge
                # its member rows column-wise with the chosen aggregate.
                self.dendrogram["data"]["nodes"][k]["distance"] = 0
                self.dendrogram["data"]["nodes"][k]["count"] = 1
                self.dendrogram["data"]["nodes"][k].pop("left_child")
                self.dendrogram["data"]["nodes"][k].pop("right_child")
                rows = zip(*self.dendrogram["data"]["nodes"][k]["features"])
                self.dendrogram["data"]["nodes"][k]["features"] = compressed_value2fnc[self.compressed_value](rows)

        self.__adjust_node_counts__()
        return

    def __adjust_node_counts__(self):
        """Recomputes the 'count' of inner nodes as the number of (possibly
        compressed) leaves below them."""
        leaves = []

        for n in self.dendrogram["data"]["nodes"]:
            if self.dendrogram["data"]["nodes"][n]["count"] > 1:
                self.dendrogram["data"]["nodes"][n]["count"] = 0
            else:
                leaves.append(n)

        # Every leaf bumps the counter of each of its ancestors once.
        for n in leaves:
            node = self.dendrogram["data"]["nodes"][n]
            parent_id = node["parent"]

            while parent_id:
                node = self.dendrogram["data"]["nodes"][parent_id]
                self.dendrogram["data"]["nodes"][parent_id]["count"] += 1
                parent_id = False
                if "parent" in node:
                    parent_id = node["parent"]
        return

    def __get_distance_treshold__(self, cluster_count):
        """Finds, by interval halving, a cut distance that yields approximately
        cluster_count flat clusters. Returns -1 when the requested count is not
        smaller than the number of leaves (nothing to compress)."""
        print("Calculating distance treshold for cluster compression...")
        if cluster_count >= self.tree.count:
            return -1

        i = 0
        count = cluster_count + 1
        test_step = self.tree.dist/2

        while test_step >= 0.1:
            count = len(set([c for c in hcluster.fcluster(self.clustering, i, "distance")]))
            if count < cluster_count:
                if i == 0:
                    return 0
                i = i - test_step
                test_step = test_step/2
            elif count == cluster_count:
                return i
            else:
                i += test_step

        return i+test_step*2

    def export_cluster_heatmap_as_json(self, filename=None):
        """Returns cluster heatmap in a JSON format or exports it to the file specified by the filename parameter."""
        dendrogram_json = json.dumps(self.dendrogram, indent=4)
        if filename:
            # Context manager guarantees the handle is closed
            # (the original leaked it).
            with open(filename, "w") as output:
                output.write(dendrogram_json)
        return dendrogram_json

    def export_cluster_heatmap_as_html(self, htmldir="."):
        """Export simple HTML page with embedded cluster heatmap and dependencies to given directory."""
        if not os.path.exists(htmldir):
            os.makedirs(htmldir)
        dendrogram_json = json.dumps(self.dendrogram, indent=4)
        # Doubled braces are literal braces for str.format(); the single {}
        # receives the serialized dendrogram JSON.
        template = """<html>
<head>
    <script src="jquery-2.0.3.min.js"></script>
    <script src="kinetic-v5.0.0.min.js"></script>
    <script src="inchlib-1.0.1.min.js"></script>
    <script>
    $(document).ready(function() {{
        var data = {};
        var inchlib = new InCHlib({{
            target: "inchlib",
            max_height: 1200,
            width: 1000,
        }});
        inchlib.read_data(data);
        inchlib.draw();
    }});
    </script>
</head>

<body>
    <div id="inchlib"></div>
</body>
</html>""".format(dendrogram_json)

        lib2url = {"inchlib-1.0.1.min.js": "http://openscreen.cz/software/inchlib/static/js/inchlib-1.0.1.min.js",
                   "jquery-2.0.3.min.js": "http://openscreen.cz/software/inchlib/static/js/jquery-2.0.3.min.js",
                   "kinetic-v5.0.0.min.js": "http://openscreen.cz/software/inchlib/static/js/kinetic-v5.0.0.min.js"}

        # Download the JS dependencies next to the generated page.
        for lib, url in lib2url.items():
            try:
                source = urllib2.urlopen(url)
                source_html = source.read()
                with open(os.path.join(htmldir, lib), "w") as output:
                    output.write(source_html)
            except urllib2.URLError:
                raise Exception("\nCan't download file {}.\nPlease check your internet connection and try again.\nIf the error persists there can be something wrong with the InCHlib server.\n".format(url))

        # Fixed: the original wrote to the misspelled name 'htmdlir',
        # which raised NameError at runtime.
        with open(os.path.join(htmldir, "inchlib.html"), "w") as output:
            output.write(template)
        return

    def add_metadata_from_file(self, metadata_file, delimiter, header=True, metadata_compressed_value="median"):
        """Adds metadata from csv file.
        Metadata_compressed_value specifies the resulted value when the data are compressed (median/mean/frequency)"""
        self.metadata_compressed_value = metadata_compressed_value
        self.metadata, self.metadata_header = self.__read_metadata_file__(metadata_file, delimiter, header)
        self.__connect_metadata_to_data__()
        return

    def add_metadata(self, metadata, header=True, metadata_compressed_value="median"):
        """Adds metadata in a form of list of lists (tuples).
        Metadata_compressed_value specifies the resulted value when the data are compressed (median/mean/frequency)"""
        self.metadata_compressed_value = metadata_compressed_value
        self.metadata, self.metadata_header = self.__read_metadata__(metadata, header)
        self.__connect_metadata_to_data__()
        return

    def __connect_metadata_to_data__(self):
        """Maps metadata rows onto dendrogram leaves; compressed leaves get
        their metadata merged according to self.metadata_compressed_value."""
        if len(set(self.metadata.keys()) & set(self.data_names)) == 0:
            raise Exception("Metadata objects must correspond with original data objects.")

        if not self.dendrogram:
            raise Exception("You must create dendrogram before adding metadata.")

        self.dendrogram["metadata"] = {"nodes":{}}

        if self.metadata_header:
            self.dendrogram["metadata"]["feature_names"] = self.metadata_header

        leaves = {n:node for n, node in self.dendrogram["data"]["nodes"].items() if node["count"] == 1}

        if not self.compress:
            for leaf_id, leaf in leaves.items():
                try:
                    self.dendrogram["metadata"]["nodes"][leaf_id] = self.metadata[leaf["objects"][0]]
                except KeyError:
                    # Leaf without a metadata row is simply skipped
                    # (original swallowed every Exception; narrowed).
                    continue
        else:
            # Fixed: the original lambdas ignored their parameter and closed
            # over the loop variable 'col'; they now use the argument.
            compressed_value2fnc = {
                "median": lambda values: round(numpy.median(values), 3),
                "mean": lambda values: round(numpy.average(values), 3)
            }

            for leaf in leaves:
                objects = []
                for item in leaves[leaf]["objects"]:
                    try:
                        objects.append(self.metadata[item])
                    except KeyError:
                        continue

                cols = zip(*objects)
                row = []
                cols = [list(c) for c in cols]

                for col in cols:
                    if self.metadata_compressed_value in compressed_value2fnc:
                        try:
                            col = [float(c) for c in col]
                            value = compressed_value2fnc[self.metadata_compressed_value](col)
                        except ValueError:
                            # Non-numeric column: fall back to most frequent value.
                            freq2val = {col.count(v):v for v in set(col)}
                            value = freq2val[max(freq2val.keys())]

                    elif self.metadata_compressed_value == "frequency":
                        freq2val = {col.count(v):v for v in set(col)}
                        value = freq2val[max(freq2val.keys())]

                    else:
                        # Fixed typo in user-facing message ("Unkown").
                        raise Exception("Unknown type of metadata_compressed_value: {}. Possible values are: median, mean, frequency.".format(self.metadata_compressed_value))

                    row.append(value)

                self.dendrogram["metadata"]["nodes"][leaf] = row
        return

    def __read_metadata__(self, metadata, header):
        """Converts a list of rows into {row_id: [values]}; the optional first
        row supplies the metadata feature names."""
        metadata_header = []
        rows = metadata
        metadata = {}
        data_start = 0

        if header:
            metadata_header = rows[0][1:]
            data_start = 1

        for row in rows[data_start:]:
            metadata[str(row[0])] = [r for r in row[1:]]

        return metadata, metadata_header

    def __read_metadata_file__(self, metadata_file, delimiter, header):
        """Reads a delimited metadata file into {row_id: [values]} plus the
        optional header row with feature names."""
        # Context manager closes the file (the original leaked the handle).
        with open(metadata_file, "r") as input_file:
            rows = [row for row in csv.reader(input_file, delimiter=delimiter)]
        metadata_header = []
        metadata = {}
        data_start = 0

        if header:
            metadata_header = rows[0][1:]
            data_start = 1

        for row in rows[data_start:]:
            metadata_id = str(row[0])
            metadata[metadata_id] = [r for r in row[1:]]

        return metadata, metadata_header
|
|
389
|
|
390
|
|
class Cluster():
    """Class for data clustering"""

    def __init__(self):
        # When True, normalized data are clustered but the original values
        # are written to the heatmap (set by normalize_data()).
        self.write_original = False

    def read_csv(self, filename, delimiter=",", header=False):
        """Reads data from the CSV file"""
        self.filename = filename
        # Context manager closes the file (the original leaked the handle).
        with open(self.filename, "r") as input_file:
            rows = [row for row in csv.reader(input_file, delimiter=delimiter)]
        self.read_data(rows, header)

    def read_data(self, rows, header=False):
        """Reads data in a form of list of lists (tuples)"""
        self.header = header
        data_start = 0

        if self.header:
            # First row holds column names; its first cell is the id column.
            self.header = rows[0][1:]
            data_start = 1

        self.data_names = [str(row[0]) for row in rows[data_start:]]
        self.data = [[round(float(value), 3) for value in row[1:]] for row in rows[data_start:]]
        # Keep an untouched copy so normalization can be undone for output.
        self.original_data = copy.deepcopy(self.data)
        return

    def normalize_data(self, feature_range=(0,1), write_original=False):
        """Normalizes data to a scale from 0 to 1. When write_original is set to True,
        the normalized data will be clustered, but original data will be written to the heatmap."""
        self.write_original = write_original
        min_max_scaler = preprocessing.MinMaxScaler(feature_range)
        self.data = min_max_scaler.fit_transform(self.data)
        self.data = [[round(v, 3) for v in row] for row in self.data]
        return

    def cluster_data(self, data_type="numeric", row_distance="euclidean", row_linkage="single", axis="row", column_distance="euclidean", column_linkage="ward"):
        """Performs clustering according to the given parameters.
        @data_type - numeric/binary
        @row_distance/column_distance - see. DISTANCES variable
        @row_linkage/column_linkage - see. LINKAGES variable
        @axis - row/both
        """
        print("Clustering rows:", row_distance, row_linkage)
        self.data_type = data_type
        self.clustering_axis = axis
        row_linkage = str(row_linkage)

        if row_linkage in RAW_LINKAGES:
            # Raw linkages are computed directly from the observations.
            self.clustering = fastcluster.linkage(self.data, method=row_linkage, metric=row_distance)
        else:
            # Validate data_type/metric BEFORE computing the distance matrix:
            # the original validated afterwards, so an invalid metric crashed
            # inside pdist with an unhelpful error instead of this message.
            if not data_type in DISTANCES.keys():
                raise Exception("".join(["You can choose only from data types: ", ", ".join(DISTANCES.keys())]))
            elif not row_distance in DISTANCES[data_type]:
                # Fixed: original message was joined without spaces
                # ("When clusteringnumericdata ...").
                raise Exception("When clustering {} data you must choose from these distance measures: {}".format(data_type, ", ".join(DISTANCES[data_type])))

            self.distance_vector = fastcluster.pdist(self.data, row_distance)
            self.clustering = fastcluster.linkage(self.distance_vector, method=str(row_linkage))

        self.column_clustering = []
        if axis == "both" and len(self.data[0]) > 2:
            print("Clustering columns:", column_distance, column_linkage)
            self.__cluster_columns__(column_distance, column_linkage)

        if self.write_original:
            self.data = self.original_data

        return

    def __cluster_columns__(self, column_distance, column_linkage):
        """Clusters the transposed data matrix and reorders rows/header to the
        resulting column order."""
        # list() keeps this working on Python 3, where zip() is lazy;
        # on Python 2 it is a no-op copy.
        columns = list(zip(*self.data))
        self.column_clustering = fastcluster.linkage(columns, method=column_linkage, metric=column_distance)
        self.data_order = hcluster.leaves_list(self.column_clustering)
        self.data = self.__reorder_data__(self.data, self.data_order)
        self.original_data = self.__reorder_data__(self.original_data, self.data_order)
        if self.header:
            self.header = self.__reorder_data__([self.header], self.data_order)[0]
        return

    def __reorder_data__(self, data, order):
        """Reorders the columns of every row according to 'order', reversed
        (matching the orientation of the drawn column dendrogram)."""
        # range works identically on Python 2 and 3 here (xrange was Py2-only).
        for i in range(len(data)):
            reordered_data = [data[i][j] for j in order]
            reordered_data.reverse()
            data[i] = reordered_data

        return data
|
|
482
|
|
def _process_(arguments):
    """Run the full command-line pipeline: read the data, optionally
    normalize, cluster, build the heatmap, attach metadata and emit
    the result as JSON, HTML, or to stdout."""
    clusterer = Cluster()
    clusterer.read_csv(arguments.data_file, arguments.data_delimiter, arguments.data_header)

    if arguments.normalize:
        clusterer.normalize_data(feature_range=(0, 1), write_original=arguments.write_original)

    clusterer.cluster_data(
        data_type=arguments.datatype,
        row_distance=arguments.row_distance,
        row_linkage=arguments.row_linkage,
        axis=arguments.axis,
        column_distance=arguments.column_distance,
        column_linkage=arguments.column_linkage,
    )

    dendrogram = Dendrogram(clusterer)
    dendrogram.create_cluster_heatmap(
        compress=arguments.compress,
        compressed_value=arguments.compressed_value,
        write_data=not arguments.dont_write_data,
    )

    if arguments.metadata:
        dendrogram.add_metadata_from_file(
            metadata_file=arguments.metadata,
            delimiter=arguments.metadata_delimiter,
            header=arguments.metadata_header,
            metadata_compressed_value=arguments.metadata_compressed_value,
        )

    # Output priority: explicit JSON file, then HTML directory, else stdout.
    if arguments.output_file:
        dendrogram.export_cluster_heatmap_as_json(arguments.output_file)
    elif arguments.html_dir:
        dendrogram.export_cluster_heatmap_as_html(arguments.html_dir)
    else:
        print(json.dumps(dendrogram.dendrogram, indent=4))
|
|
505
|
|
506
|
|
if __name__ == '__main__':
    # Command-line entry point: build the argument parser and run the pipeline.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("data_file", type=str, help="csv(text) data file with delimited values")
    parser.add_argument("-o", "--output_file", type=str, help="the name of output file")
    parser.add_argument("-html", "--html_dir", type=str, help="the directory to store HTML page with dependencies")
    parser.add_argument("-rd", "--row_distance", type=str, default="euclidean", help="set the distance to use for clustering rows")
    parser.add_argument("-rl", "--row_linkage", type=str, default="ward", help="set the linkage to use for clustering rows")
    parser.add_argument("-cd", "--column_distance", type=str, default="euclidean", help="set the distance to use for clustering columns (only when clustering by both axis -a parameter)")
    parser.add_argument("-cl", "--column_linkage", type=str, default="ward", help="set the linkage to use for clustering columns (only when clustering by both axis -a parameter)")
    parser.add_argument("-a", "--axis", type=str, default="row", help="define clustering axis (row/both)")
    parser.add_argument("-dt", "--datatype", type=str, default="numeric", help="specify the type of the data (numeric/binary)")
    parser.add_argument("-dd", "--data_delimiter", type=str, default=",", help="delimiter of values in data file")
    parser.add_argument("-m", "--metadata", type=str, default=None, help="csv(text) metadata file with delimited values")
    parser.add_argument("-md", "--metadata_delimiter", type=str, default=",", help="delimiter of values in metadata file")
    parser.add_argument("-dh", "--data_header", default=False, help="whether the first row of data file is a header", action="store_true")
    parser.add_argument("-mh", "--metadata_header", default=False, help="whether the first row of metadata file is a header", action="store_true")
    parser.add_argument("-c", "--compress", type=int, default=0, help="compress the data to contain maximum of specified count of rows")
    parser.add_argument("-cv", "--compressed_value", type=str, default="median", help="the resulted value from merged rows when the data are compressed (median/mean/frequency)")
    parser.add_argument("-mcv", "--metadata_compressed_value", type=str, default="median", help="the resulted value from merged rows of metadata when the data are compressed (median/mean/count)")
    parser.add_argument("-dwd", "--dont_write_data", default=False, help="don't write clustered data to the inchlib data format", action="store_true")
    parser.add_argument("-n", "--normalize", default=False, help="normalize data to [0, 1] range", action="store_true")
    parser.add_argument("-wo", "--write_original", default=False, help="cluster normalized data but write the original ones to the heatmap", action="store_true")

    args = parser.parse_args()
    _process_(args)
|
|
533
|