Mercurial > repos > iuc > clustering_from_distmat
annotate clustering_from_distmat.py @ 0:8192b416f945 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
author | iuc |
---|---|
date | Thu, 08 Aug 2024 19:34:36 +0000 |
parents | |
children | c0b01c55a0e0 |
rev | line source |
---|---|
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
1 import argparse |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
2 import sys |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
3 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
4 import scipy |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
5 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
6 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
def linkage_as_newick(linkage, tip_names):
    """Serialize a clustering linkage as a Newick tree string.

    Parameters
    ----------
    linkage
        Iterable of merge steps. For each step, element 0 and 1 are the
        indices of the two nodes being merged and element 2 is the merge
        distance. Indices below ``len(tip_names)`` refer to tips; index
        ``len(tip_names) + k`` refers to the node created by step ``k``.
    tip_names
        Sequence of tip labels, indexed like the tips in ``linkage``.
        The sequence is not modified.

    Returns
    -------
    str
        The Newick text of the node created by the last step, terminated
        with ``;``. If ``linkage`` has no steps, this is the last tip name
        followed by ``;``.

    Notes
    -----
    Tip names are written verbatim: no Newick quoting or escaping is
    applied.
    """
    # Work on copies that grow by one entry per merge step, so that a node
    # index from the linkage can be used directly as a list index.
    newick_parts = list(tip_names)
    within_cluster_distances = [0] * len(newick_parts)
    for step in linkage:
        n1 = int(step[0])
        n2 = int(step[1])
        d = float(step[2])
        # A child's branch length is half of what the merge distance adds
        # on top of the distance at which that child was itself formed
        # (0 for tips).
        d1 = d - within_cluster_distances[n1]
        d2 = d - within_cluster_distances[n2]
        id1 = newick_parts[n1]
        id2 = newick_parts[n2]
        part = f'({id1}:{d1 / 2},{id2}:{d2 / 2})'
        within_cluster_distances.append(d)
        newick_parts.append(part)
    # The f-string above already holds the complete subtree text. It must
    # not be passed through str.format(), or braces in tip names would be
    # interpreted as replacement fields.
    return newick_parts[-1] + ';'
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
22 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
23 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command line: input matrix and output prefix are positional; the
    # clustering method and the tree-cutting options are optional.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'infile',
        help='Distance matrix input file'
    )
    parser.add_argument(
        'out_prefix',
        help="Output prefix"
    )
    parser.add_argument(
        '-m', '--method', default="average",
        choices=[
            "single",
            "complete",
            "average",
            "weighted",
            "centroid",
            "median",
            "ward"
        ],
        help="Clustering method to use"
    )
    # the tree can be cut by number of clusters or by height, but not both
    cut_mode = parser.add_mutually_exclusive_group()
    cut_mode.add_argument(
        "-n", "--n-clusters", nargs="*", type=int
    )
    cut_mode.add_argument(
        "--height", nargs="*", type=float
    )
    args = parser.parse_args()

    # TO DO:
    # - parse outputs to generate

    # read from input and check that
    # we have been passed a symmetric distance matrix
    with open(args.infile) as i:
        # An empty file is treated like an empty header line, so it is
        # reported through the "No data columns found" message below
        # instead of an uncaught StopIteration.
        col_names = next(i, "").rstrip("\n\r").split("\t")[1:]
        col_count = len(col_names)
        if not col_count:
            sys.exit(
                'No data columns found. '
                'This tool expects tabular input with column names on the first line '
                'and a row name in the first column of each row followed by data columns.'
            )
        row_count = 0
        matrix = []
        for line in i:
            if not line.strip():
                # skip empty lines
                continue
            row_count += 1
            if row_count > col_count:
                sys.exit(
                    'This tool expects a symmetric distance matrix with an equal number of rows and columns, '
                    'but got more rows than columns.'
                )
            row_name, *row_data = line.strip(" \n\r").split("\t")
            # the n-th data row must carry the same name as the n-th column
            col_name = col_names[row_count - 1]
            if not row_name:
                # tolerate omitted row names, use col name instead
                row_name = col_name
            if row_name != col_name:
                sys.exit(
                    'This tool expects a symmetric distance matrix with identical names for rows and columns, '
                    f'but got "{col_name}" in column {row_count} and "{row_name}" on row {row_count}.'
                )
            if len(row_data) != col_count:
                sys.exit(
                    'This tool expects a symmetric distance matrix with the same number of columns on each row, '
                    f'but row {row_count} ("{row_name}") has {len(row_data)} columns instead of {col_count}.'
                )
            try:
                matrix.append([float(x) for x in row_data])
            except ValueError as e:
                sys.exit(str(e) + f' on row {row_count} ("{row_name}")')
    if row_count < col_count:
        sys.exit(
            'This tool expects a symmetric distance matrix with an equal number of rows and columns, '
            'but got more columns than rows.'
        )

    # turn the distance matrix into "condensed" vector form
    # this gives us further checks and raises ValueErrors if:
    # - the values on the diagonal aren't zero
    # - the upper and lower triangle of the matrix aren't identical
    # Report these like the other input problems rather than as a traceback.
    try:
        D = scipy.spatial.distance.squareform(matrix)
    except ValueError as e:
        sys.exit(f'Invalid distance matrix: {e}')

    # perform the requested clustering and retrieve the result as a linkage object
    linkage = scipy.cluster.hierarchy.linkage(D, args.method)

    with open(args.out_prefix + '.tree.newick', 'w') as o:
        o.write(linkage_as_newick(linkage, col_names))

    # cut the tree as specified and report sample to cluster assignments
    if args.n_clusters or args.height:
        if args.n_clusters:
            cut_values = args.n_clusters
            colname_template = "cluster_id_n{}"
        else:
            cut_values = args.height
            colname_template = "cluster_id_h{}"
        # one output column per requested cut value
        header_cols = ["sample"] + [
            colname_template.format(x) for x in cut_values
        ]
        cluster_ids_per_sample = scipy.cluster.hierarchy.cut_tree(
            linkage,
            n_clusters=args.n_clusters,
            height=args.height
        )
        # cluster ids are written with 1 added to the values from cut_tree
        cluster_assignments = [
            [name] + [str(c + 1) for c in cluster_ids]
            for name, cluster_ids in zip(col_names, cluster_ids_per_sample)
        ]
        with open(args.out_prefix + '.cluster_assignments.tsv', 'w') as o:
            print("\t".join(header_cols), file=o)
            for assignment in cluster_assignments:
                print("\t".join(assignment), file=o)