annotate clustering_from_distmat.py @ 0:8192b416f945 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
author iuc
date Thu, 08 Aug 2024 19:34:36 +0000
parents
children c0b01c55a0e0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
1 import argparse
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
2 import sys
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
3
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
4 import scipy
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
5
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
6
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
def linkage_as_newick(linkage, tip_names):
    """Serialize a hierarchical-clustering linkage as a Newick tree string.

    Parameters
    ----------
    linkage
        Iterable of merge steps. For each step, ``step[0]`` and ``step[1]``
        are the indices of the two nodes being merged and ``step[2]`` is the
        distance at which they are merged. Indices below ``len(tip_names)``
        refer to tips; index ``len(tip_names) + k`` refers to the cluster
        created by step ``k``.
    tip_names
        Sequence of tip labels, indexed by the tip indices used in *linkage*.

    Returns
    -------
    str
        The Newick representation of the node created by the last merge step
        (or of the last tip if *linkage* is empty), terminated by ``;``.
        Each child's branch length is half the difference between the merge
        distance of the parent and the merge distance of the child (tips
        count as merged at distance 0).

    Raises
    ------
    IndexError
        If *tip_names* and *linkage* are both empty, or a step references a
        node index that does not exist yet.

    Notes
    -----
    Tip names are inserted verbatim; no Newick quoting or escaping is done.
    """
    # Work on a copy so the caller's sequence is never modified; list() also
    # lets tuples and other sequences be passed in.
    newick_parts = list(tip_names)
    # Merge distance of every node, parallel to newick_parts (tips are at 0).
    within_cluster_distances = [0] * len(newick_parts)
    for step in linkage:
        n1 = int(step[0])
        n2 = int(step[1])
        d = float(step[2])
        d1 = d - within_cluster_distances[n1]
        d2 = d - within_cluster_distances[n2]
        id1 = newick_parts[n1]
        id2 = newick_parts[n2]
        part = f'({id1}:{d1 / 2},{id2}:{d2 / 2})'
        within_cluster_distances.append(d)
        newick_parts.append(part)
    # The last element already is the complete subtree string. It must not be
    # passed through str.format(): tip names containing "{" or "}" would be
    # treated as replacement fields and either raise or corrupt the tree.
    return newick_parts[-1] + ';'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
22
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
23
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: read a symmetric distance matrix from a
    # tab-separated file, cluster it hierarchically, write the resulting tree
    # in Newick format and, if requested, cut the tree and write a table of
    # sample-to-cluster assignments.

    # Import the scipy subpackages used below explicitly instead of relying
    # on a bare "import scipy" to expose them as attributes (older scipy
    # releases reportedly do not load subpackages implicitly).
    import scipy.cluster.hierarchy
    import scipy.spatial.distance

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'infile',
        help='Distance matrix input file'
    )
    parser.add_argument(
        'out_prefix',
        help="Output prefix"
    )
    parser.add_argument(
        '-m', '--method', default="average",
        choices=[
            "single",
            "complete",
            "average",
            "weighted",
            "centroid",
            "median",
            "ward"
        ],
        help="Clustering method to use"
    )
    # The tree can be cut either by cluster count or by height, not both.
    cut_mode = parser.add_mutually_exclusive_group()
    cut_mode.add_argument(
        "-n", "--n-clusters", nargs="*", type=int,
        help="Number(s) of clusters at which to cut the tree; "
             "one output column is written per value"
    )
    cut_mode.add_argument(
        "--height", nargs="*", type=float,
        help="Height(s) at which to cut the tree; "
             "one output column is written per value"
    )
    args = parser.parse_args()

    # TO DO:
    # - parse outputs to generate

    # read from input and check that
    # we have been passed a symmetric distance matrix
    with open(args.infile) as i:
        # An empty file yields "" here and so falls through to the
        # "No data columns found" error instead of raising StopIteration.
        # The first header cell sits above the row names and is discarded.
        col_names = next(i, "").rstrip("\n\r").split("\t")[1:]
        col_count = len(col_names)
        if not col_count:
            sys.exit(
                'No data columns found. '
                'This tool expects tabular input with column names on the first line '
                'and a row name in the first column of each row followed by data columns.'
            )
        row_count = 0
        matrix = []
        for line in i:
            if not line.strip():
                # skip empty lines
                continue
            row_count += 1
            if row_count > col_count:
                sys.exit(
                    'This tool expects a symmetric distance matrix with an equal number of rows and columns, '
                    'but got more rows than columns.'
                )
            row_name, *row_data = line.strip(" \n\r").split("\t")
            # Row k must carry the same name as column k.
            col_name = col_names[row_count - 1]
            if not row_name:
                # tolerate omitted row names, use col name instead
                row_name = col_name
            if row_name != col_name:
                sys.exit(
                    'This tool expects a symmetric distance matrix with identical names for rows and columns, '
                    f'but got "{col_name}" in column {row_count} and "{row_name}" on row {row_count}.'
                )
            if len(row_data) != col_count:
                sys.exit(
                    'This tool expects a symmetric distance matrix with the same number of columns on each row, '
                    f'but row {row_count} ("{row_name}") has {len(row_data)} columns instead of {col_count}.'
                )
            try:
                matrix.append([float(x) for x in row_data])
            except ValueError as e:
                sys.exit(str(e) + f' on row {row_count} ("{row_name}")')
    if row_count < col_count:
        sys.exit(
            'This tool expects a symmetric distance matrix with an equal number of rows and columns, '
            'but got more columns than rows.'
        )

    # turn the distance matrix into "condensed" vector form
    # this gives us further checks and raises ValueErrors if:
    # - the values on the diagonal aren't zero
    # - the upper and lower triangle of the matrix aren't identical
    try:
        D = scipy.spatial.distance.squareform(matrix)
    except ValueError as e:
        # Report invalid matrices like every other input problem above
        # (message on stderr, non-zero exit) rather than with a traceback.
        sys.exit(f'Input is not a valid symmetric distance matrix: {e}')

    # perform the requested clustering and retrieve the result as a linkage object
    linkage = scipy.cluster.hierarchy.linkage(D, args.method)

    with open(args.out_prefix + '.tree.newick', 'w') as o:
        o.write(linkage_as_newick(linkage, col_names))

    # cut the tree as specified and report sample to cluster assignments
    if args.n_clusters or args.height:
        if args.n_clusters:
            cut_values = args.n_clusters
            colname_template = "cluster_id_n{}"
        else:
            cut_values = args.height
            colname_template = "cluster_id_h{}"
        header_cols = ["sample"] + [
            colname_template.format(x) for x in cut_values
        ]
        # cut_tree yields one row per sample with one cluster id per cut
        # value; only one of n_clusters/height is set (mutually exclusive).
        cut_result = scipy.cluster.hierarchy.cut_tree(
            linkage,
            args.n_clusters,
            args.height
        )
        # Shift the cluster ids by one so the reported ids start at 1.
        cluster_assignments = [
            [name] + [str(c + 1) for c in cluster_ids]
            for name, cluster_ids in zip(col_names, cut_result)
        ]
        with open(args.out_prefix + '.cluster_assignments.tsv', 'w') as o:
            print("\t".join(header_cols), file=o)
            for assignment in cluster_assignments:
                print("\t".join(assignment), file=o)