Mercurial > repos > iuc > clustering_from_distmat
annotate clustering_from_distmat.py @ 1:c0b01c55a0e0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
author | iuc |
---|---|
date | Mon, 19 Aug 2024 15:33:16 +0000 |
parents | 8192b416f945 |
children |
rev | line source |
---|---|
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
1 import argparse |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
2 import sys |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
3 from collections import Counter |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
4 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
5 import scipy |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
6 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
7 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
def linkage_as_newick(linkage, tip_names):
    """Serialize a hierarchical-clustering linkage as a Newick tree string.

    Parameters
    ----------
    linkage : iterable of sequences
        One entry per merge step.  ``step[0]`` and ``step[1]`` are the
        indices of the two nodes being merged and ``step[2]`` is the
        distance at which they are merged.  Indices below
        ``len(tip_names)`` refer to tips; larger indices refer to the
        clusters created by earlier steps, in order of creation.
        (Presumably the matrix returned by
        ``scipy.cluster.hierarchy.linkage`` -- confirm against the caller.)
    tip_names : sequence of str
        Names of the leaves, indexed consistently with ``linkage``.

    Returns
    -------
    str
        The Newick representation of the node created by the last merge
        step, terminated by ``;``.  If ``linkage`` is empty, the last tip
        name followed by ``;`` is returned.

    Notes
    -----
    Tip names are inserted verbatim.  They are *not* passed through
    ``str.format``, so names containing ``{`` or ``}`` are preserved
    unchanged instead of being re-interpreted as replacement fields.
    """
    # Work on a copy so the caller's sequence is never modified; list()
    # additionally accepts tuples and other non-list sequences.
    newick_parts = list(tip_names)
    # Merge distance of every node; tips are at distance 0.
    within_cluster_distances = [0] * len(newick_parts)

    for step in linkage:
        n1 = int(step[0])
        n2 = int(step[1])
        d = float(step[2])
        # Each child's branch spans half of the difference between this
        # merge distance and the distance at which the child was formed.
        d1 = d - within_cluster_distances[n1]
        d2 = d - within_cluster_distances[n2]
        id1 = newick_parts[n1]
        id2 = newick_parts[n2]
        part = f'({id1}:{d1 / 2},{id2}:{d2 / 2})'
        # The new cluster gets the next free index, right after all
        # existing tips and clusters.
        within_cluster_distances.append(d)
        newick_parts.append(part)

    # The last entry is already fully interpolated; no further
    # formatting must be applied to it.
    return newick_parts[-1] + ';'
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
23 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
24 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
25 if __name__ == "__main__": |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
26 parser = argparse.ArgumentParser() |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
27 parser.add_argument( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
28 'infile', |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
29 help='Distance matrix input file' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
30 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
31 parser.add_argument( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
32 'out_prefix', |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
33 help="Output prefix" |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
34 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
35 parser.add_argument |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
36 parser.add_argument( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
37 '-m', '--method', default="average", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
38 choices=[ |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
39 "single", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
40 "complete", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
41 "average", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
42 "weighted", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
43 "centroid", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
44 "median", |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
45 "ward" |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
46 ], |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
47 help="Clustering method to use" |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
48 ) |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
49 missing_names = parser.add_mutually_exclusive_group() |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
50 missing_names.add_argument( |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
51 "--nc", "--no-colnames", action="store_true", |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
52 help="Indicate that the distance matrix input does not feature column names" |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
53 ) |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
54 missing_names.add_argument( |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
55 "--nr", "--no-rownames", action="store_true", |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
56 help="Indicate that the distance matrix input does not feature row names" |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
57 ) |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
58 cut_mode = parser.add_mutually_exclusive_group() |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
59 cut_mode.add_argument( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
60 "-n", "--n-clusters", nargs="*", type=int |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
61 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
62 cut_mode.add_argument( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
63 "--height", nargs="*", type=float |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
64 ) |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
65 parser.add_argument("-s", "--min-cluster-size", type=int, default=2) |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
66 args = parser.parse_args() |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
67 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
68 # read from input and check that |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
69 # we have been passed a symmetric distance matrix |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
70 with open(args.infile) as i: |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
71 col_count = None |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
72 row_count = 0 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
73 matrix = [] |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
74 if args.nc: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
75 col_names = col_count = None |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
76 else: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
77 while True: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
78 # skip leading empty lines |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
79 line = next(i).rstrip("\n\r") |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
80 if line: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
81 break |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
82 if args.nr: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
83 col_names = line.split("\t") |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
84 else: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
85 # first column is for row names, rest are column names |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
86 col_names = line.split("\t")[1:] |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
87 col_count = len(col_names) |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
88 if not col_count: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
89 sys.exit( |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
90 'No data columns found. ' |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
91 'By default, this tool expects tabular input with column names on the first line ' |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
92 'and a row name in the first column of each row followed by data columns. ' |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
93 'Use --no-colnames or --no-rownames to modify the expected format.' |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
94 ) |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
95 for line in i: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
96 if not line.strip(): |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
97 # skip empty lines |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
98 continue |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
99 row_count += 1 |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
100 if col_count is not None and row_count > col_count: |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
101 sys.exit( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
102 'This tool expects a symmetric distance matrix with an equal number of rows and columns, ' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
103 'but got more rows than columns.' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
104 ) |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
105 if args.nr: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
106 row_name = None |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
107 row_data = line.rstrip("\n\r").split("\t") |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
108 else: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
109 row_name, *row_data = line.rstrip("\n\r").split("\t") |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
110 if col_count is None: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
111 col_count = len(row_data) |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
112 col_names = [None] * col_count |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
113 col_name = col_names[row_count - 1] |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
114 if not row_name and col_name: |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
115 # tolerate omitted row names, use col name instead |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
116 row_name = col_name |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
117 elif row_name and not col_name: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
118 # likewise for column names |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
119 # plus update list of col names with row name |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
120 col_name = col_names[row_count - 1] = row_name |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
121 elif not row_name and not col_name: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
122 sys.exit( |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
123 'Each sample in the distance matrix must have its name specified via a row name, a column name, or both, ' |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
124 f'but found no name for sample number {row_count}' |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
125 ) |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
126 if row_name != col_name: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
127 sys.exit( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
128 'This tool expects a symmetric distance matrix with identical names for rows and columns, ' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
129 f'but got "{col_name}" in column {row_count} and "{row_name}" on row {row_count}.' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
130 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
131 if len(row_data) != col_count: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
132 sys.exit( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
133 'This tool expects a symmetric distance matrix with the same number of columns on each row, ' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
134 f'but row {row_count} ("{row_name}") has {len(row_data)} columns instead of {col_count}.' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
135 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
136 try: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
137 matrix.append([float(x) for x in row_data]) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
138 except ValueError as e: |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
139 if args.nr: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
140 sys.exit(str(e) + f' on row {row_count}') |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
141 else: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
142 sys.exit(str(e) + f' on row {row_count} ("{row_name}")') |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
143 if row_count < col_count: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
144 sys.exit( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
145 'This tool expects a symmetric distance matrix with an equal number of rows and columns, ' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
146 'but got more columns than rows.' |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
147 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
148 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
149 # turn the distance matrix into "condensed" vector form |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
150 # this gives us further checks and raises ValueErrors if: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
151 # - the values on the diagonal aren't zero |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
152 # - the upper and lower triangle of the matrix aren't identical |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
153 D = scipy.spatial.distance.squareform(matrix) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
154 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
155 # perform the requested clustering and retrieve the result as a linkage object |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
156 linkage = scipy.cluster.hierarchy.linkage(D, args.method) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
157 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
158 with open(args.out_prefix + '.tree.newick', 'w') as o: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
159 o.write(linkage_as_newick(linkage, col_names)) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
160 |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
161 # cut the tree as specified and report sample to cluster assignments |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
162 if args.n_clusters or args.height: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
163 if args.n_clusters: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
164 cut_values = args.n_clusters |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
165 colname_template = "cluster_id_n{}" |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
166 else: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
167 cut_values = args.height |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
168 colname_template = "cluster_id_h{}" |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
169 header_cols = ["sample"] + [ |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
170 colname_template.format(x) for x in cut_values |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
171 ] |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
172 cut_result = scipy.cluster.hierarchy.cut_tree( |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
173 linkage, |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
174 args.n_clusters, |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
175 args.height |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
176 ) |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
177 |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
178 # Go through the cut results once to determine cluster sizes |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
179 |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
180 # In the final report, the ids of clusters with fewer members than |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
181 # args.min_cluster_size will be masked with "-". |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
182 # The remaining cluster ids will be renumbered to start from 1. |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
183 # This has to be done for each clustering resulting from the |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
184 # user-specified cut_values. |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
185 cluster_member_counts = [Counter() for _ in cut_values] |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
186 effective_cluster_ids = [{} for _ in cut_values] |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
187 for cluster_ids in cut_result: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
188 for cl_count, cl_id, eff_id in zip(cluster_member_counts, cluster_ids, effective_cluster_ids): |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
189 cl_count[cl_id] += 1 |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
190 for counter, eff_ids in zip(cluster_member_counts, effective_cluster_ids): |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
191 eff_id = 1 |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
192 for item, count in counter.items(): |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
193 # Since Python 3.7, Counter objects (like dicts) preserve |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
194 # insertion order so we can be sure that in the mapping |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
195 # constructed below, clusters will get renumbered in |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
196 # the order they will be reported later. |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
197 if count >= args.min_cluster_size: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
198 eff_ids[item] = str(eff_id) |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
199 eff_id += 1 |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
200 else: |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
201 eff_ids[item] = "-" |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
202 |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
203 # build and write the cluster assignment report |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
204 # with remapped cluster ids |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
205 cluster_assignments = [] |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
206 for name, cluster_ids in zip(col_names, cut_result): |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
207 cluster_assignments.append( |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
208 [name] |
1
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
209 + [ |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
210 eff_ids[c] |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
211 for c, eff_ids in zip(cluster_ids, effective_cluster_ids) |
c0b01c55a0e0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents:
0
diff
changeset
|
212 ] |
0
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
213 ) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
214 with open(args.out_prefix + '.cluster_assignments.tsv', 'w') as o: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
215 print("\t".join(header_cols), file=o) |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
216 for ass in cluster_assignments: |
8192b416f945
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff
changeset
|
217 print("\t".join(ass), file=o) |