annotate clustering_from_distmat.py @ 2:f8ee933de3ca draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit e4b7fe74660f4d57fce7a5708bdbddaf769cc968
author iuc
date Mon, 16 Sep 2024 14:57:17 +0000
parents c0b01c55a0e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
1 import argparse
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
2 import sys
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
3 from collections import Counter
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
4
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
5 import scipy
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
6
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
7
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
def linkage_as_newick(linkage, tip_names):
    """Render a hierarchical-clustering linkage as a Newick tree string.

    Parameters
    ----------
    linkage
        Iterable of merge steps. For each step, ``step[0]`` and ``step[1]``
        are the indices of the two nodes being merged and ``step[2]`` is
        the distance at which they are merged. Indices below
        ``len(tip_names)`` refer to tips; larger indices refer to the
        clusters created by earlier steps, in order of creation.
    tip_names
        Names of the tips, indexed consistently with ``linkage``.

    Returns
    -------
    str
        The Newick representation of the last node built (or of the last
        tip if ``linkage`` is empty), terminated by ``;``.

    Raises
    ------
    IndexError
        If ``tip_names`` is empty or a step refers to an unknown node.
    """
    # Node i is described by newick_parts[i] (its subtree text) and
    # within_cluster_distances[i] (the distance at which it was formed,
    # 0 for tips). Both lists grow by one entry per merge step.
    newick_parts = list(tip_names)
    within_cluster_distances = [0] * len(newick_parts)
    for step in linkage:
        n1 = int(step[0])
        n2 = int(step[1])
        d = float(step[2])
        # Branch length to each child: half of the merge distance that is
        # not already accounted for by the child's own merge distance.
        d1 = d - within_cluster_distances[n1]
        d2 = d - within_cluster_distances[n2]
        id1 = newick_parts[n1]
        id2 = newick_parts[n2]
        part = f'({id1}:{d1 / 2},{id2}:{d2 / 2})'
        within_cluster_distances.append(d)
        newick_parts.append(part)
    # The last part already contains every subtree fully expanded, so it is
    # returned as is. Passing it through str.format() would misinterpret
    # curly braces occurring in tip names as replacement fields.
    return newick_parts[-1] + ';'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
23
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
24
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
25 if __name__ == "__main__":
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
26 parser = argparse.ArgumentParser()
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
27 parser.add_argument(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
28 'infile',
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
29 help='Distance matrix input file'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
30 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
31 parser.add_argument(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
32 'out_prefix',
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
33 help="Output prefix"
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
34 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
35 parser.add_argument
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
36 parser.add_argument(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
37 '-m', '--method', default="average",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
38 choices=[
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
39 "single",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
40 "complete",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
41 "average",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
42 "weighted",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
43 "centroid",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
44 "median",
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
45 "ward"
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
46 ],
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
47 help="Clustering method to use"
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
48 )
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
49 missing_names = parser.add_mutually_exclusive_group()
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
50 missing_names.add_argument(
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
51 "--nc", "--no-colnames", action="store_true",
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
52 help="Indicate that the distance matrix input does not feature column names"
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
53 )
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
54 missing_names.add_argument(
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
55 "--nr", "--no-rownames", action="store_true",
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
56 help="Indicate that the distance matrix input does not feature row names"
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
57 )
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
58 cut_mode = parser.add_mutually_exclusive_group()
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
59 cut_mode.add_argument(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
60 "-n", "--n-clusters", nargs="*", type=int
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
61 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
62 cut_mode.add_argument(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
63 "--height", nargs="*", type=float
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
64 )
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
65 parser.add_argument("-s", "--min-cluster-size", type=int, default=2)
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
66 args = parser.parse_args()
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
67
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
68 # read from input and check that
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
69 # we have been passed a symmetric distance matrix
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
70 with open(args.infile) as i:
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
71 col_count = None
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
72 row_count = 0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
73 matrix = []
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
74 if args.nc:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
75 col_names = col_count = None
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
76 else:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
77 while True:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
78 # skip leading empty lines
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
79 line = next(i).rstrip("\n\r")
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
80 if line:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
81 break
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
82 if args.nr:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
83 col_names = line.split("\t")
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
84 else:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
85 # first column is for row names, rest are column names
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
86 col_names = line.split("\t")[1:]
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
87 col_count = len(col_names)
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
88 if not col_count:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
89 sys.exit(
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
90 'No data columns found. '
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
91 'By default, this tool expects tabular input with column names on the first line '
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
92 'and a row name in the first column of each row followed by data columns. '
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
93 'Use --no-colnames or --no-rownames to modify the expected format.'
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
94 )
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
95 for line in i:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
96 if not line.strip():
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
97 # skip empty lines
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
98 continue
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
99 row_count += 1
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
100 if col_count is not None and row_count > col_count:
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
101 sys.exit(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
102 'This tool expects a symmetric distance matrix with an equal number of rows and columns, '
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
103 'but got more rows than columns.'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
104 )
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
105 if args.nr:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
106 row_name = None
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
107 row_data = line.rstrip("\n\r").split("\t")
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
108 else:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
109 row_name, *row_data = line.rstrip("\n\r").split("\t")
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
110 if col_count is None:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
111 col_count = len(row_data)
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
112 col_names = [None] * col_count
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
113 col_name = col_names[row_count - 1]
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
114 if not row_name and col_name:
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
115 # tolerate omitted row names, use col name instead
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
116 row_name = col_name
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
117 elif row_name and not col_name:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
118 # likewise for column names
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
119 # plus update list of col names with row name
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
120 col_name = col_names[row_count - 1] = row_name
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
121 elif not row_name and not col_name:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
122 sys.exit(
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
123 'Each sample in the distance matrix must have its name specified via a row name, a column name, or both, '
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
124 f'but found no name for sample number {row_count}'
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
125 )
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
126 if row_name != col_name:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
127 sys.exit(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
128 'This tool expects a symmetric distance matrix with identical names for rows and columns, '
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
129 f'but got "{col_name}" in column {row_count} and "{row_name}" on row {row_count}.'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
130 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
131 if len(row_data) != col_count:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
132 sys.exit(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
133 'This tool expects a symmetric distance matrix with the same number of columns on each row, '
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
134 f'but row {row_count} ("{row_name}") has {len(row_data)} columns instead of {col_count}.'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
135 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
136 try:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
137 matrix.append([float(x) for x in row_data])
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
138 except ValueError as e:
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
139 if args.nr:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
140 sys.exit(str(e) + f' on row {row_count}')
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
141 else:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
142 sys.exit(str(e) + f' on row {row_count} ("{row_name}")')
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
143 if row_count < col_count:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
144 sys.exit(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
145 'This tool expects a symmetric distance matrix with an equal number of rows and columns, '
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
146 'but got more columns than rows.'
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
147 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
148
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
149 # turn the distance matrix into "condensed" vector form
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
150 # this gives us further checks and raises ValueErrors if:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
151 # - the values on the diagonal aren't zero
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
152 # - the upper and lower triangle of the matrix aren't identical
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
153 D = scipy.spatial.distance.squareform(matrix)
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
154
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
155 # perform the requested clustering and retrieve the result as a linkage object
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
156 linkage = scipy.cluster.hierarchy.linkage(D, args.method)
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
157
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
158 with open(args.out_prefix + '.tree.newick', 'w') as o:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
159 o.write(linkage_as_newick(linkage, col_names))
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
160
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
161 # cut the tree as specified and report sample to cluster assignments
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
162 if args.n_clusters or args.height:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
163 if args.n_clusters:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
164 cut_values = args.n_clusters
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
165 colname_template = "cluster_id_n{}"
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
166 else:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
167 cut_values = args.height
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
168 colname_template = "cluster_id_h{}"
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
169 header_cols = ["sample"] + [
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
170 colname_template.format(x) for x in cut_values
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
171 ]
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
172 cut_result = scipy.cluster.hierarchy.cut_tree(
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
173 linkage,
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
174 args.n_clusters,
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
175 args.height
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
176 )
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
177
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
178 # Go through the cut results once to determine cluster sizes
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
179
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
180 # In the final report, the ids of clusters with fewer members than
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
181 # args.min_cluster_size will be masked with "-".
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
182 # The remaining cluster ids will be renumbered to start from 1.
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
183 # This has to be done for each clustering resulting from the
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
184 # user-specified cut_values.
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
185 cluster_member_counts = [Counter() for _ in cut_values]
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
186 effective_cluster_ids = [{} for _ in cut_values]
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
187 for cluster_ids in cut_result:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
188 for cl_count, cl_id, eff_id in zip(cluster_member_counts, cluster_ids, effective_cluster_ids):
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
189 cl_count[cl_id] += 1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
190 for counter, eff_ids in zip(cluster_member_counts, effective_cluster_ids):
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
191 eff_id = 1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
192 for item, count in counter.items():
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
193 # Since Python 3.7, Counter objects (like dicts) preserve
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
194 # insertion order so we can be sure that in the mapping
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
195 # constructed below, clusters will get renumbered in
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
196 # the order they will be reported later.
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
197 if count >= args.min_cluster_size:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
198 eff_ids[item] = str(eff_id)
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
199 eff_id += 1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
200 else:
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
201 eff_ids[item] = "-"
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
202
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
203 # build and write the cluster assignment report
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
204 # with remapped cluster ids
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
205 cluster_assignments = []
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
206 for name, cluster_ids in zip(col_names, cut_result):
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
207 cluster_assignments.append(
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
208 [name]
1
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
209 + [
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
210 eff_ids[c]
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
211 for c, eff_ids in zip(cluster_ids, effective_cluster_ids)
c0b01c55a0e0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit 65b5c6f177478883ce664aeb6f27d0bec7155fdc
iuc
parents: 0
diff changeset
212 ]
0
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
213 )
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
214 with open(args.out_prefix + '.cluster_assignments.tsv', 'w') as o:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
215 print("\t".join(header_cols), file=o)
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
216 for ass in cluster_assignments:
8192b416f945 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clustering_from_distmat/ commit a34052b87a2d05cabed5001c50f1bb10e74f97ee
iuc
parents:
diff changeset
217 print("\t".join(ass), file=o)