Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/community/lukes.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Lukes Algorithm for exact optimal weighted tree partitioning.""" | |
2 | |
3 from copy import deepcopy | |
4 from functools import lru_cache | |
5 from random import choice | |
6 | |
7 import networkx as nx | |
8 from networkx.utils import not_implemented_for | |
9 | |
__all__ = ["lukes_partitioning"]

# Default attribute key and value applied to every edge when the caller
# does not supply ``edge_weight``.
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
# Default attribute key and value applied to every node when the caller
# does not supply ``node_weight`` (node weights must be int).
D_NODE_W = "weight"
D_NODE_VALUE = 1
# Node-attribute key under which the algorithm stores its per-weight
# table of partial partitions while it runs.
PKEY = "partitions"
# maxsize for the lru_cache decorating the cluster value/weight helpers.
CLUSTER_EVAL_CACHE_SIZE = 2048
18 | |
19 | |
20 def _split_n_from(n: int, min_size_of_first_part: int): | |
21 # splits j in two parts of which the first is at least | |
22 # the second argument | |
23 assert n >= min_size_of_first_part | |
24 for p1 in range(min_size_of_first_part, n + 1): | |
25 yield p1, n - p1 | |
26 | |
27 | |
def lukes_partitioning(G, max_size: int, node_weight=None, edge_weight=None) -> list:

    """Optimal partitioning of a weighted tree using the Lukes algorithm.

    This algorithm partitions a connected, acyclic graph featuring integer
    node weights and float edge weights. The resulting clusters are such
    that the total weight of the nodes in each cluster does not exceed
    max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on LUKES[1].

    Parameters
    ----------
    G : graph

    max_size : int
        Maximum weight a partition can have in terms of sum of
        node_weight for all nodes in the partition

    edge_weight : key
        Edge data key to use as weight. If None, the weights are all
        set to one.

    node_weight : key
        Node data key to use as weight. If None, the weights are all
        set to one. The data must be int.

    Returns
    -------
    partition : list
        A list of sets of nodes representing the clusters of the
        partition.

    Raises
    -------
    NotATree
        If G is not a tree.
    TypeError
        If any of the values of node_weight is not int.

    References
    ----------
    .. Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217–224.

    """
    # First sanity check and tree preparation
    if not nx.is_tree(G):
        raise nx.NotATree("lukes_partitioning works only on trees")
    else:
        if nx.is_directed(G):
            # A directed tree has exactly one node with in-degree 0: its root.
            root = [n for n, d in G.in_degree() if d == 0]
            assert len(root) == 1
            root = root[0]
            t_G = deepcopy(G)
        else:
            # An undirected tree can be rooted anywhere; pick a node at
            # random and orient the tree away from it.
            root = choice(list(G.nodes))
            # this has the desirable side effect of not inheriting attributes
            t_G = nx.dfs_tree(G, root)

    # Since we do not want to screw up the original graph,
    # if we have a blank attribute, we make a deepcopy
    if edge_weight is None or node_weight is None:
        safe_G = deepcopy(G)
        if edge_weight is None:
            nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
            edge_weight = D_EDGE_W
        if node_weight is None:
            nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
            node_weight = D_NODE_W
    else:
        # Both weight keys supplied by the caller: read from G directly.
        safe_G = G

    # Second sanity check
    # The values of node_weight MUST BE int.
    # I cannot see any room for duck typing without incurring serious
    # danger of subtle bugs.
    all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
    for x in all_n_attr:
        if not isinstance(x, int):
            raise TypeError(
                "lukes_partitioning needs integer "
                f"values for node_weight ({node_weight})"
            )

    # SUBROUTINES -----------------------
    # these functions are defined here for two reasons:
    # - brevity: we can leverage global "safe_G"
    # - caching: signatures are hashable

    @not_implemented_for("undirected")
    # this is intended to be called only on t_G
    def _leaves(gr):
        # A leaf of the rooted tree is any node with no descendants.
        for x in gr.nodes:
            if not nx.descendants(gr, x):
                yield x

    @not_implemented_for("undirected")
    def _a_parent_of_leaves_only(gr):
        # Return some node all of whose descendants are leaves; such a
        # node always exists while gr has more than one node.
        tleaves = set(_leaves(gr))
        for n in set(gr.nodes) - tleaves:
            if all([x in tleaves for x in nx.descendants(gr, n)]):
                return n

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _value_of_cluster(cluster: frozenset):
        # Sum of edge weights for edges fully contained in the cluster
        # (i.e. the weight NOT cut by the partition).
        valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
        return sum([safe_G.edges[e][edge_weight] for e in valid_edges])

    def _value_of_partition(partition: list):
        # Total uncut edge weight across all clusters of the partition;
        # maximizing this minimizes the weight of the cut edges.
        return sum([_value_of_cluster(frozenset(c)) for c in partition])

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _weight_of_cluster(cluster: frozenset):
        # Total node weight of the cluster (bounded by max_size).
        return sum([safe_G.nodes[n][node_weight] for n in cluster])

    def _pivot(partition: list, node):
        # The unique cluster of `partition` containing `node`.
        ccx = [c for c in partition if node in c]
        assert len(ccx) == 1
        return ccx[0]

    # NOTE(review): "ref_weigth" is a typo for "ref_weight"; internal-only
    # parameter, so renaming would be cosmetic.
    def _concatenate_or_merge(partition_1: list, partition_2: list, x, i, ref_weigth):

        ccx = _pivot(partition_1, x)
        cci = _pivot(partition_2, i)
        merged_xi = ccx.union(cci)

        # We first check if we can do the merge.
        # If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weigth:
            # NOTE(review): the lambda parameter `x` shadows the enclosing
            # argument `x`; harmless here since only equality with ccx/cci
            # is tested.
            cp1 = list(filter(lambda x: x != ccx, partition_1))
            cp2 = list(filter(lambda x: x != cci, partition_2))

            option_2 = [merged_xi] + cp1 + cp2
            return option_2, _value_of_partition(option_2)
        else:
            option_1 = partition_1 + partition_2
            return option_1, _value_of_partition(option_1)

    # INITIALIZATION -----------------------
    # t_G.nodes[v][PKEY] maps a weight "slot" to the best-known partition
    # of the subtree rooted at v whose cluster containing v has that
    # weight. Slot 0 holds the best partition regardless of weight.
    leaves = set(_leaves(t_G))
    for lv in leaves:
        t_G.nodes[lv][PKEY] = dict()
        slot = safe_G.nodes[lv][node_weight]
        t_G.nodes[lv][PKEY][slot] = [{lv}]
        t_G.nodes[lv][PKEY][0] = [{lv}]

    for inner in [x for x in t_G.nodes if x not in leaves]:
        t_G.nodes[inner][PKEY] = dict()
        slot = safe_G.nodes[inner][node_weight]
        t_G.nodes[inner][PKEY][slot] = [{inner}]

    # CORE ALGORITHM -----------------------
    # Process the tree bottom-up: repeatedly take a node whose children
    # are all leaves, fold its children's tables into its own, then
    # contract the children away. Terminates when the root is reached.
    while True:
        x_node = _a_parent_of_leaves_only(t_G)
        weight_of_x = safe_G.nodes[x_node][node_weight]
        best_value = 0
        best_partition = None
        bp_buffer = dict()
        x_descendants = nx.descendants(t_G, x_node)
        for i_node in x_descendants:
            for j in range(weight_of_x, max_size + 1):
                # Try every way of splitting budget j between the cluster
                # containing x (at least weight_of_x) and the one
                # containing the child i_node.
                for a, b in _split_n_from(j, weight_of_x):
                    if (
                        a not in t_G.nodes[x_node][PKEY].keys()
                        or b not in t_G.nodes[i_node][PKEY].keys()
                    ):
                        # it's not possible to form this particular weight sum
                        continue

                    part1 = t_G.nodes[x_node][PKEY][a]
                    part2 = t_G.nodes[i_node][PKEY][b]
                    part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)

                    if j not in bp_buffer.keys() or bp_buffer[j][1] < value:
                        # we annotate in the buffer the best partition for j
                        bp_buffer[j] = part, value

                    # we also keep track of the overall best partition
                    if best_value <= value:
                        best_value = value
                        best_partition = part

            # as illustrated in Lukes, once we finished a child, we can
            # discharge the partitions we found into the graph
            # (the key phrase is make all x == x')
            # so that they are used by the subsequent children
            for w, (best_part_for_vl, vl) in bp_buffer.items():
                t_G.nodes[x_node][PKEY][w] = best_part_for_vl
            bp_buffer.clear()

        # the absolute best partition for this node
        # across all weights has to be stored at 0
        t_G.nodes[x_node][PKEY][0] = best_partition
        # Children are fully absorbed into x_node's table; contract them.
        t_G.remove_nodes_from(x_descendants)

        if x_node == root:
            # the 0-labeled partition of root
            # is the optimal one for the whole tree
            return t_G.nodes[root][PKEY][0]