comparison env/lib/python3.9/site-packages/networkx/algorithms/community/lukes.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Lukes Algorithm for exact optimal weighted tree partitioning."""
2
3 from copy import deepcopy
4 from functools import lru_cache
5 from random import choice
6
7 import networkx as nx
8 from networkx.utils import not_implemented_for
9
10 __all__ = ["lukes_partitioning"]
11
# Default edge-weight attribute key/value, applied when the caller passes
# edge_weight=None (every edge then costs 1.0 to cut).
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
# Default node-weight attribute key/value, applied when the caller passes
# node_weight=None (every node then weighs 1; node weights must be int).
D_NODE_W = "weight"
D_NODE_VALUE = 1
# Node-attribute key under which the per-weight partial partitions of the
# dynamic program are stored on the working tree.
PKEY = "partitions"
# maxsize bound for the lru_cache on cluster value/weight evaluation.
CLUSTER_EVAL_CACHE_SIZE = 2048
18
19
20 def _split_n_from(n: int, min_size_of_first_part: int):
21 # splits j in two parts of which the first is at least
22 # the second argument
23 assert n >= min_size_of_first_part
24 for p1 in range(min_size_of_first_part, n + 1):
25 yield p1, n - p1
26
27
def lukes_partitioning(G, max_size: int, node_weight=None, edge_weight=None) -> list:

    """Optimal partitioning of a weighted tree using the Lukes algorithm.

    This algorithm partitions a connected, acyclic graph featuring integer
    node weights and float edge weights. The resulting clusters are such
    that the total weight of the nodes in each cluster does not exceed
    max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on LUKES[1].

    Parameters
    ----------
    G : graph

    max_size : int
        Maximum weight a partition can have in terms of sum of
        node_weight for all nodes in the partition

    edge_weight : key
        Edge data key to use as weight. If None, the weights are all
        set to one.

    node_weight : key
        Node data key to use as weight. If None, the weights are all
        set to one. The data must be int.

    Returns
    -------
    partition : list
        A list of sets of nodes representing the clusters of the
        partition.

    Raises
    -------
    NotATree
        If G is not a tree.
    TypeError
        If any of the values of node_weight is not int.

    References
    ----------
    .. Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217–224.

    """
    # First sanity check and tree preparation
    if not nx.is_tree(G):
        raise nx.NotATree("lukes_partitioning works only on trees")
    else:
        if nx.is_directed(G):
            # A directed tree has exactly one node with in-degree 0;
            # use it as the root of the dynamic program.
            root = [n for n, d in G.in_degree() if d == 0]
            assert len(root) == 1
            root = root[0]
            t_G = deepcopy(G)
        else:
            # Any node can serve as root of an undirected tree; orient
            # the tree away from it with a DFS.
            root = choice(list(G.nodes))
            # this has the desirable side effect of not inheriting attributes
            t_G = nx.dfs_tree(G, root)

    # Since we do not want to screw up the original graph,
    # if we have a blank attribute, we make a deepcopy
    if edge_weight is None or node_weight is None:
        safe_G = deepcopy(G)
        if edge_weight is None:
            nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
            edge_weight = D_EDGE_W
        if node_weight is None:
            nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
            node_weight = D_NODE_W
    else:
        safe_G = G

    # Second sanity check
    # The values of node_weight MUST BE int.
    # I cannot see any room for duck typing without incurring serious
    # danger of subtle bugs.
    all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
    for x in all_n_attr:
        if not isinstance(x, int):
            raise TypeError(
                "lukes_partitioning needs integer "
                f"values for node_weight ({node_weight})"
            )

    # SUBROUTINES -----------------------
    # these functions are defined here for two reasons:
    # - brevity: we can leverage global "safe_G"
    # - caching: signatures are hashable

    @not_implemented_for("undirected")
    # this is intended to be called only on t_G
    def _leaves(gr):
        # A leaf of the (directed) working tree is a node with no descendants.
        for x in gr.nodes:
            if not nx.descendants(gr, x):
                yield x

    @not_implemented_for("undirected")
    def _a_parent_of_leaves_only(gr):
        # Returns some node whose descendants are all leaves; such a node
        # always exists in a non-trivial tree and is the next one the
        # bottom-up dynamic program can contract.
        tleaves = set(_leaves(gr))
        for n in set(gr.nodes) - tleaves:
            if all([x in tleaves for x in nx.descendants(gr, n)]):
                return n

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _value_of_cluster(cluster: frozenset):
        # Total weight of the edges of safe_G with both endpoints inside
        # the cluster, i.e. the edge weight *saved* from being cut.
        valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
        return sum([safe_G.edges[e][edge_weight] for e in valid_edges])

    def _value_of_partition(partition: list):
        return sum([_value_of_cluster(frozenset(c)) for c in partition])

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _weight_of_cluster(cluster: frozenset):
        # Total node weight of the cluster; must stay <= max_size.
        return sum([safe_G.nodes[n][node_weight] for n in cluster])

    def _pivot(partition: list, node):
        # The (unique) cluster of the partition that contains `node`.
        ccx = [c for c in partition if node in c]
        assert len(ccx) == 1
        return ccx[0]

    def _concatenate_or_merge(partition_1: list, partition_2: list, x, i, ref_weigth):
        # Combine the partial partitions of parent x and child i: either
        # merge the clusters containing x and i (if the merged cluster
        # still weighs <= ref_weigth) or simply concatenate the partitions.

        ccx = _pivot(partition_1, x)
        cci = _pivot(partition_2, i)
        merged_xi = ccx.union(cci)

        # We first check if we can do the merge.
        # If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weigth:
            cp1 = list(filter(lambda x: x != ccx, partition_1))
            cp2 = list(filter(lambda x: x != cci, partition_2))

            option_2 = [merged_xi] + cp1 + cp2
            return option_2, _value_of_partition(option_2)
        else:
            option_1 = partition_1 + partition_2
            return option_1, _value_of_partition(option_1)

    # INITIALIZATION -----------------------
    # t_G.nodes[v][PKEY] maps a weight budget w -> best partial partition
    # of the subtree rooted at v whose cluster containing v weighs w.
    # Key 0 is reserved for the overall best partition for v.
    leaves = set(_leaves(t_G))
    for lv in leaves:
        t_G.nodes[lv][PKEY] = dict()
        slot = safe_G.nodes[lv][node_weight]
        t_G.nodes[lv][PKEY][slot] = [{lv}]
        t_G.nodes[lv][PKEY][0] = [{lv}]

    for inner in [x for x in t_G.nodes if x not in leaves]:
        t_G.nodes[inner][PKEY] = dict()
        slot = safe_G.nodes[inner][node_weight]
        t_G.nodes[inner][PKEY][slot] = [{inner}]

    # CORE ALGORITHM -----------------------
    # Repeatedly contract a node whose children are all leaves, folding the
    # children's tables into the parent's, until only the root is left.
    while True:
        x_node = _a_parent_of_leaves_only(t_G)
        weight_of_x = safe_G.nodes[x_node][node_weight]
        best_value = 0
        best_partition = None
        bp_buffer = dict()
        # since x_node's descendants are all leaves, these are its children
        x_descendants = nx.descendants(t_G, x_node)
        for i_node in x_descendants:
            for j in range(weight_of_x, max_size + 1):
                for a, b in _split_n_from(j, weight_of_x):
                    if (
                        a not in t_G.nodes[x_node][PKEY].keys()
                        or b not in t_G.nodes[i_node][PKEY].keys()
                    ):
                        # it's not possible to form this particular weight sum
                        continue

                    part1 = t_G.nodes[x_node][PKEY][a]
                    part2 = t_G.nodes[i_node][PKEY][b]
                    part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)

                    if j not in bp_buffer.keys() or bp_buffer[j][1] < value:
                        # we annotate in the buffer the best partition for j
                        bp_buffer[j] = part, value

                    # we also keep track of the overall best partition
                    if best_value <= value:
                        best_value = value
                        best_partition = part

            # as illustrated in Lukes, once we finished a child, we can
            # discharge the partitions we found into the graph
            # (the key phrase is make all x == x')
            # so that they are used by the subsequent children
            for w, (best_part_for_vl, vl) in bp_buffer.items():
                t_G.nodes[x_node][PKEY][w] = best_part_for_vl
            bp_buffer.clear()

        # the absolute best partition for this node
        # across all weights has to be stored at 0
        t_G.nodes[x_node][PKEY][0] = best_partition
        # the processed children are no longer needed
        t_G.remove_nodes_from(x_descendants)

        if x_node == root:
            # the 0-labeled partition of root
            # is the optimal one for the whole tree
            return t_G.nodes[root][PKEY][0]