Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/community/lukes.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 """Lukes Algorithm for exact optimal weighted tree partitioning.""" | |
| 2 | |
| 3 from copy import deepcopy | |
| 4 from functools import lru_cache | |
| 5 from random import choice | |
| 6 | |
| 7 import networkx as nx | |
| 8 from networkx.utils import not_implemented_for | |
| 9 | |
__all__ = ["lukes_partitioning"]

# Default attribute names/values used when the caller does not supply
# edge_weight / node_weight keys (a deepcopy of G is annotated with these).
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
D_NODE_W = "weight"
D_NODE_VALUE = 1
# Node-attribute key on the working tree under which the per-weight partial
# partitions of the dynamic program are stored (dict: cluster weight -> partition).
PKEY = "partitions"
# maxsize passed to lru_cache for the cluster value/weight evaluation helpers.
CLUSTER_EVAL_CACHE_SIZE = 2048
| 18 | |
| 19 | |
def _split_n_from(n: int, min_size_of_first_part: int):
    """Yield every split of ``n`` into an ordered pair ``(p1, p2)``.

    The pairs satisfy ``p1 + p2 == n`` and ``p1 >= min_size_of_first_part``;
    ``p1`` ranges over ``range(min_size_of_first_part, n + 1)``, so ``p2``
    runs down to 0.  ``lukes_partitioning`` uses this to enumerate how a
    weight budget can be divided between a parent cluster (which must carry
    at least the parent's own node weight) and a child cluster.

    Parameters
    ----------
    n : int
        Total to be split.
    min_size_of_first_part : int
        Lower bound on the first element of each pair.

    Yields
    ------
    tuple of int
        ``(p1, n - p1)`` pairs, in increasing order of ``p1``.
    """
    # Internal helper: the caller guarantees the precondition, so a bare
    # assert (stripped under -O) is the intended contract check here.
    assert n >= min_size_of_first_part
    for p1 in range(min_size_of_first_part, n + 1):
        yield p1, n - p1
| 26 | |
| 27 | |
def lukes_partitioning(G, max_size: int, node_weight=None, edge_weight=None) -> list:

    """Optimal partitioning of a weighted tree using the Lukes algorithm.

    This algorithm partitions a connected, acyclic graph featuring integer
    node weights and float edge weights. The resulting clusters are such
    that the total weight of the nodes in each cluster does not exceed
    max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on LUKES[1].

    Parameters
    ----------
    G : graph

    max_size : int
        Maximum weight a partition can have in terms of sum of
        node_weight for all nodes in the partition

    edge_weight : key
        Edge data key to use as weight. If None, the weights are all
        set to one.

    node_weight : key
        Node data key to use as weight. If None, the weights are all
        set to one. The data must be int.

    Returns
    -------
    partition : list
        A list of sets of nodes representing the clusters of the
        partition.

    Raises
    ------
    NotATree
        If G is not a tree.
    TypeError
        If any of the values of node_weight is not int.

    References
    ----------
    .. Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217-224.

    """
    # First sanity check and tree preparation.
    # For a directed tree the (unique) root is the node with in-degree 0;
    # for an undirected tree any node can serve as root, so one is picked
    # at random and the tree is re-oriented away from it with dfs_tree.
    if not nx.is_tree(G):
        raise nx.NotATree("lukes_partitioning works only on trees")
    else:
        if nx.is_directed(G):
            root = [n for n, d in G.in_degree() if d == 0]
            # NOTE(review): a directed tree per nx.is_tree has exactly one
            # such node, hence the assert rather than a raise.
            assert len(root) == 1
            root = root[0]
            t_G = deepcopy(G)
        else:
            root = choice(list(G.nodes))
            # this has the desirable side effect of not inheriting attributes
            t_G = nx.dfs_tree(G, root)

    # Since we do not want to screw up the original graph,
    # if we have a blank attribute, we make a deepcopy and annotate the
    # copy with the default weights instead.  safe_G is the graph used
    # for every weight lookup below; t_G is the (mutable) working tree.
    if edge_weight is None or node_weight is None:
        safe_G = deepcopy(G)
        if edge_weight is None:
            nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
            edge_weight = D_EDGE_W
        if node_weight is None:
            nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
            node_weight = D_NODE_W
    else:
        safe_G = G

    # Second sanity check
    # The values of node_weight MUST BE int: they are used as exact dict
    # keys (weight "slots") in the dynamic program below.
    # I cannot see any room for duck typing without incurring serious
    # danger of subtle bugs.
    all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
    for x in all_n_attr:
        if not isinstance(x, int):
            raise TypeError(
                "lukes_partitioning needs integer "
                f"values for node_weight ({node_weight})"
            )

    # SUBROUTINES -----------------------
    # these functions are defined here for two reasons:
    # - brevity: we can leverage global "safe_G"
    # - caching: signatures are hashable

    @not_implemented_for("undirected")
    # this is intended to be called only on t_G
    def _leaves(gr):
        # Yield every node with no descendants, i.e. the leaves of gr.
        for x in gr.nodes:
            if not nx.descendants(gr, x):
                yield x

    @not_implemented_for("undirected")
    def _a_parent_of_leaves_only(gr):
        # Return some node all of whose descendants are leaves: the next
        # subtree the bottom-up algorithm can collapse.
        # NOTE(review): implicitly returns None when no such node exists;
        # the main loop relies on always finding one before exhausting gr.
        tleaves = set(_leaves(gr))
        for n in set(gr.nodes) - tleaves:
            if all([x in tleaves for x in nx.descendants(gr, n)]):
                return n

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _value_of_cluster(cluster: frozenset):
        # Total weight of the edges of safe_G with both endpoints inside
        # the cluster (the "saved", i.e. uncut, edge weight).
        valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
        return sum([safe_G.edges[e][edge_weight] for e in valid_edges])

    def _value_of_partition(partition: list):
        # Sum of cluster values; maximizing this minimizes the cut weight.
        return sum([_value_of_cluster(frozenset(c)) for c in partition])

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _weight_of_cluster(cluster: frozenset):
        # Total node weight of a cluster; compared against max_size.
        return sum([safe_G.nodes[n][node_weight] for n in cluster])

    def _pivot(partition: list, node):
        # The (unique) cluster of the partition containing node.
        ccx = [c for c in partition if node in c]
        assert len(ccx) == 1
        return ccx[0]

    def _concatenate_or_merge(partition_1: list, partition_2: list, x, i, ref_weigth):
        # Combine a partial partition around parent x with one around
        # child i.  If the union of their pivot clusters stays within
        # ref_weigth, merge those clusters; otherwise just concatenate
        # the two partitions.  Returns (partition, its value).

        ccx = _pivot(partition_1, x)
        cci = _pivot(partition_2, i)
        merged_xi = ccx.union(cci)

        # We first check if we can do the merge.
        # If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weigth:
            cp1 = list(filter(lambda x: x != ccx, partition_1))
            cp2 = list(filter(lambda x: x != cci, partition_2))

            option_2 = [merged_xi] + cp1 + cp2
            return option_2, _value_of_partition(option_2)
        else:
            option_1 = partition_1 + partition_2
            return option_1, _value_of_partition(option_1)

    # INITIALIZATION -----------------------
    # Every node starts with a PKEY table mapping a cluster weight ("slot")
    # to the best known partition of its (current) subtree having that
    # weight in the node's own cluster.  Slot 0 conventionally holds the
    # best partition over all weights; leaves get it immediately.
    leaves = set(_leaves(t_G))
    for lv in leaves:
        t_G.nodes[lv][PKEY] = dict()
        slot = safe_G.nodes[lv][node_weight]
        t_G.nodes[lv][PKEY][slot] = [{lv}]
        t_G.nodes[lv][PKEY][0] = [{lv}]

    for inner in [x for x in t_G.nodes if x not in leaves]:
        t_G.nodes[inner][PKEY] = dict()
        slot = safe_G.nodes[inner][node_weight]
        t_G.nodes[inner][PKEY][slot] = [{inner}]

    # CORE ALGORITHM -----------------------
    # Bottom-up: repeatedly pick a node whose descendants are all leaves,
    # fold each child's tables into the parent's, then delete the children.
    # Terminates by returning when the chosen node is the root.
    while True:
        x_node = _a_parent_of_leaves_only(t_G)
        weight_of_x = safe_G.nodes[x_node][node_weight]
        best_value = 0
        best_partition = None
        bp_buffer = dict()
        x_descendants = nx.descendants(t_G, x_node)
        for i_node in x_descendants:
            # For every admissible total weight j, try every split of j
            # between the parent's cluster (>= weight_of_x) and the child's.
            for j in range(weight_of_x, max_size + 1):
                for a, b in _split_n_from(j, weight_of_x):
                    if (
                        a not in t_G.nodes[x_node][PKEY].keys()
                        or b not in t_G.nodes[i_node][PKEY].keys()
                    ):
                        # it's not possible to form this particular weight sum
                        continue

                    part1 = t_G.nodes[x_node][PKEY][a]
                    part2 = t_G.nodes[i_node][PKEY][b]
                    part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)

                    if j not in bp_buffer.keys() or bp_buffer[j][1] < value:
                        # we annotate in the buffer the best partition for j
                        bp_buffer[j] = part, value

                        # we also keep track of the overall best partition
                        if best_value <= value:
                            best_value = value
                            best_partition = part

            # as illustrated in Lukes, once we finished a child, we can
            # discharge the partitions we found into the graph
            # (the key phrase is make all x == x')
            # so that they are used by the subsequent children
            for w, (best_part_for_vl, vl) in bp_buffer.items():
                t_G.nodes[x_node][PKEY][w] = best_part_for_vl
            bp_buffer.clear()

        # the absolute best partition for this node
        # across all weights has to be stored at 0
        t_G.nodes[x_node][PKEY][0] = best_partition
        # The children are fully absorbed into x_node's tables; pruning
        # them makes x_node a leaf for the next iteration.
        t_G.remove_nodes_from(x_descendants)

        if x_node == root:
            # the 0-labeled partition of root
            # is the optimal one for the whole tree
            return t_G.nodes[root][PKEY][0]
