comparison env/lib/python3.9/site-packages/networkx/algorithms/connectivity/kcomponents.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Moody and White algorithm for k-components
3 """
4 from collections import defaultdict
5 from itertools import combinations
6 from operator import itemgetter
7
8 import networkx as nx
9 from networkx.utils import not_implemented_for
10
11 # Define the default maximum flow function.
12 from networkx.algorithms.flow import edmonds_karp
13
14 default_flow_func = edmonds_karp
15
16 __all__ = ["k_components"]
17
18
19 @not_implemented_for("directed")
20 def k_components(G, flow_func=None):
21 r"""Returns the k-component structure of a graph G.
22
23 A `k`-component is a maximal subgraph of a graph G that has, at least,
24 node connectivity `k`: we need to remove at least `k` nodes to break it
25 into more components. `k`-components have an inherent hierarchical
26 structure because they are nested in terms of connectivity: a connected
27 graph can contain several 2-components, each of which can contain
28 one or more 3-components, and so forth.
29
30 Parameters
31 ----------
32 G : NetworkX graph
33
34 flow_func : function
35 Function to perform the underlying flow computations. Default value
36 :meth:`edmonds_karp`. This function performs better in sparse graphs with
37 right tailed degree distributions. :meth:`shortest_augmenting_path` will
38 perform better in denser graphs.
39
40 Returns
41 -------
42 k_components : dict
43 Dictionary with all connectivity levels `k` in the input Graph as keys
44 and a list of sets of nodes that form a k-component of level `k` as
45 values.
46
47 Raises
48 ------
49 NetworkXNotImplemented
50 If the input graph is directed.
51
52 Examples
53 --------
54 >>> # Petersen graph has 10 nodes and it is triconnected, thus all
55 >>> # nodes are in a single component on all three connectivity levels
56 >>> G = nx.petersen_graph()
57 >>> k_components = nx.k_components(G)
58
59 Notes
60 -----
61 Moody and White [1]_ (appendix A) provide an algorithm for identifying
62 k-components in a graph, which is based on Kanevsky's algorithm [2]_
63 for finding all minimum-size node cut-sets of a graph (implemented in
64 :meth:`all_node_cuts` function):
65
66 1. Compute node connectivity, k, of the input graph G.
67
68 2. Identify all k-cutsets at the current level of connectivity using
69 Kanevsky's algorithm.
70
71 3. Generate new graph components based on the removal of
72 these cutsets. Nodes in a cutset belong to both sides
73 of the induced cut.
74
75 4. If the graph is neither complete nor trivial, return to 1;
76 else end.
77
78 This implementation also uses some heuristics (see [3]_ for details)
79 to speed up the computation.
80
81 See also
82 --------
83 node_connectivity
84 all_node_cuts
85 biconnected_components : special case of this function when k=2
86 k_edge_components : similar to this function, but uses edge-connectivity
87 instead of node-connectivity
88
89 References
90 ----------
91 .. [1] Moody, J. and D. White (2003). Social cohesion and embeddedness:
92 A hierarchical conception of social groups.
93 American Sociological Review 68(1), 103--28.
94 http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
95
96 .. [2] Kanevsky, A. (1993). Finding all minimum-size separating vertex
97 sets in a graph. Networks 23(6), 533--541.
98 http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract
99
100 .. [3] Torrents, J. and F. Ferraro (2015). Structural Cohesion:
101 Visualization and Heuristics for Fast Computation.
102 https://arxiv.org/pdf/1503.04476v1
103
104 """
105 # Dictionary with connectivity level (k) as keys and a list of
106 # sets of nodes that form a k-component as values. Note that
107 # k-compoents can overlap (but only k - 1 nodes).
108 k_components = defaultdict(list)
109 # Define default flow function
110 if flow_func is None:
111 flow_func = default_flow_func
112 # Bicomponents as a base to check for higher order k-components
113 for component in nx.connected_components(G):
114 # isolated nodes have connectivity 0
115 comp = set(component)
116 if len(comp) > 1:
117 k_components[1].append(comp)
118 bicomponents = [G.subgraph(c) for c in nx.biconnected_components(G)]
119 for bicomponent in bicomponents:
120 bicomp = set(bicomponent)
121 # avoid considering dyads as bicomponents
122 if len(bicomp) > 2:
123 k_components[2].append(bicomp)
124 for B in bicomponents:
125 if len(B) <= 2:
126 continue
127 k = nx.node_connectivity(B, flow_func=flow_func)
128 if k > 2:
129 k_components[k].append(set(B))
130 # Perform cuts in a DFS like order.
131 cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func))
132 stack = [(k, _generate_partition(B, cuts, k))]
133 while stack:
134 (parent_k, partition) = stack[-1]
135 try:
136 nodes = next(partition)
137 C = B.subgraph(nodes)
138 this_k = nx.node_connectivity(C, flow_func=flow_func)
139 if this_k > parent_k and this_k > 2:
140 k_components[this_k].append(set(C))
141 cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func))
142 if cuts:
143 stack.append((this_k, _generate_partition(C, cuts, this_k)))
144 except StopIteration:
145 stack.pop()
146
147 # This is necessary because k-components may only be reported at their
148 # maximum k level. But we want to return a dictionary in which keys are
149 # connectivity levels and values list of sets of components, without
150 # skipping any connectivity level. Also, it's possible that subsets of
151 # an already detected k-component appear at a level k. Checking for this
152 # in the while loop above penalizes the common case. Thus we also have to
153 # _consolidate all connectivity levels in _reconstruct_k_components.
154 return _reconstruct_k_components(k_components)
155
156
157 def _consolidate(sets, k):
158 """Merge sets that share k or more elements.
159
160 See: http://rosettacode.org/wiki/Set_consolidation
161
162 The iterative python implementation posted there is
163 faster than this because of the overhead of building a
164 Graph and calling nx.connected_components, but it's not
165 clear for us if we can use it in NetworkX because there
166 is no licence for the code.
167
168 """
169 G = nx.Graph()
170 nodes = {i: s for i, s in enumerate(sets)}
171 G.add_nodes_from(nodes)
172 G.add_edges_from(
173 (u, v) for u, v in combinations(nodes, 2) if len(nodes[u] & nodes[v]) >= k
174 )
175 for component in nx.connected_components(G):
176 yield set.union(*[nodes[n] for n in component])
177
178
179 def _generate_partition(G, cuts, k):
180 def has_nbrs_in_partition(G, node, partition):
181 for n in G[node]:
182 if n in partition:
183 return True
184 return False
185
186 components = []
187 nodes = {n for n, d in G.degree() if d > k} - {n for cut in cuts for n in cut}
188 H = G.subgraph(nodes)
189 for cc in nx.connected_components(H):
190 component = set(cc)
191 for cut in cuts:
192 for node in cut:
193 if has_nbrs_in_partition(G, node, cc):
194 component.add(node)
195 if len(component) < G.order():
196 components.append(component)
197 yield from _consolidate(components, k + 1)
198
199
200 def _reconstruct_k_components(k_comps):
201 result = dict()
202 max_k = max(k_comps)
203 for k in reversed(range(1, max_k + 1)):
204 if k == max_k:
205 result[k] = list(_consolidate(k_comps[k], k))
206 elif k not in k_comps:
207 result[k] = list(_consolidate(result[k + 1], k))
208 else:
209 nodes_at_k = set.union(*k_comps[k])
210 to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)]
211 if to_add:
212 result[k] = list(_consolidate(k_comps[k] + to_add, k))
213 else:
214 result[k] = list(_consolidate(k_comps[k], k))
215 return result
216
217
218 def build_k_number_dict(kcomps):
219 result = {}
220 for k, comps in sorted(kcomps.items(), key=itemgetter(0)):
221 for comp in comps:
222 for node in comp:
223 result[node] = k
224 return result