### annotate env/lib/python3.9/site-packages/networkx/algorithms/approximation/kcomponents.py @ 0:4f3585e2f14bdraftdefaulttip

author shellac Mon, 22 Mar 2021 18:12:50 +0000
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
shellac
parents:
diff changeset
1 """ Fast approximation for k-component structure
shellac
parents:
diff changeset
2 """
shellac
parents:
diff changeset
3 import itertools
shellac
parents:
diff changeset
4 from collections import defaultdict
shellac
parents:
diff changeset
5 from collections.abc import Mapping
shellac
parents:
diff changeset
6
shellac
parents:
diff changeset
7 import networkx as nx
shellac
parents:
diff changeset
8 from networkx.exception import NetworkXError
shellac
parents:
diff changeset
9 from networkx.utils import not_implemented_for
shellac
parents:
diff changeset
10
shellac
parents:
diff changeset
11 from networkx.algorithms.approximation import local_node_connectivity
shellac
parents:
diff changeset
12
shellac
parents:
diff changeset
13
shellac
parents:
diff changeset
14 __all__ = ["k_components"]
shellac
parents:
diff changeset
15
shellac
parents:
diff changeset
16
shellac
parents:
diff changeset
17 not_implemented_for("directed")
shellac
parents:
diff changeset
18
shellac
parents:
diff changeset
19
shellac
parents:
diff changeset
20 def k_components(G, min_density=0.95):
shellac
parents:
diff changeset
21 r"""Returns the approximate k-component structure of a graph G.
shellac
parents:
diff changeset
22
shellac
parents:
diff changeset
23 A k-component is a maximal subgraph of a graph G that has, at least,
shellac
parents:
diff changeset
24 node connectivity k: we need to remove at least k nodes to break it
shellac
parents:
diff changeset
25 into more components. k-components have an inherent hierarchical
shellac
parents:
diff changeset
26 structure because they are nested in terms of connectivity: a connected
shellac
parents:
diff changeset
27 graph can contain several 2-components, each of which can contain
shellac
parents:
diff changeset
28 one or more 3-components, and so forth.
shellac
parents:
diff changeset
29
shellac
parents:
diff changeset
30 This implementation is based on the fast heuristics to approximate
shellac
parents:
diff changeset
31 the k-component structure of a graph [1]_. Which, in turn, it is based on
shellac
parents:
diff changeset
32 a fast approximation algorithm for finding good lower bounds of the number
shellac
parents:
diff changeset
33 of node independent paths between two nodes [2]_.
shellac
parents:
diff changeset
34
shellac
parents:
diff changeset
35 Parameters
shellac
parents:
diff changeset
36 ----------
shellac
parents:
diff changeset
37 G : NetworkX graph
shellac
parents:
diff changeset
38 Undirected graph
shellac
parents:
diff changeset
39
shellac
parents:
diff changeset
40 min_density : Float
shellac
parents:
diff changeset
41 Density relaxation threshold. Default value 0.95
shellac
parents:
diff changeset
42
shellac
parents:
diff changeset
43 Returns
shellac
parents:
diff changeset
44 -------
shellac
parents:
diff changeset
45 k_components : dict
shellac
parents:
diff changeset
46 Dictionary with connectivity level k as key and a list of
shellac
parents:
diff changeset
47 sets of nodes that form a k-component of level k as values.
shellac
parents:
diff changeset
48
shellac
parents:
diff changeset
49
shellac
parents:
diff changeset
50 Examples
shellac
parents:
diff changeset
51 --------
shellac
parents:
diff changeset
52 >>> # Petersen graph has 10 nodes and it is triconnected, thus all
shellac
parents:
diff changeset
53 >>> # nodes are in a single component on all three connectivity levels
shellac
parents:
diff changeset
54 >>> from networkx.algorithms import approximation as apxa
shellac
parents:
diff changeset
55 >>> G = nx.petersen_graph()
shellac
parents:
diff changeset
56 >>> k_components = apxa.k_components(G)
shellac
parents:
diff changeset
57
shellac
parents:
diff changeset
58 Notes
shellac
parents:
diff changeset
59 -----
shellac
parents:
diff changeset
60 The logic of the approximation algorithm for computing the k-component
shellac
parents:
diff changeset
61 structure [1]_ is based on repeatedly applying simple and fast algorithms
shellac
parents:
diff changeset
62 for k-cores and biconnected components in order to narrow down the
shellac
parents:
diff changeset
63 number of pairs of nodes over which we have to compute White and Newman's
shellac
parents:
diff changeset
64 approximation algorithm for finding node independent paths [2]_. More
shellac
parents:
diff changeset
65 formally, this algorithm is based on Whitney's theorem, which states
shellac
parents:
diff changeset
66 an inclusion relation among node connectivity, edge connectivity, and
shellac
parents:
diff changeset
67 minimum degree for any graph G. This theorem implies that every
shellac
parents:
diff changeset
68 k-component is nested inside a k-edge-component, which in turn,
shellac
parents:
diff changeset
69 is contained in a k-core. Thus, this algorithm computes node independent
shellac
parents:
diff changeset
70 paths among pairs of nodes in each biconnected part of each k-core,
shellac
parents:
diff changeset
71 and repeats this procedure for each k from 3 to the maximal core number
shellac
parents:
diff changeset
72 of a node in the input graph.
shellac
parents:
diff changeset
73
shellac
parents:
diff changeset
74 Because, in practice, many nodes of the core of level k inside a
shellac
parents:
diff changeset
75 bicomponent actually are part of a component of level k, the auxiliary
shellac
parents:
diff changeset
76 graph needed for the algorithm is likely to be very dense. Thus, we use
shellac
parents:
diff changeset
77 a complement graph data structure (see AntiGraph) to save memory.
shellac
parents:
diff changeset
78 AntiGraph only stores information of the edges that are *not* present
shellac
parents:
diff changeset
79 in the actual auxiliary graph. When applying algorithms to this
shellac
parents:
diff changeset
80 complement graph data structure, it behaves as if it were the dense
shellac
parents:
diff changeset
81 version.
shellac
parents:
diff changeset
82
shellac
parents:
diff changeset
shellac
parents:
diff changeset
84 --------
shellac
parents:
diff changeset
85 k_components
shellac
parents:
diff changeset
86
shellac
parents:
diff changeset
87 References
shellac
parents:
diff changeset
88 ----------
shellac
parents:
diff changeset
89 .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion:
shellac
parents:
diff changeset
90 Visualization and Heuristics for Fast Computation.
shellac
parents:
diff changeset
91 https://arxiv.org/pdf/1503.04476v1
shellac
parents:
diff changeset
92
shellac
parents:
diff changeset
93 .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for
shellac
parents:
diff changeset
94 Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035
shellac
parents:
diff changeset
95 http://eclectic.ss.uci.edu/~drwhite/working.pdf
shellac
parents:
diff changeset
96
shellac
parents:
diff changeset
97 .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness:
shellac
parents:
diff changeset
98 A hierarchical conception of social groups.
shellac
parents:
diff changeset
99 American Sociological Review 68(1), 103--28.
shellac
parents:
diff changeset
100 http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
shellac
parents:
diff changeset
101
shellac
parents:
diff changeset
102 """
shellac
parents:
diff changeset
103 # Dictionary with connectivity level (k) as keys and a list of
shellac
parents:
diff changeset
104 # sets of nodes that form a k-component as values
shellac
parents:
diff changeset
105 k_components = defaultdict(list)
shellac
parents:
diff changeset
106 # make a few functions local for speed
shellac
parents:
diff changeset
107 node_connectivity = local_node_connectivity
shellac
parents:
diff changeset
108 k_core = nx.k_core
shellac
parents:
diff changeset
109 core_number = nx.core_number
shellac
parents:
diff changeset
110 biconnected_components = nx.biconnected_components
shellac
parents:
diff changeset
111 density = nx.density
shellac
parents:
diff changeset
112 combinations = itertools.combinations
shellac
parents:
diff changeset
113 # Exact solution for k = {1,2}
shellac
parents:
diff changeset
114 # There is a linear time algorithm for triconnectivity, if we had an
shellac
parents:
diff changeset
115 # implementation available we could start from k = 4.
shellac
parents:
diff changeset
116 for component in nx.connected_components(G):
shellac
parents:
diff changeset
117 # isolated nodes have connectivity 0
shellac
parents:
diff changeset
118 comp = set(component)
shellac
parents:
diff changeset
119 if len(comp) > 1:
shellac
parents:
diff changeset
120 k_components[1].append(comp)
shellac
parents:
diff changeset
121 for bicomponent in nx.biconnected_components(G):
shellac
parents:
diff changeset
122 # avoid considering dyads as bicomponents
shellac
parents:
diff changeset
123 bicomp = set(bicomponent)
shellac
parents:
diff changeset
124 if len(bicomp) > 2:
shellac
parents:
diff changeset
125 k_components[2].append(bicomp)
shellac
parents:
diff changeset
126 # There is no k-component of k > maximum core number
shellac
parents:
diff changeset
127 # \kappa(G) <= \lambda(G) <= \delta(G)
shellac
parents:
diff changeset
128 g_cnumber = core_number(G)
shellac
parents:
diff changeset
129 max_core = max(g_cnumber.values())
shellac
parents:
diff changeset
130 for k in range(3, max_core + 1):
shellac
parents:
diff changeset
131 C = k_core(G, k, core_number=g_cnumber)
shellac
parents:
diff changeset
132 for nodes in biconnected_components(C):
shellac
parents:
diff changeset
133 # Build a subgraph SG induced by the nodes that are part of
shellac
parents:
diff changeset
134 # each biconnected component of the k-core subgraph C.
shellac
parents:
diff changeset
135 if len(nodes) < k:
shellac
parents:
diff changeset
136 continue
shellac
parents:
diff changeset
137 SG = G.subgraph(nodes)
shellac
parents:
diff changeset
138 # Build auxiliary graph
shellac
parents:
diff changeset
139 H = _AntiGraph()
shellac
parents:
diff changeset
shellac
parents:
diff changeset
141 for u, v in combinations(SG, 2):
shellac
parents:
diff changeset
142 K = node_connectivity(SG, u, v, cutoff=k)
shellac
parents:
diff changeset
143 if k > K:
shellac
parents:
diff changeset
shellac
parents:
diff changeset
145 for h_nodes in biconnected_components(H):
shellac
parents:
diff changeset
146 if len(h_nodes) <= k:
shellac
parents:
diff changeset
147 continue
shellac
parents:
diff changeset
148 SH = H.subgraph(h_nodes)
shellac
parents:
diff changeset
149 for Gc in _cliques_heuristic(SG, SH, k, min_density):
shellac
parents:
diff changeset
150 for k_nodes in biconnected_components(Gc):
shellac
parents:
diff changeset
151 Gk = nx.k_core(SG.subgraph(k_nodes), k)
shellac
parents:
diff changeset
152 if len(Gk) <= k:
shellac
parents:
diff changeset
153 continue
shellac
parents:
diff changeset
154 k_components[k].append(set(Gk))
shellac
parents:
diff changeset
155 return k_components
shellac
parents:
diff changeset
156
shellac
parents:
diff changeset
157
shellac
parents:
diff changeset
158 def _cliques_heuristic(G, H, k, min_density):
shellac
parents:
diff changeset
159 h_cnumber = nx.core_number(H)
shellac
parents:
diff changeset
160 for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
shellac
parents:
diff changeset
161 cands = {n for n, c in h_cnumber.items() if c == c_value}
shellac
parents:
diff changeset
162 # Skip checking for overlap for the highest core value
shellac
parents:
diff changeset
163 if i == 0:
shellac
parents:
diff changeset
164 overlap = False
shellac
parents:
diff changeset
165 else:
shellac
parents:
diff changeset
166 overlap = set.intersection(
shellac
parents:
diff changeset
167 *[{x for x in H[n] if x not in cands} for n in cands]
shellac
parents:
diff changeset
168 )
shellac
parents:
diff changeset
169 if overlap and len(overlap) < k:
shellac
parents:
diff changeset
170 SH = H.subgraph(cands | overlap)
shellac
parents:
diff changeset
171 else:
shellac
parents:
diff changeset
172 SH = H.subgraph(cands)
shellac
parents:
diff changeset
173 sh_cnumber = nx.core_number(SH)
shellac
parents:
diff changeset
174 SG = nx.k_core(G.subgraph(SH), k)
shellac
parents:
diff changeset
175 while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
shellac
parents:
diff changeset
176 # This subgraph must be writable => .copy()
shellac
parents:
diff changeset
177 SH = H.subgraph(SG).copy()
shellac
parents:
diff changeset
178 if len(SH) <= k:
shellac
parents:
diff changeset
179 break
shellac
parents:
diff changeset
180 sh_cnumber = nx.core_number(SH)
shellac
parents:
diff changeset
181 sh_deg = dict(SH.degree())
shellac
parents:
diff changeset
182 min_deg = min(sh_deg.values())
shellac
parents:
diff changeset
183 SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
shellac
parents:
diff changeset
184 SG = nx.k_core(G.subgraph(SH), k)
shellac
parents:
diff changeset
185 else:
shellac
parents:
diff changeset
186 yield SG
shellac
parents:
diff changeset
187
shellac
parents:
diff changeset
188
shellac
parents:
diff changeset
189 def _same(measure, tol=0):
shellac
parents:
diff changeset
190 vals = set(measure.values())
shellac
parents:
diff changeset
191 if (max(vals) - min(vals)) <= tol:
shellac
parents:
diff changeset
192 return True
shellac
parents:
diff changeset
193 return False
shellac
parents:
diff changeset
194
shellac
parents:
diff changeset
195
shellac
parents:
diff changeset
196 class _AntiGraph(nx.Graph):
shellac
parents:
diff changeset
197 """
shellac
parents:
diff changeset
198 Class for complement graphs.
shellac
parents:
diff changeset
199
shellac
parents:
diff changeset
200 The main goal is to be able to work with big and dense graphs with
shellac
parents:
diff changeset
201 a low memory foodprint.
shellac
parents:
diff changeset
202
shellac
parents:
diff changeset
203 In this class you add the edges that *do not exist* in the dense graph,
shellac
parents:
diff changeset
204 the report methods of the class return the neighbors, the edges and
shellac
parents:
diff changeset
205 the degree as if it was the dense graph. Thus it's possible to use
shellac
parents:
diff changeset
206 an instance of this class with some of NetworkX functions. In this
shellac
parents:
diff changeset
207 case we only use k-core, connected_components, and biconnected_components.
shellac
parents:
diff changeset
208 """
shellac
parents:
diff changeset
209
shellac
parents:
diff changeset
210 all_edge_dict = {"weight": 1}
shellac
parents:
diff changeset
211
shellac
parents:
diff changeset
212 def single_edge_dict(self):
shellac
parents:
diff changeset
213 return self.all_edge_dict
shellac
parents:
diff changeset
214
shellac
parents:
diff changeset
215 edge_attr_dict_factory = single_edge_dict
shellac
parents:
diff changeset
216
shellac
parents:
diff changeset
217 def __getitem__(self, n):
shellac
parents:
diff changeset
218 """Returns a dict of neighbors of node n in the dense graph.
shellac
parents:
diff changeset
219
shellac
parents:
diff changeset
220 Parameters
shellac
parents:
diff changeset
221 ----------
shellac
parents:
diff changeset
222 n : node
shellac
parents:
diff changeset
223 A node in the graph.
shellac
parents:
diff changeset
224
shellac
parents:
diff changeset
225 Returns
shellac
parents:
diff changeset
226 -------
shellac
parents:
diff changeset
227 adj_dict : dictionary
shellac
parents:
diff changeset
228 The adjacency dictionary for nodes connected to n.
shellac
parents:
diff changeset
229
shellac
parents:
diff changeset
230 """
shellac
parents:
diff changeset
231 all_edge_dict = self.all_edge_dict
shellac
parents:
diff changeset
232 return {
shellac
parents:
diff changeset
233 node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n}
shellac
parents:
diff changeset
234 }
shellac
parents:
diff changeset
235
shellac
parents:
diff changeset
236 def neighbors(self, n):
shellac
parents:
diff changeset
237 """Returns an iterator over all neighbors of node n in the
shellac
parents:
diff changeset
238 dense graph.
shellac
parents:
diff changeset
239 """
shellac
parents:
diff changeset
240 try:
shellac
parents:
diff changeset
241 return iter(set(self._adj) - set(self._adj[n]) - {n})
shellac
parents:
diff changeset
242 except KeyError as e:
shellac
parents:
diff changeset
243 raise NetworkXError(f"The node {n} is not in the graph.") from e
shellac
parents:
diff changeset
244
shellac
parents:
diff changeset
245 class AntiAtlasView(Mapping):
shellac
parents:
diff changeset
246 """An adjacency inner dict for AntiGraph"""
shellac
parents:
diff changeset
247
shellac
parents:
diff changeset
248 def __init__(self, graph, node):
shellac
parents:
diff changeset
249 self._graph = graph
shellac
parents:
diff changeset
250 self._atlas = graph._adj[node]
shellac
parents:
diff changeset
251 self._node = node
shellac
parents:
diff changeset
252
shellac
parents:
diff changeset
253 def __len__(self):
shellac
parents:
diff changeset
254 return len(self._graph) - len(self._atlas) - 1
shellac
parents:
diff changeset
255
shellac
parents:
diff changeset
256 def __iter__(self):
shellac
parents:
diff changeset
257 return (n for n in self._graph if n not in self._atlas and n != self._node)
shellac
parents:
diff changeset
258
shellac
parents:
diff changeset
259 def __getitem__(self, nbr):
shellac
parents:
diff changeset
260 nbrs = set(self._graph._adj) - set(self._atlas) - {self._node}
shellac
parents:
diff changeset
261 if nbr in nbrs:
shellac
parents:
diff changeset
262 return self._graph.all_edge_dict
shellac
parents:
diff changeset
263 raise KeyError(nbr)
shellac
parents:
diff changeset
264
shellac
parents:
diff changeset
shellac
parents:
diff changeset
266 """An adjacency outer dict for AntiGraph"""
shellac
parents:
diff changeset
267
shellac
parents:
diff changeset
268 def __init__(self, graph):
shellac
parents:
diff changeset
269 self._graph = graph
shellac
parents:
diff changeset
270 self._atlas = graph._adj
shellac
parents:
diff changeset
271
shellac
parents:
diff changeset
272 def __len__(self):
shellac
parents:
diff changeset
273 return len(self._atlas)
shellac
parents:
diff changeset
274
shellac
parents:
diff changeset
275 def __iter__(self):
shellac
parents:
diff changeset
276 return iter(self._graph)
shellac
parents:
diff changeset
277
shellac
parents:
diff changeset
278 def __getitem__(self, node):
shellac
parents:
diff changeset
279 if node not in self._graph:
shellac
parents:
diff changeset
280 raise KeyError(node)
shellac
parents:
diff changeset
281 return self._graph.AntiAtlasView(self._graph, node)
shellac
parents:
diff changeset
282
shellac
parents:
diff changeset
283 @property
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
286
shellac
parents:
diff changeset
287 def subgraph(self, nodes):
shellac
parents:
diff changeset
288 """This subgraph method returns a full AntiGraph. Not a View"""
shellac
parents:
diff changeset
289 nodes = set(nodes)
shellac
parents:
diff changeset
290 G = _AntiGraph()
shellac
parents:
diff changeset
shellac
parents:
diff changeset
292 for n in G:
shellac
parents:
diff changeset
293 Gnbrs = G.adjlist_inner_dict_factory()
shellac
parents:
diff changeset
294 G._adj[n] = Gnbrs
shellac
parents:
diff changeset
295 for nbr, d in self._adj[n].items():
shellac
parents:
diff changeset
296 if nbr in G._adj:
shellac
parents:
diff changeset
297 Gnbrs[nbr] = d
shellac
parents:
diff changeset
298 G._adj[nbr][n] = d
shellac
parents:
diff changeset
299 G.graph = self.graph
shellac
parents:
diff changeset
300 return G
shellac
parents:
diff changeset
301
shellac
parents:
diff changeset
302 class AntiDegreeView(nx.reportviews.DegreeView):
shellac
parents:
diff changeset
303 def __iter__(self):
shellac
parents:
diff changeset
304 all_nodes = set(self._succ)
shellac
parents:
diff changeset
305 for n in self._nodes:
shellac
parents:
diff changeset
306 nbrs = all_nodes - set(self._succ[n]) - {n}
shellac
parents:
diff changeset
307 yield (n, len(nbrs))
shellac
parents:
diff changeset
308
shellac
parents:
diff changeset
309 def __getitem__(self, n):
shellac
parents:
diff changeset
310 nbrs = set(self._succ) - set(self._succ[n]) - {n}
shellac
parents:
diff changeset
311 # AntiGraph is a ThinGraph so all edges have weight 1
shellac
parents:
diff changeset
312 return len(nbrs) + (n in nbrs)
shellac
parents:
diff changeset
313
shellac
parents:
diff changeset
314 @property
shellac
parents:
diff changeset
315 def degree(self):
shellac
parents:
diff changeset
316 """Returns an iterator for (node, degree) and degree for single node.
shellac
parents:
diff changeset
317
shellac
parents:
diff changeset
318 The node degree is the number of edges adjacent to the node.
shellac
parents:
diff changeset
319
shellac
parents:
diff changeset
320 Parameters
shellac
parents:
diff changeset
321 ----------
shellac
parents:
diff changeset
322 nbunch : iterable container, optional (default=all nodes)
shellac
parents:
diff changeset
323 A container of nodes. The container will be iterated
shellac
parents:
diff changeset
324 through once.
shellac
parents:
diff changeset
325
shellac
parents:
diff changeset
326 weight : string or None, optional (default=None)
shellac
parents:
diff changeset
327 The edge attribute that holds the numerical value used
shellac
parents:
diff changeset
328 as a weight. If None, then each edge has weight 1.
shellac
parents:
diff changeset
329 The degree is the sum of the edge weights adjacent to the node.
shellac
parents:
diff changeset
330
shellac
parents:
diff changeset
331 Returns
shellac
parents:
diff changeset
332 -------
shellac
parents:
diff changeset
333 deg:
shellac
parents:
diff changeset
334 Degree of the node, if a single node is passed as argument.
shellac
parents:
diff changeset
335 nd_iter : an iterator
shellac
parents:
diff changeset
336 The iterator returns two-tuples of (node, degree).
shellac
parents:
diff changeset
337
shellac
parents:
diff changeset
shellac
parents:
diff changeset
339 --------
shellac
parents:
diff changeset
340 degree
shellac
parents:
diff changeset
341
shellac
parents:
diff changeset
342 Examples
shellac
parents:
diff changeset
343 --------
shellac
parents:
diff changeset
344 >>> G = nx.path_graph(4)
shellac
parents:
diff changeset
345 >>> G.degree(0) # node 0 with degree 1
shellac
parents:
diff changeset
346 1
shellac
parents:
diff changeset
347 >>> list(G.degree([0, 1]))
shellac
parents:
diff changeset
348 [(0, 1), (1, 2)]
shellac
parents:
diff changeset
349
shellac
parents:
diff changeset
350 """
shellac
parents:
diff changeset
351 return self.AntiDegreeView(self)
shellac
parents:
diff changeset
352
shellac
parents:
diff changeset
shellac
parents:
diff changeset
354 """Returns an iterator of (node, adjacency set) tuples for all nodes
shellac
parents:
diff changeset
355 in the dense graph.
shellac
parents:
diff changeset
356
shellac
parents:
diff changeset
357 This is the fastest way to look at every edge.
shellac
parents:
diff changeset
358 For directed graphs, only outgoing adjacencies are included.
shellac
parents:
diff changeset
359
shellac
parents:
diff changeset
360 Returns
shellac
parents:
diff changeset
361 -------
shellac
parents:
diff changeset
362 adj_iter : iterator
shellac
parents:
diff changeset
363 An iterator of (node, adjacency set) for all nodes in
shellac
parents:
diff changeset
364 the graph.
shellac
parents:
diff changeset
365
shellac
parents:
diff changeset
366 """