comparison env/lib/python3.9/site-packages/networkx/algorithms/approximation/kcomponents.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """ Fast approximation for k-component structure
2 """
3 import itertools
4 from collections import defaultdict
5 from collections.abc import Mapping
6
7 import networkx as nx
8 from networkx.exception import NetworkXError
9 from networkx.utils import not_implemented_for
10
11 from networkx.algorithms.approximation import local_node_connectivity
12
13
14 __all__ = ["k_components"]
15
16
17 not_implemented_for("directed")
18
19
20 def k_components(G, min_density=0.95):
21 r"""Returns the approximate k-component structure of a graph G.
22
23 A `k`-component is a maximal subgraph of a graph G that has, at least,
24 node connectivity `k`: we need to remove at least `k` nodes to break it
25 into more components. `k`-components have an inherent hierarchical
26 structure because they are nested in terms of connectivity: a connected
27 graph can contain several 2-components, each of which can contain
28 one or more 3-components, and so forth.
29
30 This implementation is based on the fast heuristics to approximate
31 the `k`-component structure of a graph [1]_. Which, in turn, it is based on
32 a fast approximation algorithm for finding good lower bounds of the number
33 of node independent paths between two nodes [2]_.
34
35 Parameters
36 ----------
37 G : NetworkX graph
38 Undirected graph
39
40 min_density : Float
41 Density relaxation threshold. Default value 0.95
42
43 Returns
44 -------
45 k_components : dict
46 Dictionary with connectivity level `k` as key and a list of
47 sets of nodes that form a k-component of level `k` as values.
48
49
50 Examples
51 --------
52 >>> # Petersen graph has 10 nodes and it is triconnected, thus all
53 >>> # nodes are in a single component on all three connectivity levels
54 >>> from networkx.algorithms import approximation as apxa
55 >>> G = nx.petersen_graph()
56 >>> k_components = apxa.k_components(G)
57
58 Notes
59 -----
60 The logic of the approximation algorithm for computing the `k`-component
61 structure [1]_ is based on repeatedly applying simple and fast algorithms
62 for `k`-cores and biconnected components in order to narrow down the
63 number of pairs of nodes over which we have to compute White and Newman's
64 approximation algorithm for finding node independent paths [2]_. More
65 formally, this algorithm is based on Whitney's theorem, which states
66 an inclusion relation among node connectivity, edge connectivity, and
67 minimum degree for any graph G. This theorem implies that every
68 `k`-component is nested inside a `k`-edge-component, which in turn,
69 is contained in a `k`-core. Thus, this algorithm computes node independent
70 paths among pairs of nodes in each biconnected part of each `k`-core,
71 and repeats this procedure for each `k` from 3 to the maximal core number
72 of a node in the input graph.
73
74 Because, in practice, many nodes of the core of level `k` inside a
75 bicomponent actually are part of a component of level k, the auxiliary
76 graph needed for the algorithm is likely to be very dense. Thus, we use
77 a complement graph data structure (see `AntiGraph`) to save memory.
78 AntiGraph only stores information of the edges that are *not* present
79 in the actual auxiliary graph. When applying algorithms to this
80 complement graph data structure, it behaves as if it were the dense
81 version.
82
83 See also
84 --------
85 k_components
86
87 References
88 ----------
89 .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion:
90 Visualization and Heuristics for Fast Computation.
91 https://arxiv.org/pdf/1503.04476v1
92
93 .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for
94 Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035
95 http://eclectic.ss.uci.edu/~drwhite/working.pdf
96
97 .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness:
98 A hierarchical conception of social groups.
99 American Sociological Review 68(1), 103--28.
100 http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
101
102 """
103 # Dictionary with connectivity level (k) as keys and a list of
104 # sets of nodes that form a k-component as values
105 k_components = defaultdict(list)
106 # make a few functions local for speed
107 node_connectivity = local_node_connectivity
108 k_core = nx.k_core
109 core_number = nx.core_number
110 biconnected_components = nx.biconnected_components
111 density = nx.density
112 combinations = itertools.combinations
113 # Exact solution for k = {1,2}
114 # There is a linear time algorithm for triconnectivity, if we had an
115 # implementation available we could start from k = 4.
116 for component in nx.connected_components(G):
117 # isolated nodes have connectivity 0
118 comp = set(component)
119 if len(comp) > 1:
120 k_components[1].append(comp)
121 for bicomponent in nx.biconnected_components(G):
122 # avoid considering dyads as bicomponents
123 bicomp = set(bicomponent)
124 if len(bicomp) > 2:
125 k_components[2].append(bicomp)
126 # There is no k-component of k > maximum core number
127 # \kappa(G) <= \lambda(G) <= \delta(G)
128 g_cnumber = core_number(G)
129 max_core = max(g_cnumber.values())
130 for k in range(3, max_core + 1):
131 C = k_core(G, k, core_number=g_cnumber)
132 for nodes in biconnected_components(C):
133 # Build a subgraph SG induced by the nodes that are part of
134 # each biconnected component of the k-core subgraph C.
135 if len(nodes) < k:
136 continue
137 SG = G.subgraph(nodes)
138 # Build auxiliary graph
139 H = _AntiGraph()
140 H.add_nodes_from(SG.nodes())
141 for u, v in combinations(SG, 2):
142 K = node_connectivity(SG, u, v, cutoff=k)
143 if k > K:
144 H.add_edge(u, v)
145 for h_nodes in biconnected_components(H):
146 if len(h_nodes) <= k:
147 continue
148 SH = H.subgraph(h_nodes)
149 for Gc in _cliques_heuristic(SG, SH, k, min_density):
150 for k_nodes in biconnected_components(Gc):
151 Gk = nx.k_core(SG.subgraph(k_nodes), k)
152 if len(Gk) <= k:
153 continue
154 k_components[k].append(set(Gk))
155 return k_components
156
157
158 def _cliques_heuristic(G, H, k, min_density):
159 h_cnumber = nx.core_number(H)
160 for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
161 cands = {n for n, c in h_cnumber.items() if c == c_value}
162 # Skip checking for overlap for the highest core value
163 if i == 0:
164 overlap = False
165 else:
166 overlap = set.intersection(
167 *[{x for x in H[n] if x not in cands} for n in cands]
168 )
169 if overlap and len(overlap) < k:
170 SH = H.subgraph(cands | overlap)
171 else:
172 SH = H.subgraph(cands)
173 sh_cnumber = nx.core_number(SH)
174 SG = nx.k_core(G.subgraph(SH), k)
175 while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
176 # This subgraph must be writable => .copy()
177 SH = H.subgraph(SG).copy()
178 if len(SH) <= k:
179 break
180 sh_cnumber = nx.core_number(SH)
181 sh_deg = dict(SH.degree())
182 min_deg = min(sh_deg.values())
183 SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
184 SG = nx.k_core(G.subgraph(SH), k)
185 else:
186 yield SG
187
188
189 def _same(measure, tol=0):
190 vals = set(measure.values())
191 if (max(vals) - min(vals)) <= tol:
192 return True
193 return False
194
195
196 class _AntiGraph(nx.Graph):
197 """
198 Class for complement graphs.
199
200 The main goal is to be able to work with big and dense graphs with
201 a low memory foodprint.
202
203 In this class you add the edges that *do not exist* in the dense graph,
204 the report methods of the class return the neighbors, the edges and
205 the degree as if it was the dense graph. Thus it's possible to use
206 an instance of this class with some of NetworkX functions. In this
207 case we only use k-core, connected_components, and biconnected_components.
208 """
209
210 all_edge_dict = {"weight": 1}
211
212 def single_edge_dict(self):
213 return self.all_edge_dict
214
215 edge_attr_dict_factory = single_edge_dict
216
217 def __getitem__(self, n):
218 """Returns a dict of neighbors of node n in the dense graph.
219
220 Parameters
221 ----------
222 n : node
223 A node in the graph.
224
225 Returns
226 -------
227 adj_dict : dictionary
228 The adjacency dictionary for nodes connected to n.
229
230 """
231 all_edge_dict = self.all_edge_dict
232 return {
233 node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n}
234 }
235
236 def neighbors(self, n):
237 """Returns an iterator over all neighbors of node n in the
238 dense graph.
239 """
240 try:
241 return iter(set(self._adj) - set(self._adj[n]) - {n})
242 except KeyError as e:
243 raise NetworkXError(f"The node {n} is not in the graph.") from e
244
245 class AntiAtlasView(Mapping):
246 """An adjacency inner dict for AntiGraph"""
247
248 def __init__(self, graph, node):
249 self._graph = graph
250 self._atlas = graph._adj[node]
251 self._node = node
252
253 def __len__(self):
254 return len(self._graph) - len(self._atlas) - 1
255
256 def __iter__(self):
257 return (n for n in self._graph if n not in self._atlas and n != self._node)
258
259 def __getitem__(self, nbr):
260 nbrs = set(self._graph._adj) - set(self._atlas) - {self._node}
261 if nbr in nbrs:
262 return self._graph.all_edge_dict
263 raise KeyError(nbr)
264
265 class AntiAdjacencyView(AntiAtlasView):
266 """An adjacency outer dict for AntiGraph"""
267
268 def __init__(self, graph):
269 self._graph = graph
270 self._atlas = graph._adj
271
272 def __len__(self):
273 return len(self._atlas)
274
275 def __iter__(self):
276 return iter(self._graph)
277
278 def __getitem__(self, node):
279 if node not in self._graph:
280 raise KeyError(node)
281 return self._graph.AntiAtlasView(self._graph, node)
282
283 @property
284 def adj(self):
285 return self.AntiAdjacencyView(self)
286
287 def subgraph(self, nodes):
288 """This subgraph method returns a full AntiGraph. Not a View"""
289 nodes = set(nodes)
290 G = _AntiGraph()
291 G.add_nodes_from(nodes)
292 for n in G:
293 Gnbrs = G.adjlist_inner_dict_factory()
294 G._adj[n] = Gnbrs
295 for nbr, d in self._adj[n].items():
296 if nbr in G._adj:
297 Gnbrs[nbr] = d
298 G._adj[nbr][n] = d
299 G.graph = self.graph
300 return G
301
302 class AntiDegreeView(nx.reportviews.DegreeView):
303 def __iter__(self):
304 all_nodes = set(self._succ)
305 for n in self._nodes:
306 nbrs = all_nodes - set(self._succ[n]) - {n}
307 yield (n, len(nbrs))
308
309 def __getitem__(self, n):
310 nbrs = set(self._succ) - set(self._succ[n]) - {n}
311 # AntiGraph is a ThinGraph so all edges have weight 1
312 return len(nbrs) + (n in nbrs)
313
314 @property
315 def degree(self):
316 """Returns an iterator for (node, degree) and degree for single node.
317
318 The node degree is the number of edges adjacent to the node.
319
320 Parameters
321 ----------
322 nbunch : iterable container, optional (default=all nodes)
323 A container of nodes. The container will be iterated
324 through once.
325
326 weight : string or None, optional (default=None)
327 The edge attribute that holds the numerical value used
328 as a weight. If None, then each edge has weight 1.
329 The degree is the sum of the edge weights adjacent to the node.
330
331 Returns
332 -------
333 deg:
334 Degree of the node, if a single node is passed as argument.
335 nd_iter : an iterator
336 The iterator returns two-tuples of (node, degree).
337
338 See Also
339 --------
340 degree
341
342 Examples
343 --------
344 >>> G = nx.path_graph(4)
345 >>> G.degree(0) # node 0 with degree 1
346 1
347 >>> list(G.degree([0, 1]))
348 [(0, 1), (1, 2)]
349
350 """
351 return self.AntiDegreeView(self)
352
353 def adjacency(self):
354 """Returns an iterator of (node, adjacency set) tuples for all nodes
355 in the dense graph.
356
357 This is the fastest way to look at every edge.
358 For directed graphs, only outgoing adjacencies are included.
359
360 Returns
361 -------
362 adj_iter : iterator
363 An iterator of (node, adjacency set) for all nodes in
364 the graph.
365
366 """
367 for n in self._adj:
368 yield (n, set(self._adj) - set(self._adj[n]) - {n})