comparison env/lib/python3.9/site-packages/networkx/algorithms/centrality/current_flow_betweenness.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Current-flow betweenness centrality measures."""
2 import networkx as nx
3 from networkx.algorithms.centrality.flow_matrix import (
4 CGInverseLaplacian,
5 flow_matrix_row,
6 FullInverseLaplacian,
7 laplacian_sparse_matrix,
8 SuperLUInverseLaplacian,
9 )
10 from networkx.utils import (
11 not_implemented_for,
12 reverse_cuthill_mckee_ordering,
13 py_random_state,
14 )
15
16 __all__ = [
17 "current_flow_betweenness_centrality",
18 "approximate_current_flow_betweenness_centrality",
19 "edge_current_flow_betweenness_centrality",
20 ]
21
22
23 @py_random_state(7)
24 @not_implemented_for("directed")
25 def approximate_current_flow_betweenness_centrality(
26 G,
27 normalized=True,
28 weight=None,
29 dtype=float,
30 solver="full",
31 epsilon=0.5,
32 kmax=10000,
33 seed=None,
34 ):
35 r"""Compute the approximate current-flow betweenness centrality for nodes.
36
37 Approximates the current-flow betweenness centrality within absolute
38 error of epsilon with high probability [1]_.
39
40
41 Parameters
42 ----------
43 G : graph
44 A NetworkX graph
45
46 normalized : bool, optional (default=True)
47 If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
48 n is the number of nodes in G.
49
50 weight : string or None, optional (default=None)
51 Key for edge data used as the edge weight.
52 If None, then use 1 as each edge weight.
53
54 dtype : data type (float)
55 Default data type for internal matrices.
56 Set to np.float32 for lower memory consumption.
57
58 solver : string (default='full')
59 Type of linear solver to use for computing the flow matrix.
60 Options are "full" (uses most memory), "lu" (recommended), and
61 "cg" (uses least memory).
62
63 epsilon: float
64 Absolute error tolerance.
65
66 kmax: int
67 Maximum number of sample node pairs to use for approximation.
68
69 seed : integer, random_state, or None (default)
70 Indicator of random number generation state.
71 See :ref:`Randomness<randomness>`.
72
73 Returns
74 -------
75 nodes : dictionary
76 Dictionary of nodes with betweenness centrality as the value.
77
78 See Also
79 --------
80 current_flow_betweenness_centrality
81
82 Notes
83 -----
84 The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
85 and the space required is $O(m)$ for $n$ nodes and $m$ edges.
86
87 If the edges have a 'weight' attribute they will be used as
88 weights in this algorithm. Unspecified weights are set to 1.
89
90 References
91 ----------
92 .. [1] Ulrik Brandes and Daniel Fleischer:
93 Centrality Measures Based on Current Flow.
94 Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
95 LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
96 http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
97 """
98 try:
99 import numpy as np
100 except ImportError as e:
101 raise ImportError(
102 "current_flow_betweenness_centrality requires NumPy " "http://numpy.org/"
103 ) from e
104 if not nx.is_connected(G):
105 raise nx.NetworkXError("Graph not connected.")
106 solvername = {
107 "full": FullInverseLaplacian,
108 "lu": SuperLUInverseLaplacian,
109 "cg": CGInverseLaplacian,
110 }
111 n = G.number_of_nodes()
112 ordering = list(reverse_cuthill_mckee_ordering(G))
113 # make a copy with integer labels according to rcm ordering
114 # this could be done without a copy if we really wanted to
115 H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
116 L = laplacian_sparse_matrix(
117 H, nodelist=range(n), weight=weight, dtype=dtype, format="csc"
118 )
119 C = solvername[solver](L, dtype=dtype) # initialize solver
120 betweenness = dict.fromkeys(H, 0.0)
121 nb = (n - 1.0) * (n - 2.0) # normalization factor
122 cstar = n * (n - 1) / nb
123 l = 1 # parameter in approximation, adjustable
124 k = l * int(np.ceil((cstar / epsilon) ** 2 * np.log(n)))
125 if k > kmax:
126 msg = f"Number random pairs k>kmax ({k}>{kmax}) "
127 raise nx.NetworkXError(msg, "Increase kmax or epsilon")
128 cstar2k = cstar / (2 * k)
129 for i in range(k):
130 s, t = seed.sample(range(n), 2)
131 b = np.zeros(n, dtype=dtype)
132 b[s] = 1
133 b[t] = -1
134 p = C.solve(b)
135 for v in H:
136 if v == s or v == t:
137 continue
138 for nbr in H[v]:
139 w = H[v][nbr].get(weight, 1.0)
140 betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
141 if normalized:
142 factor = 1.0
143 else:
144 factor = nb / 2.0
145 # remap to original node names and "unnormalize" if required
146 return {ordering[k]: float(v * factor) for k, v in betweenness.items()}
147
148
149 @not_implemented_for("directed")
150 def current_flow_betweenness_centrality(
151 G, normalized=True, weight=None, dtype=float, solver="full"
152 ):
153 r"""Compute current-flow betweenness centrality for nodes.
154
155 Current-flow betweenness centrality uses an electrical current
156 model for information spreading in contrast to betweenness
157 centrality which uses shortest paths.
158
159 Current-flow betweenness centrality is also known as
160 random-walk betweenness centrality [2]_.
161
162 Parameters
163 ----------
164 G : graph
165 A NetworkX graph
166
167 normalized : bool, optional (default=True)
168 If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
169 n is the number of nodes in G.
170
171 weight : string or None, optional (default=None)
172 Key for edge data used as the edge weight.
173 If None, then use 1 as each edge weight.
174
175 dtype : data type (float)
176 Default data type for internal matrices.
177 Set to np.float32 for lower memory consumption.
178
179 solver : string (default='full')
180 Type of linear solver to use for computing the flow matrix.
181 Options are "full" (uses most memory), "lu" (recommended), and
182 "cg" (uses least memory).
183
184 Returns
185 -------
186 nodes : dictionary
187 Dictionary of nodes with betweenness centrality as the value.
188
189 See Also
190 --------
191 approximate_current_flow_betweenness_centrality
192 betweenness_centrality
193 edge_betweenness_centrality
194 edge_current_flow_betweenness_centrality
195
196 Notes
197 -----
198 Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
199 time [1]_, where $I(n-1)$ is the time needed to compute the
200 inverse Laplacian. For a full matrix this is $O(n^3)$ but using
201 sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
202 Laplacian matrix condition number.
203
204 The space required is $O(nw)$ where $w$ is the width of the sparse
205 Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
206
207 If the edges have a 'weight' attribute they will be used as
208 weights in this algorithm. Unspecified weights are set to 1.
209
210 References
211 ----------
212 .. [1] Centrality Measures Based on Current Flow.
213 Ulrik Brandes and Daniel Fleischer,
214 Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
215 LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
216 http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
217
218 .. [2] A measure of betweenness centrality based on random walks,
219 M. E. J. Newman, Social Networks 27, 39-54 (2005).
220 """
221 if not nx.is_connected(G):
222 raise nx.NetworkXError("Graph not connected.")
223 n = G.number_of_nodes()
224 ordering = list(reverse_cuthill_mckee_ordering(G))
225 # make a copy with integer labels according to rcm ordering
226 # this could be done without a copy if we really wanted to
227 H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
228 betweenness = dict.fromkeys(H, 0.0) # b[v]=0 for v in H
229 for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
230 pos = dict(zip(row.argsort()[::-1], range(n)))
231 for i in range(n):
232 betweenness[s] += (i - pos[i]) * row[i]
233 betweenness[t] += (n - i - 1 - pos[i]) * row[i]
234 if normalized:
235 nb = (n - 1.0) * (n - 2.0) # normalization factor
236 else:
237 nb = 2.0
238 for v in H:
239 betweenness[v] = float((betweenness[v] - v) * 2.0 / nb)
240 return {ordering[k]: v for k, v in betweenness.items()}
241
242
243 @not_implemented_for("directed")
244 def edge_current_flow_betweenness_centrality(
245 G, normalized=True, weight=None, dtype=float, solver="full"
246 ):
247 r"""Compute current-flow betweenness centrality for edges.
248
249 Current-flow betweenness centrality uses an electrical current
250 model for information spreading in contrast to betweenness
251 centrality which uses shortest paths.
252
253 Current-flow betweenness centrality is also known as
254 random-walk betweenness centrality [2]_.
255
256 Parameters
257 ----------
258 G : graph
259 A NetworkX graph
260
261 normalized : bool, optional (default=True)
262 If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
263 n is the number of nodes in G.
264
265 weight : string or None, optional (default=None)
266 Key for edge data used as the edge weight.
267 If None, then use 1 as each edge weight.
268
269 dtype : data type (default=float)
270 Default data type for internal matrices.
271 Set to np.float32 for lower memory consumption.
272
273 solver : string (default='full')
274 Type of linear solver to use for computing the flow matrix.
275 Options are "full" (uses most memory), "lu" (recommended), and
276 "cg" (uses least memory).
277
278 Returns
279 -------
280 nodes : dictionary
281 Dictionary of edge tuples with betweenness centrality as the value.
282
283 Raises
284 ------
285 NetworkXError
286 The algorithm does not support DiGraphs.
287 If the input graph is an instance of DiGraph class, NetworkXError
288 is raised.
289
290 See Also
291 --------
292 betweenness_centrality
293 edge_betweenness_centrality
294 current_flow_betweenness_centrality
295
296 Notes
297 -----
298 Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
299 time [1]_, where $I(n-1)$ is the time needed to compute the
300 inverse Laplacian. For a full matrix this is $O(n^3)$ but using
301 sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
302 Laplacian matrix condition number.
303
304 The space required is $O(nw)$ where $w$ is the width of the sparse
305 Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
306
307 If the edges have a 'weight' attribute they will be used as
308 weights in this algorithm. Unspecified weights are set to 1.
309
310 References
311 ----------
312 .. [1] Centrality Measures Based on Current Flow.
313 Ulrik Brandes and Daniel Fleischer,
314 Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
315 LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
316 http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
317
318 .. [2] A measure of betweenness centrality based on random walks,
319 M. E. J. Newman, Social Networks 27, 39-54 (2005).
320 """
321 from networkx.utils import reverse_cuthill_mckee_ordering
322
323 if not nx.is_connected(G):
324 raise nx.NetworkXError("Graph not connected.")
325 n = G.number_of_nodes()
326 ordering = list(reverse_cuthill_mckee_ordering(G))
327 # make a copy with integer labels according to rcm ordering
328 # this could be done without a copy if we really wanted to
329 H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
330 edges = (tuple(sorted((u, v))) for u, v in H.edges())
331 betweenness = dict.fromkeys(edges, 0.0)
332 if normalized:
333 nb = (n - 1.0) * (n - 2.0) # normalization factor
334 else:
335 nb = 2.0
336 for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
337 pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
338 for i in range(n):
339 betweenness[e] += (i + 1 - pos[i]) * row[i]
340 betweenness[e] += (n - i - pos[i]) * row[i]
341 betweenness[e] /= nb
342 return {(ordering[s], ordering[t]): float(v) for (s, t), v in betweenness.items()}