comparison env/lib/python3.9/site-packages/networkx/algorithms/centrality/betweenness.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Betweenness centrality measures."""
2 from heapq import heappush, heappop
3 from itertools import count
4 import warnings
5
6 from networkx.utils import py_random_state
7 from networkx.utils.decorators import not_implemented_for
8
9 __all__ = ["betweenness_centrality", "edge_betweenness_centrality", "edge_betweenness"]
10
11
12 @py_random_state(5)
13 @not_implemented_for("multigraph")
14 def betweenness_centrality(
15 G, k=None, normalized=True, weight=None, endpoints=False, seed=None
16 ):
17 r"""Compute the shortest-path betweenness centrality for nodes.
18
19 Betweenness centrality of a node $v$ is the sum of the
20 fraction of all-pairs shortest paths that pass through $v$
21
22 .. math::
23
24 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
25
26 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
27 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
28 those paths passing through some node $v$ other than $s, t$.
29 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
30 $\sigma(s, t|v) = 0$ [2]_.
31
32 Parameters
33 ----------
34 G : graph
35 A NetworkX graph.
36
37 k : int, optional (default=None)
38 If k is not None use k node samples to estimate betweenness.
39 The value of k <= n where n is the number of nodes in the graph.
40 Higher values give better approximation.
41
42 normalized : bool, optional
43 If True the betweenness values are normalized by `2/((n-1)(n-2))`
44 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
45 is the number of nodes in G.
46
47 weight : None or string, optional (default=None)
48 If None, all edge weights are considered equal.
49 Otherwise holds the name of the edge attribute used as weight.
50
51 endpoints : bool, optional
52 If True include the endpoints in the shortest path counts.
53
54 seed : integer, random_state, or None (default)
55 Indicator of random number generation state.
56 See :ref:`Randomness<randomness>`.
57 Note that this is only used if k is not None.
58
59 Returns
60 -------
61 nodes : dictionary
62 Dictionary of nodes with betweenness centrality as the value.
63
64 See Also
65 --------
66 edge_betweenness_centrality
67 load_centrality
68
69 Notes
70 -----
71 The algorithm is from Ulrik Brandes [1]_.
72 See [4]_ for the original first published version and [2]_ for details on
73 algorithms for variations and related metrics.
74
75 For approximate betweenness calculations set k=#samples to use
76 k nodes ("pivots") to estimate the betweenness values. For an estimate
77 of the number of pivots needed see [3]_.
78
79 For weighted graphs the edge weights must be greater than zero.
80 Zero edge weights can produce an infinite number of equal length
81 paths between pairs of nodes.
82
83 The total number of paths between source and target is counted
84 differently for directed and undirected graphs. Directed paths
85 are easy to count. Undirected paths are tricky: should a path
86 from "u" to "v" count as 1 undirected path or as 2 directed paths?
87
88 For betweenness_centrality we report the number of undirected
89 paths when G is undirected.
90
91 For betweenness_centrality_subset the reporting is different.
92 If the source and target subsets are the same, then we want
93 to count undirected paths. But if the source and target subsets
94 differ -- for example, if sources is {0} and targets is {1},
95 then we are only counting the paths in one direction. They are
96 undirected paths but we are counting them in a directed way.
97 To count them as undirected paths, each should count as half a path.
98
99 References
100 ----------
101 .. [1] Ulrik Brandes:
102 A Faster Algorithm for Betweenness Centrality.
103 Journal of Mathematical Sociology 25(2):163-177, 2001.
104 http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
105 .. [2] Ulrik Brandes:
106 On Variants of Shortest-Path Betweenness
107 Centrality and their Generic Computation.
108 Social Networks 30(2):136-145, 2008.
109 http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
110 .. [3] Ulrik Brandes and Christian Pich:
111 Centrality Estimation in Large Networks.
112 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
113 http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf
114 .. [4] Linton C. Freeman:
115 A set of measures of centrality based on betweenness.
116 Sociometry 40: 35–41, 1977
117 http://moreno.ss.uci.edu/23.pdf
118 """
119 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
120 if k is None:
121 nodes = G
122 else:
123 nodes = seed.sample(G.nodes(), k)
124 for s in nodes:
125 # single source shortest paths
126 if weight is None: # use BFS
127 S, P, sigma = _single_source_shortest_path_basic(G, s)
128 else: # use Dijkstra's algorithm
129 S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
130 # accumulation
131 if endpoints:
132 betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s)
133 else:
134 betweenness = _accumulate_basic(betweenness, S, P, sigma, s)
135 # rescaling
136 betweenness = _rescale(
137 betweenness,
138 len(G),
139 normalized=normalized,
140 directed=G.is_directed(),
141 k=k,
142 endpoints=endpoints,
143 )
144 return betweenness
145
146
147 @py_random_state(4)
148 def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
149 r"""Compute betweenness centrality for edges.
150
151 Betweenness centrality of an edge $e$ is the sum of the
152 fraction of all-pairs shortest paths that pass through $e$
153
154 .. math::
155
156 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
157
158 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
159 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
160 those paths passing through edge $e$ [2]_.
161
162 Parameters
163 ----------
164 G : graph
165 A NetworkX graph.
166
167 k : int, optional (default=None)
168 If k is not None use k node samples to estimate betweenness.
169 The value of k <= n where n is the number of nodes in the graph.
170 Higher values give better approximation.
171
172 normalized : bool, optional
173 If True the betweenness values are normalized by $2/(n(n-1))$
174 for graphs, and $1/(n(n-1))$ for directed graphs where $n$
175 is the number of nodes in G.
176
177 weight : None or string, optional (default=None)
178 If None, all edge weights are considered equal.
179 Otherwise holds the name of the edge attribute used as weight.
180
181 seed : integer, random_state, or None (default)
182 Indicator of random number generation state.
183 See :ref:`Randomness<randomness>`.
184 Note that this is only used if k is not None.
185
186 Returns
187 -------
188 edges : dictionary
189 Dictionary of edges with betweenness centrality as the value.
190
191 See Also
192 --------
193 betweenness_centrality
194 edge_load
195
196 Notes
197 -----
198 The algorithm is from Ulrik Brandes [1]_.
199
200 For weighted graphs the edge weights must be greater than zero.
201 Zero edge weights can produce an infinite number of equal length
202 paths between pairs of nodes.
203
204 References
205 ----------
206 .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
207 Journal of Mathematical Sociology 25(2):163-177, 2001.
208 http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
209 .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
210 Centrality and their Generic Computation.
211 Social Networks 30(2):136-145, 2008.
212 http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
213 """
214 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
215 # b[e]=0 for e in G.edges()
216 betweenness.update(dict.fromkeys(G.edges(), 0.0))
217 if k is None:
218 nodes = G
219 else:
220 nodes = seed.sample(G.nodes(), k)
221 for s in nodes:
222 # single source shortest paths
223 if weight is None: # use BFS
224 S, P, sigma = _single_source_shortest_path_basic(G, s)
225 else: # use Dijkstra's algorithm
226 S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
227 # accumulation
228 betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
229 # rescaling
230 for n in G: # remove nodes to only return edges
231 del betweenness[n]
232 betweenness = _rescale_e(
233 betweenness, len(G), normalized=normalized, directed=G.is_directed()
234 )
235 return betweenness
236
237
238 # obsolete name
239 def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None):
240 warnings.warn(
241 "edge_betweeness is replaced by edge_betweenness_centrality", DeprecationWarning
242 )
243 return edge_betweenness_centrality(G, k, normalized, weight, seed)
244
245
246 # helpers for betweenness centrality
247
248
249 def _single_source_shortest_path_basic(G, s):
250 S = []
251 P = {}
252 for v in G:
253 P[v] = []
254 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
255 D = {}
256 sigma[s] = 1.0
257 D[s] = 0
258 Q = [s]
259 while Q: # use BFS to find shortest paths
260 v = Q.pop(0)
261 S.append(v)
262 Dv = D[v]
263 sigmav = sigma[v]
264 for w in G[v]:
265 if w not in D:
266 Q.append(w)
267 D[w] = Dv + 1
268 if D[w] == Dv + 1: # this is a shortest path, count paths
269 sigma[w] += sigmav
270 P[w].append(v) # predecessors
271 return S, P, sigma
272
273
274 def _single_source_dijkstra_path_basic(G, s, weight):
275 # modified from Eppstein
276 S = []
277 P = {}
278 for v in G:
279 P[v] = []
280 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
281 D = {}
282 sigma[s] = 1.0
283 push = heappush
284 pop = heappop
285 seen = {s: 0}
286 c = count()
287 Q = [] # use Q as heap with (distance,node id) tuples
288 push(Q, (0, next(c), s, s))
289 while Q:
290 (dist, _, pred, v) = pop(Q)
291 if v in D:
292 continue # already searched this node.
293 sigma[v] += sigma[pred] # count paths
294 S.append(v)
295 D[v] = dist
296 for w, edgedata in G[v].items():
297 vw_dist = dist + edgedata.get(weight, 1)
298 if w not in D and (w not in seen or vw_dist < seen[w]):
299 seen[w] = vw_dist
300 push(Q, (vw_dist, next(c), v, w))
301 sigma[w] = 0.0
302 P[w] = [v]
303 elif vw_dist == seen[w]: # handle equal paths
304 sigma[w] += sigma[v]
305 P[w].append(v)
306 return S, P, sigma
307
308
309 def _accumulate_basic(betweenness, S, P, sigma, s):
310 delta = dict.fromkeys(S, 0)
311 while S:
312 w = S.pop()
313 coeff = (1 + delta[w]) / sigma[w]
314 for v in P[w]:
315 delta[v] += sigma[v] * coeff
316 if w != s:
317 betweenness[w] += delta[w]
318 return betweenness
319
320
321 def _accumulate_endpoints(betweenness, S, P, sigma, s):
322 betweenness[s] += len(S) - 1
323 delta = dict.fromkeys(S, 0)
324 while S:
325 w = S.pop()
326 coeff = (1 + delta[w]) / sigma[w]
327 for v in P[w]:
328 delta[v] += sigma[v] * coeff
329 if w != s:
330 betweenness[w] += delta[w] + 1
331 return betweenness
332
333
334 def _accumulate_edges(betweenness, S, P, sigma, s):
335 delta = dict.fromkeys(S, 0)
336 while S:
337 w = S.pop()
338 coeff = (1 + delta[w]) / sigma[w]
339 for v in P[w]:
340 c = sigma[v] * coeff
341 if (v, w) not in betweenness:
342 betweenness[(w, v)] += c
343 else:
344 betweenness[(v, w)] += c
345 delta[v] += c
346 if w != s:
347 betweenness[w] += delta[w]
348 return betweenness
349
350
351 def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False):
352 if normalized:
353 if endpoints:
354 if n < 2:
355 scale = None # no normalization
356 else:
357 # Scale factor should include endpoint nodes
358 scale = 1 / (n * (n - 1))
359 elif n <= 2:
360 scale = None # no normalization b=0 for all nodes
361 else:
362 scale = 1 / ((n - 1) * (n - 2))
363 else: # rescale by 2 for undirected graphs
364 if not directed:
365 scale = 0.5
366 else:
367 scale = None
368 if scale is not None:
369 if k is not None:
370 scale = scale * n / k
371 for v in betweenness:
372 betweenness[v] *= scale
373 return betweenness
374
375
376 def _rescale_e(betweenness, n, normalized, directed=False, k=None):
377 if normalized:
378 if n <= 1:
379 scale = None # no normalization b=0 for all nodes
380 else:
381 scale = 1 / (n * (n - 1))
382 else: # rescale by 2 for undirected graphs
383 if not directed:
384 scale = 0.5
385 else:
386 scale = None
387 if scale is not None:
388 if k is not None:
389 scale = scale * n / k
390 for v in betweenness:
391 betweenness[v] *= scale
392 return betweenness