Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/centrality/betweenness.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Betweenness centrality measures.""" | |
2 from heapq import heappush, heappop | |
3 from itertools import count | |
4 import warnings | |
5 | |
6 from networkx.utils import py_random_state | |
7 from networkx.utils.decorators import not_implemented_for | |
8 | |
9 __all__ = ["betweenness_centrality", "edge_betweenness_centrality", "edge_betweenness"] | |
10 | |
11 | |
12 @py_random_state(5) | |
13 @not_implemented_for("multigraph") | |
14 def betweenness_centrality( | |
15 G, k=None, normalized=True, weight=None, endpoints=False, seed=None | |
16 ): | |
17 r"""Compute the shortest-path betweenness centrality for nodes. | |
18 | |
19 Betweenness centrality of a node $v$ is the sum of the | |
20 fraction of all-pairs shortest paths that pass through $v$ | |
21 | |
22 .. math:: | |
23 | |
24 c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)} | |
25 | |
26 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of | |
27 shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of | |
28 those paths passing through some node $v$ other than $s, t$. | |
29 If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$, | |
30 $\sigma(s, t|v) = 0$ [2]_. | |
31 | |
32 Parameters | |
33 ---------- | |
34 G : graph | |
35 A NetworkX graph. | |
36 | |
37 k : int, optional (default=None) | |
38 If k is not None use k node samples to estimate betweenness. | |
39 The value of k <= n where n is the number of nodes in the graph. | |
40 Higher values give better approximation. | |
41 | |
42 normalized : bool, optional | |
43 If True the betweenness values are normalized by `2/((n-1)(n-2))` | |
44 for graphs, and `1/((n-1)(n-2))` for directed graphs where `n` | |
45 is the number of nodes in G. | |
46 | |
47 weight : None or string, optional (default=None) | |
48 If None, all edge weights are considered equal. | |
49 Otherwise holds the name of the edge attribute used as weight. | |
50 | |
51 endpoints : bool, optional | |
52 If True include the endpoints in the shortest path counts. | |
53 | |
54 seed : integer, random_state, or None (default) | |
55 Indicator of random number generation state. | |
56 See :ref:`Randomness<randomness>`. | |
57 Note that this is only used if k is not None. | |
58 | |
59 Returns | |
60 ------- | |
61 nodes : dictionary | |
62 Dictionary of nodes with betweenness centrality as the value. | |
63 | |
64 See Also | |
65 -------- | |
66 edge_betweenness_centrality | |
67 load_centrality | |
68 | |
69 Notes | |
70 ----- | |
71 The algorithm is from Ulrik Brandes [1]_. | |
72 See [4]_ for the original first published version and [2]_ for details on | |
73 algorithms for variations and related metrics. | |
74 | |
75 For approximate betweenness calculations set k=#samples to use | |
76 k nodes ("pivots") to estimate the betweenness values. For an estimate | |
77 of the number of pivots needed see [3]_. | |
78 | |
79 For weighted graphs the edge weights must be greater than zero. | |
80 Zero edge weights can produce an infinite number of equal length | |
81 paths between pairs of nodes. | |
82 | |
83 The total number of paths between source and target is counted | |
84 differently for directed and undirected graphs. Directed paths | |
85 are easy to count. Undirected paths are tricky: should a path | |
86 from "u" to "v" count as 1 undirected path or as 2 directed paths? | |
87 | |
88 For betweenness_centrality we report the number of undirected | |
89 paths when G is undirected. | |
90 | |
91 For betweenness_centrality_subset the reporting is different. | |
92 If the source and target subsets are the same, then we want | |
93 to count undirected paths. But if the source and target subsets | |
94 differ -- for example, if sources is {0} and targets is {1}, | |
95 then we are only counting the paths in one direction. They are | |
96 undirected paths but we are counting them in a directed way. | |
97 To count them as undirected paths, each should count as half a path. | |
98 | |
99 References | |
100 ---------- | |
101 .. [1] Ulrik Brandes: | |
102 A Faster Algorithm for Betweenness Centrality. | |
103 Journal of Mathematical Sociology 25(2):163-177, 2001. | |
104 http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf | |
105 .. [2] Ulrik Brandes: | |
106 On Variants of Shortest-Path Betweenness | |
107 Centrality and their Generic Computation. | |
108 Social Networks 30(2):136-145, 2008. | |
109 http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf | |
110 .. [3] Ulrik Brandes and Christian Pich: | |
111 Centrality Estimation in Large Networks. | |
112 International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007. | |
113 http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf | |
114 .. [4] Linton C. Freeman: | |
115 A set of measures of centrality based on betweenness. | |
116 Sociometry 40: 35–41, 1977 | |
117 http://moreno.ss.uci.edu/23.pdf | |
118 """ | |
119 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G | |
120 if k is None: | |
121 nodes = G | |
122 else: | |
123 nodes = seed.sample(G.nodes(), k) | |
124 for s in nodes: | |
125 # single source shortest paths | |
126 if weight is None: # use BFS | |
127 S, P, sigma = _single_source_shortest_path_basic(G, s) | |
128 else: # use Dijkstra's algorithm | |
129 S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight) | |
130 # accumulation | |
131 if endpoints: | |
132 betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s) | |
133 else: | |
134 betweenness = _accumulate_basic(betweenness, S, P, sigma, s) | |
135 # rescaling | |
136 betweenness = _rescale( | |
137 betweenness, | |
138 len(G), | |
139 normalized=normalized, | |
140 directed=G.is_directed(), | |
141 k=k, | |
142 endpoints=endpoints, | |
143 ) | |
144 return betweenness | |
145 | |
146 | |
147 @py_random_state(4) | |
148 def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None): | |
149 r"""Compute betweenness centrality for edges. | |
150 | |
151 Betweenness centrality of an edge $e$ is the sum of the | |
152 fraction of all-pairs shortest paths that pass through $e$ | |
153 | |
154 .. math:: | |
155 | |
156 c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)} | |
157 | |
158 where $V$ is the set of nodes, $\sigma(s, t)$ is the number of | |
159 shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of | |
160 those paths passing through edge $e$ [2]_. | |
161 | |
162 Parameters | |
163 ---------- | |
164 G : graph | |
165 A NetworkX graph. | |
166 | |
167 k : int, optional (default=None) | |
168 If k is not None use k node samples to estimate betweenness. | |
169 The value of k <= n where n is the number of nodes in the graph. | |
170 Higher values give better approximation. | |
171 | |
172 normalized : bool, optional | |
173 If True the betweenness values are normalized by $2/(n(n-1))$ | |
174 for graphs, and $1/(n(n-1))$ for directed graphs where $n$ | |
175 is the number of nodes in G. | |
176 | |
177 weight : None or string, optional (default=None) | |
178 If None, all edge weights are considered equal. | |
179 Otherwise holds the name of the edge attribute used as weight. | |
180 | |
181 seed : integer, random_state, or None (default) | |
182 Indicator of random number generation state. | |
183 See :ref:`Randomness<randomness>`. | |
184 Note that this is only used if k is not None. | |
185 | |
186 Returns | |
187 ------- | |
188 edges : dictionary | |
189 Dictionary of edges with betweenness centrality as the value. | |
190 | |
191 See Also | |
192 -------- | |
193 betweenness_centrality | |
194 edge_load | |
195 | |
196 Notes | |
197 ----- | |
198 The algorithm is from Ulrik Brandes [1]_. | |
199 | |
200 For weighted graphs the edge weights must be greater than zero. | |
201 Zero edge weights can produce an infinite number of equal length | |
202 paths between pairs of nodes. | |
203 | |
204 References | |
205 ---------- | |
206 .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes, | |
207 Journal of Mathematical Sociology 25(2):163-177, 2001. | |
208 http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf | |
209 .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness | |
210 Centrality and their Generic Computation. | |
211 Social Networks 30(2):136-145, 2008. | |
212 http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf | |
213 """ | |
214 betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G | |
215 # b[e]=0 for e in G.edges() | |
216 betweenness.update(dict.fromkeys(G.edges(), 0.0)) | |
217 if k is None: | |
218 nodes = G | |
219 else: | |
220 nodes = seed.sample(G.nodes(), k) | |
221 for s in nodes: | |
222 # single source shortest paths | |
223 if weight is None: # use BFS | |
224 S, P, sigma = _single_source_shortest_path_basic(G, s) | |
225 else: # use Dijkstra's algorithm | |
226 S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight) | |
227 # accumulation | |
228 betweenness = _accumulate_edges(betweenness, S, P, sigma, s) | |
229 # rescaling | |
230 for n in G: # remove nodes to only return edges | |
231 del betweenness[n] | |
232 betweenness = _rescale_e( | |
233 betweenness, len(G), normalized=normalized, directed=G.is_directed() | |
234 ) | |
235 return betweenness | |
236 | |
237 | |
238 # obsolete name | |
239 def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None): | |
240 warnings.warn( | |
241 "edge_betweeness is replaced by edge_betweenness_centrality", DeprecationWarning | |
242 ) | |
243 return edge_betweenness_centrality(G, k, normalized, weight, seed) | |
244 | |
245 | |
246 # helpers for betweenness centrality | |
247 | |
248 | |
249 def _single_source_shortest_path_basic(G, s): | |
250 S = [] | |
251 P = {} | |
252 for v in G: | |
253 P[v] = [] | |
254 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G | |
255 D = {} | |
256 sigma[s] = 1.0 | |
257 D[s] = 0 | |
258 Q = [s] | |
259 while Q: # use BFS to find shortest paths | |
260 v = Q.pop(0) | |
261 S.append(v) | |
262 Dv = D[v] | |
263 sigmav = sigma[v] | |
264 for w in G[v]: | |
265 if w not in D: | |
266 Q.append(w) | |
267 D[w] = Dv + 1 | |
268 if D[w] == Dv + 1: # this is a shortest path, count paths | |
269 sigma[w] += sigmav | |
270 P[w].append(v) # predecessors | |
271 return S, P, sigma | |
272 | |
273 | |
274 def _single_source_dijkstra_path_basic(G, s, weight): | |
275 # modified from Eppstein | |
276 S = [] | |
277 P = {} | |
278 for v in G: | |
279 P[v] = [] | |
280 sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G | |
281 D = {} | |
282 sigma[s] = 1.0 | |
283 push = heappush | |
284 pop = heappop | |
285 seen = {s: 0} | |
286 c = count() | |
287 Q = [] # use Q as heap with (distance,node id) tuples | |
288 push(Q, (0, next(c), s, s)) | |
289 while Q: | |
290 (dist, _, pred, v) = pop(Q) | |
291 if v in D: | |
292 continue # already searched this node. | |
293 sigma[v] += sigma[pred] # count paths | |
294 S.append(v) | |
295 D[v] = dist | |
296 for w, edgedata in G[v].items(): | |
297 vw_dist = dist + edgedata.get(weight, 1) | |
298 if w not in D and (w not in seen or vw_dist < seen[w]): | |
299 seen[w] = vw_dist | |
300 push(Q, (vw_dist, next(c), v, w)) | |
301 sigma[w] = 0.0 | |
302 P[w] = [v] | |
303 elif vw_dist == seen[w]: # handle equal paths | |
304 sigma[w] += sigma[v] | |
305 P[w].append(v) | |
306 return S, P, sigma | |
307 | |
308 | |
309 def _accumulate_basic(betweenness, S, P, sigma, s): | |
310 delta = dict.fromkeys(S, 0) | |
311 while S: | |
312 w = S.pop() | |
313 coeff = (1 + delta[w]) / sigma[w] | |
314 for v in P[w]: | |
315 delta[v] += sigma[v] * coeff | |
316 if w != s: | |
317 betweenness[w] += delta[w] | |
318 return betweenness | |
319 | |
320 | |
321 def _accumulate_endpoints(betweenness, S, P, sigma, s): | |
322 betweenness[s] += len(S) - 1 | |
323 delta = dict.fromkeys(S, 0) | |
324 while S: | |
325 w = S.pop() | |
326 coeff = (1 + delta[w]) / sigma[w] | |
327 for v in P[w]: | |
328 delta[v] += sigma[v] * coeff | |
329 if w != s: | |
330 betweenness[w] += delta[w] + 1 | |
331 return betweenness | |
332 | |
333 | |
334 def _accumulate_edges(betweenness, S, P, sigma, s): | |
335 delta = dict.fromkeys(S, 0) | |
336 while S: | |
337 w = S.pop() | |
338 coeff = (1 + delta[w]) / sigma[w] | |
339 for v in P[w]: | |
340 c = sigma[v] * coeff | |
341 if (v, w) not in betweenness: | |
342 betweenness[(w, v)] += c | |
343 else: | |
344 betweenness[(v, w)] += c | |
345 delta[v] += c | |
346 if w != s: | |
347 betweenness[w] += delta[w] | |
348 return betweenness | |
349 | |
350 | |
351 def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False): | |
352 if normalized: | |
353 if endpoints: | |
354 if n < 2: | |
355 scale = None # no normalization | |
356 else: | |
357 # Scale factor should include endpoint nodes | |
358 scale = 1 / (n * (n - 1)) | |
359 elif n <= 2: | |
360 scale = None # no normalization b=0 for all nodes | |
361 else: | |
362 scale = 1 / ((n - 1) * (n - 2)) | |
363 else: # rescale by 2 for undirected graphs | |
364 if not directed: | |
365 scale = 0.5 | |
366 else: | |
367 scale = None | |
368 if scale is not None: | |
369 if k is not None: | |
370 scale = scale * n / k | |
371 for v in betweenness: | |
372 betweenness[v] *= scale | |
373 return betweenness | |
374 | |
375 | |
376 def _rescale_e(betweenness, n, normalized, directed=False, k=None): | |
377 if normalized: | |
378 if n <= 1: | |
379 scale = None # no normalization b=0 for all nodes | |
380 else: | |
381 scale = 1 / (n * (n - 1)) | |
382 else: # rescale by 2 for undirected graphs | |
383 if not directed: | |
384 scale = 0.5 | |
385 else: | |
386 scale = None | |
387 if scale is not None: | |
388 if k is not None: | |
389 scale = scale * n / k | |
390 for v in betweenness: | |
391 betweenness[v] *= scale | |
392 return betweenness |