Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/cluster.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Algorithms to characterize the number of triangles in a graph.""" | |
2 | |
3 from itertools import chain | |
4 from itertools import combinations | |
5 from collections import Counter | |
6 | |
7 from networkx.utils import not_implemented_for | |
8 | |
9 __all__ = [ | |
10 "triangles", | |
11 "average_clustering", | |
12 "clustering", | |
13 "transitivity", | |
14 "square_clustering", | |
15 "generalized_degree", | |
16 ] | |
17 | |
18 | |
19 @not_implemented_for("directed") | |
20 def triangles(G, nodes=None): | |
21 """Compute the number of triangles. | |
22 | |
23 Finds the number of triangles that include a node as one vertex. | |
24 | |
25 Parameters | |
26 ---------- | |
27 G : graph | |
28 A networkx graph | |
29 nodes : container of nodes, optional (default= all nodes in G) | |
30 Compute triangles for nodes in this container. | |
31 | |
32 Returns | |
33 ------- | |
34 out : dictionary | |
35 Number of triangles keyed by node label. | |
36 | |
37 Examples | |
38 -------- | |
39 >>> G = nx.complete_graph(5) | |
40 >>> print(nx.triangles(G, 0)) | |
41 6 | |
42 >>> print(nx.triangles(G)) | |
43 {0: 6, 1: 6, 2: 6, 3: 6, 4: 6} | |
44 >>> print(list(nx.triangles(G, (0, 1)).values())) | |
45 [6, 6] | |
46 | |
47 Notes | |
48 ----- | |
49 When computing triangles for the entire graph each triangle is counted | |
50 three times, once at each node. Self loops are ignored. | |
51 | |
52 """ | |
53 # If `nodes` represents a single node in the graph, return only its number | |
54 # of triangles. | |
55 if nodes in G: | |
56 return next(_triangles_and_degree_iter(G, nodes))[2] // 2 | |
57 # Otherwise, `nodes` represents an iterable of nodes, so return a | |
58 # dictionary mapping node to number of triangles. | |
59 return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)} | |
60 | |
61 | |
62 @not_implemented_for("multigraph") | |
63 def _triangles_and_degree_iter(G, nodes=None): | |
64 """ Return an iterator of (node, degree, triangles, generalized degree). | |
65 | |
66 This double counts triangles so you may want to divide by 2. | |
67 See degree(), triangles() and generalized_degree() for definitions | |
68 and details. | |
69 | |
70 """ | |
71 if nodes is None: | |
72 nodes_nbrs = G.adj.items() | |
73 else: | |
74 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) | |
75 | |
76 for v, v_nbrs in nodes_nbrs: | |
77 vs = set(v_nbrs) - {v} | |
78 gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs) | |
79 ntriangles = sum(k * val for k, val in gen_degree.items()) | |
80 yield (v, len(vs), ntriangles, gen_degree) | |
81 | |
82 | |
83 @not_implemented_for("multigraph") | |
84 def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"): | |
85 """ Return an iterator of (node, degree, weighted_triangles). | |
86 | |
87 Used for weighted clustering. | |
88 | |
89 """ | |
90 if weight is None or G.number_of_edges() == 0: | |
91 max_weight = 1 | |
92 else: | |
93 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) | |
94 if nodes is None: | |
95 nodes_nbrs = G.adj.items() | |
96 else: | |
97 nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes)) | |
98 | |
99 def wt(u, v): | |
100 return G[u][v].get(weight, 1) / max_weight | |
101 | |
102 for i, nbrs in nodes_nbrs: | |
103 inbrs = set(nbrs) - {i} | |
104 weighted_triangles = 0 | |
105 seen = set() | |
106 for j in inbrs: | |
107 seen.add(j) | |
108 # This prevents double counting. | |
109 jnbrs = set(G[j]) - seen | |
110 # Only compute the edge weight once, before the inner inner | |
111 # loop. | |
112 wij = wt(i, j) | |
113 weighted_triangles += sum( | |
114 (wij * wt(j, k) * wt(k, i)) ** (1 / 3) for k in inbrs & jnbrs | |
115 ) | |
116 yield (i, len(inbrs), 2 * weighted_triangles) | |
117 | |
118 | |
119 @not_implemented_for("multigraph") | |
120 def _directed_triangles_and_degree_iter(G, nodes=None): | |
121 """ Return an iterator of | |
122 (node, total_degree, reciprocal_degree, directed_triangles). | |
123 | |
124 Used for directed clustering. | |
125 | |
126 """ | |
127 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) | |
128 | |
129 for i, preds, succs in nodes_nbrs: | |
130 ipreds = set(preds) - {i} | |
131 isuccs = set(succs) - {i} | |
132 | |
133 directed_triangles = 0 | |
134 for j in chain(ipreds, isuccs): | |
135 jpreds = set(G._pred[j]) - {j} | |
136 jsuccs = set(G._succ[j]) - {j} | |
137 directed_triangles += sum( | |
138 1 | |
139 for k in chain( | |
140 (ipreds & jpreds), | |
141 (ipreds & jsuccs), | |
142 (isuccs & jpreds), | |
143 (isuccs & jsuccs), | |
144 ) | |
145 ) | |
146 dtotal = len(ipreds) + len(isuccs) | |
147 dbidirectional = len(ipreds & isuccs) | |
148 yield (i, dtotal, dbidirectional, directed_triangles) | |
149 | |
150 | |
151 @not_implemented_for("multigraph") | |
152 def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"): | |
153 """ Return an iterator of | |
154 (node, total_degree, reciprocal_degree, directed_weighted_triangles). | |
155 | |
156 Used for directed weighted clustering. | |
157 | |
158 """ | |
159 if weight is None or G.number_of_edges() == 0: | |
160 max_weight = 1 | |
161 else: | |
162 max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True)) | |
163 | |
164 nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes)) | |
165 | |
166 def wt(u, v): | |
167 return G[u][v].get(weight, 1) / max_weight | |
168 | |
169 for i, preds, succs in nodes_nbrs: | |
170 ipreds = set(preds) - {i} | |
171 isuccs = set(succs) - {i} | |
172 | |
173 directed_triangles = 0 | |
174 for j in ipreds: | |
175 jpreds = set(G._pred[j]) - {j} | |
176 jsuccs = set(G._succ[j]) - {j} | |
177 directed_triangles += sum( | |
178 (wt(j, i) * wt(k, i) * wt(k, j)) ** (1 / 3) for k in ipreds & jpreds | |
179 ) | |
180 directed_triangles += sum( | |
181 (wt(j, i) * wt(k, i) * wt(j, k)) ** (1 / 3) for k in ipreds & jsuccs | |
182 ) | |
183 directed_triangles += sum( | |
184 (wt(j, i) * wt(i, k) * wt(k, j)) ** (1 / 3) for k in isuccs & jpreds | |
185 ) | |
186 directed_triangles += sum( | |
187 (wt(j, i) * wt(i, k) * wt(j, k)) ** (1 / 3) for k in isuccs & jsuccs | |
188 ) | |
189 | |
190 for j in isuccs: | |
191 jpreds = set(G._pred[j]) - {j} | |
192 jsuccs = set(G._succ[j]) - {j} | |
193 directed_triangles += sum( | |
194 (wt(i, j) * wt(k, i) * wt(k, j)) ** (1 / 3) for k in ipreds & jpreds | |
195 ) | |
196 directed_triangles += sum( | |
197 (wt(i, j) * wt(k, i) * wt(j, k)) ** (1 / 3) for k in ipreds & jsuccs | |
198 ) | |
199 directed_triangles += sum( | |
200 (wt(i, j) * wt(i, k) * wt(k, j)) ** (1 / 3) for k in isuccs & jpreds | |
201 ) | |
202 directed_triangles += sum( | |
203 (wt(i, j) * wt(i, k) * wt(j, k)) ** (1 / 3) for k in isuccs & jsuccs | |
204 ) | |
205 | |
206 dtotal = len(ipreds) + len(isuccs) | |
207 dbidirectional = len(ipreds & isuccs) | |
208 yield (i, dtotal, dbidirectional, directed_triangles) | |
209 | |
210 | |
211 def average_clustering(G, nodes=None, weight=None, count_zeros=True): | |
212 r"""Compute the average clustering coefficient for the graph G. | |
213 | |
214 The clustering coefficient for the graph is the average, | |
215 | |
216 .. math:: | |
217 | |
218 C = \frac{1}{n}\sum_{v \in G} c_v, | |
219 | |
220 where :math:`n` is the number of nodes in `G`. | |
221 | |
222 Parameters | |
223 ---------- | |
224 G : graph | |
225 | |
226 nodes : container of nodes, optional (default=all nodes in G) | |
227 Compute average clustering for nodes in this container. | |
228 | |
229 weight : string or None, optional (default=None) | |
230 The edge attribute that holds the numerical value used as a weight. | |
231 If None, then each edge has weight 1. | |
232 | |
233 count_zeros : bool | |
234 If False include only the nodes with nonzero clustering in the average. | |
235 | |
236 Returns | |
237 ------- | |
238 avg : float | |
239 Average clustering | |
240 | |
241 Examples | |
242 -------- | |
243 >>> G = nx.complete_graph(5) | |
244 >>> print(nx.average_clustering(G)) | |
245 1.0 | |
246 | |
247 Notes | |
248 ----- | |
249 This is a space saving routine; it might be faster | |
250 to use the clustering function to get a list and then take the average. | |
251 | |
252 Self loops are ignored. | |
253 | |
254 References | |
255 ---------- | |
256 .. [1] Generalizations of the clustering coefficient to weighted | |
257 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, | |
258 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). | |
259 http://jponnela.com/web_documents/a9.pdf | |
260 .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated | |
261 nodes and leafs on clustering measures for small-world networks. | |
262 https://arxiv.org/abs/0802.2512 | |
263 """ | |
264 c = clustering(G, nodes, weight=weight).values() | |
265 if not count_zeros: | |
266 c = [v for v in c if v > 0] | |
267 return sum(c) / len(c) | |
268 | |
269 | |
270 def clustering(G, nodes=None, weight=None): | |
271 r"""Compute the clustering coefficient for nodes. | |
272 | |
273 For unweighted graphs, the clustering of a node :math:`u` | |
274 is the fraction of possible triangles through that node that exist, | |
275 | |
276 .. math:: | |
277 | |
278 c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)}, | |
279 | |
280 where :math:`T(u)` is the number of triangles through node :math:`u` and | |
281 :math:`deg(u)` is the degree of :math:`u`. | |
282 | |
283 For weighted graphs, there are several ways to define clustering [1]_. | |
284 the one used here is defined | |
285 as the geometric average of the subgraph edge weights [2]_, | |
286 | |
287 .. math:: | |
288 | |
289 c_u = \frac{1}{deg(u)(deg(u)-1))} | |
290 \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}. | |
291 | |
292 The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight | |
293 in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`. | |
294 | |
295 The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`. | |
296 | |
297 For directed graphs, the clustering is similarly defined as the fraction | |
298 of all possible directed triangles or geometric average of the subgraph | |
299 edge weights for unweighted and weighted directed graph respectively [3]_. | |
300 | |
301 .. math:: | |
302 | |
303 c_u = \frac{1}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)} | |
304 T(u), | |
305 | |
306 where :math:`T(u)` is the number of directed triangles through node | |
307 :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of | |
308 :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of | |
309 :math:`u`. | |
310 | |
311 Parameters | |
312 ---------- | |
313 G : graph | |
314 | |
315 nodes : container of nodes, optional (default=all nodes in G) | |
316 Compute clustering for nodes in this container. | |
317 | |
318 weight : string or None, optional (default=None) | |
319 The edge attribute that holds the numerical value used as a weight. | |
320 If None, then each edge has weight 1. | |
321 | |
322 Returns | |
323 ------- | |
324 out : float, or dictionary | |
325 Clustering coefficient at specified nodes | |
326 | |
327 Examples | |
328 -------- | |
329 >>> G = nx.complete_graph(5) | |
330 >>> print(nx.clustering(G, 0)) | |
331 1.0 | |
332 >>> print(nx.clustering(G)) | |
333 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} | |
334 | |
335 Notes | |
336 ----- | |
337 Self loops are ignored. | |
338 | |
339 References | |
340 ---------- | |
341 .. [1] Generalizations of the clustering coefficient to weighted | |
342 complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela, | |
343 K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007). | |
344 http://jponnela.com/web_documents/a9.pdf | |
345 .. [2] Intensity and coherence of motifs in weighted complex | |
346 networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski, | |
347 Physical Review E, 71(6), 065103 (2005). | |
348 .. [3] Clustering in complex directed networks by G. Fagiolo, | |
349 Physical Review E, 76(2), 026107 (2007). | |
350 """ | |
351 if G.is_directed(): | |
352 if weight is not None: | |
353 td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight) | |
354 clusterc = { | |
355 v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) | |
356 for v, dt, db, t in td_iter | |
357 } | |
358 else: | |
359 td_iter = _directed_triangles_and_degree_iter(G, nodes) | |
360 clusterc = { | |
361 v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2) | |
362 for v, dt, db, t in td_iter | |
363 } | |
364 else: | |
365 if weight is not None: | |
366 td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight) | |
367 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter} | |
368 else: | |
369 td_iter = _triangles_and_degree_iter(G, nodes) | |
370 clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter} | |
371 if nodes in G: | |
372 # Return the value of the sole entry in the dictionary. | |
373 return clusterc[nodes] | |
374 return clusterc | |
375 | |
376 | |
377 def transitivity(G): | |
378 r"""Compute graph transitivity, the fraction of all possible triangles | |
379 present in G. | |
380 | |
381 Possible triangles are identified by the number of "triads" | |
382 (two edges with a shared vertex). | |
383 | |
384 The transitivity is | |
385 | |
386 .. math:: | |
387 | |
388 T = 3\frac{\#triangles}{\#triads}. | |
389 | |
390 Parameters | |
391 ---------- | |
392 G : graph | |
393 | |
394 Returns | |
395 ------- | |
396 out : float | |
397 Transitivity | |
398 | |
399 Examples | |
400 -------- | |
401 >>> G = nx.complete_graph(5) | |
402 >>> print(nx.transitivity(G)) | |
403 1.0 | |
404 """ | |
405 triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G)) | |
406 contri = sum(d * (d - 1) for v, d, t, _ in _triangles_and_degree_iter(G)) | |
407 return 0 if triangles == 0 else triangles / contri | |
408 | |
409 | |
410 def square_clustering(G, nodes=None): | |
411 r""" Compute the squares clustering coefficient for nodes. | |
412 | |
413 For each node return the fraction of possible squares that exist at | |
414 the node [1]_ | |
415 | |
416 .. math:: | |
417 C_4(v) = \frac{ \sum_{u=1}^{k_v} | |
418 \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v} | |
419 \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]}, | |
420 | |
421 where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and | |
422 :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u - | |
423 (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw}))`, where | |
424 :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0 | |
425 otherwise. | |
426 | |
427 Parameters | |
428 ---------- | |
429 G : graph | |
430 | |
431 nodes : container of nodes, optional (default=all nodes in G) | |
432 Compute clustering for nodes in this container. | |
433 | |
434 Returns | |
435 ------- | |
436 c4 : dictionary | |
437 A dictionary keyed by node with the square clustering coefficient value. | |
438 | |
439 Examples | |
440 -------- | |
441 >>> G = nx.complete_graph(5) | |
442 >>> print(nx.square_clustering(G, 0)) | |
443 1.0 | |
444 >>> print(nx.square_clustering(G)) | |
445 {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0} | |
446 | |
447 Notes | |
448 ----- | |
449 While :math:`C_3(v)` (triangle clustering) gives the probability that | |
450 two neighbors of node v are connected with each other, :math:`C_4(v)` is | |
451 the probability that two neighbors of node v share a common | |
452 neighbor different from v. This algorithm can be applied to both | |
453 bipartite and unipartite networks. | |
454 | |
455 References | |
456 ---------- | |
457 .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005 | |
458 Cycles and clustering in bipartite networks. | |
459 Physical Review E (72) 056127. | |
460 """ | |
461 if nodes is None: | |
462 node_iter = G | |
463 else: | |
464 node_iter = G.nbunch_iter(nodes) | |
465 clustering = {} | |
466 for v in node_iter: | |
467 clustering[v] = 0 | |
468 potential = 0 | |
469 for u, w in combinations(G[v], 2): | |
470 squares = len((set(G[u]) & set(G[w])) - {v}) | |
471 clustering[v] += squares | |
472 degm = squares + 1 | |
473 if w in G[u]: | |
474 degm += 1 | |
475 potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares | |
476 if potential > 0: | |
477 clustering[v] /= potential | |
478 if nodes in G: | |
479 # Return the value of the sole entry in the dictionary. | |
480 return clustering[nodes] | |
481 return clustering | |
482 | |
483 | |
484 @not_implemented_for("directed") | |
485 def generalized_degree(G, nodes=None): | |
486 r""" Compute the generalized degree for nodes. | |
487 | |
488 For each node, the generalized degree shows how many edges of given | |
489 triangle multiplicity the node is connected to. The triangle multiplicity | |
490 of an edge is the number of triangles an edge participates in. The | |
491 generalized degree of node :math:`i` can be written as a vector | |
492 :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where | |
493 :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that | |
494 participate in :math:`j` triangles. | |
495 | |
496 Parameters | |
497 ---------- | |
498 G : graph | |
499 | |
500 nodes : container of nodes, optional (default=all nodes in G) | |
501 Compute the generalized degree for nodes in this container. | |
502 | |
503 Returns | |
504 ------- | |
505 out : Counter, or dictionary of Counters | |
506 Generalized degree of specified nodes. The Counter is keyed by edge | |
507 triangle multiplicity. | |
508 | |
509 Examples | |
510 -------- | |
511 >>> G = nx.complete_graph(5) | |
512 >>> print(nx.generalized_degree(G, 0)) | |
513 Counter({3: 4}) | |
514 >>> print(nx.generalized_degree(G)) | |
515 {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})} | |
516 | |
517 To recover the number of triangles attached to a node: | |
518 | |
519 >>> k1 = nx.generalized_degree(G, 0) | |
520 >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0) | |
521 True | |
522 | |
523 Notes | |
524 ----- | |
525 In a network of N nodes, the highest triangle multiplicty an edge can have | |
526 is N-2. | |
527 | |
528 The return value does not include a `zero` entry if no edges of a | |
529 particular triangle multiplicity are present. | |
530 | |
531 The number of triangles node :math:`i` is attached to can be recovered from | |
532 the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, | |
533 k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`. | |
534 | |
535 References | |
536 ---------- | |
537 .. [1] Networks with arbitrary edge multiplicities by V. Zlatić, | |
538 D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters), | |
539 Volume 97, Number 2 (2012). | |
540 https://iopscience.iop.org/article/10.1209/0295-5075/97/28005 | |
541 """ | |
542 if nodes in G: | |
543 return next(_triangles_and_degree_iter(G, nodes))[3] | |
544 return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)} |