Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/link_prediction.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 Link prediction algorithms. | |
3 """ | |
4 | |
5 | |
6 from math import log | |
7 | |
8 import networkx as nx | |
9 from networkx.utils import not_implemented_for | |
10 | |
11 __all__ = [ | |
12 "resource_allocation_index", | |
13 "jaccard_coefficient", | |
14 "adamic_adar_index", | |
15 "preferential_attachment", | |
16 "cn_soundarajan_hopcroft", | |
17 "ra_index_soundarajan_hopcroft", | |
18 "within_inter_cluster", | |
19 "common_neighbor_centrality", | |
20 ] | |
21 | |
22 | |
23 def _apply_prediction(G, func, ebunch=None): | |
24 """Applies the given function to each edge in the specified iterable | |
25 of edges. | |
26 | |
27 `G` is an instance of :class:`networkx.Graph`. | |
28 | |
29 `func` is a function on two inputs, each of which is a node in the | |
30 graph. The function can return anything, but it should return a | |
31 value representing a prediction of the likelihood of a "link" | |
32 joining the two nodes. | |
33 | |
34 `ebunch` is an iterable of pairs of nodes. If not specified, all | |
35 non-edges in the graph `G` will be used. | |
36 | |
37 """ | |
38 if ebunch is None: | |
39 ebunch = nx.non_edges(G) | |
40 return ((u, v, func(u, v)) for u, v in ebunch) | |
41 | |
42 | |
43 @not_implemented_for("directed") | |
44 @not_implemented_for("multigraph") | |
45 def resource_allocation_index(G, ebunch=None): | |
46 r"""Compute the resource allocation index of all node pairs in ebunch. | |
47 | |
48 Resource allocation index of `u` and `v` is defined as | |
49 | |
50 .. math:: | |
51 | |
52 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|} | |
53 | |
54 where $\Gamma(u)$ denotes the set of neighbors of $u$. | |
55 | |
56 Parameters | |
57 ---------- | |
58 G : graph | |
59 A NetworkX undirected graph. | |
60 | |
61 ebunch : iterable of node pairs, optional (default = None) | |
62 Resource allocation index will be computed for each pair of | |
63 nodes given in the iterable. The pairs must be given as | |
64 2-tuples (u, v) where u and v are nodes in the graph. If ebunch | |
65 is None then all non-existent edges in the graph will be used. | |
66 Default value: None. | |
67 | |
68 Returns | |
69 ------- | |
70 piter : iterator | |
71 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
72 pair of nodes and p is their resource allocation index. | |
73 | |
74 Examples | |
75 -------- | |
76 >>> G = nx.complete_graph(5) | |
77 >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)]) | |
78 >>> for u, v, p in preds: | |
79 ... print(f"({u}, {v}) -> {p:.8f}") | |
80 (0, 1) -> 0.75000000 | |
81 (2, 3) -> 0.75000000 | |
82 | |
83 References | |
84 ---------- | |
85 .. [1] T. Zhou, L. Lu, Y.-C. Zhang. | |
86 Predicting missing links via local information. | |
87 Eur. Phys. J. B 71 (2009) 623. | |
88 https://arxiv.org/pdf/0901.0553.pdf | |
89 """ | |
90 | |
91 def predict(u, v): | |
92 return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v)) | |
93 | |
94 return _apply_prediction(G, predict, ebunch) | |
95 | |
96 | |
97 @not_implemented_for("directed") | |
98 @not_implemented_for("multigraph") | |
99 def jaccard_coefficient(G, ebunch=None): | |
100 r"""Compute the Jaccard coefficient of all node pairs in ebunch. | |
101 | |
102 Jaccard coefficient of nodes `u` and `v` is defined as | |
103 | |
104 .. math:: | |
105 | |
106 \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|} | |
107 | |
108 where $\Gamma(u)$ denotes the set of neighbors of $u$. | |
109 | |
110 Parameters | |
111 ---------- | |
112 G : graph | |
113 A NetworkX undirected graph. | |
114 | |
115 ebunch : iterable of node pairs, optional (default = None) | |
116 Jaccard coefficient will be computed for each pair of nodes | |
117 given in the iterable. The pairs must be given as 2-tuples | |
118 (u, v) where u and v are nodes in the graph. If ebunch is None | |
119 then all non-existent edges in the graph will be used. | |
120 Default value: None. | |
121 | |
122 Returns | |
123 ------- | |
124 piter : iterator | |
125 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
126 pair of nodes and p is their Jaccard coefficient. | |
127 | |
128 Examples | |
129 -------- | |
130 >>> G = nx.complete_graph(5) | |
131 >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)]) | |
132 >>> for u, v, p in preds: | |
133 ... print(f"({u}, {v}) -> {p:.8f}") | |
134 (0, 1) -> 0.60000000 | |
135 (2, 3) -> 0.60000000 | |
136 | |
137 References | |
138 ---------- | |
139 .. [1] D. Liben-Nowell, J. Kleinberg. | |
140 The Link Prediction Problem for Social Networks (2004). | |
141 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf | |
142 """ | |
143 | |
144 def predict(u, v): | |
145 union_size = len(set(G[u]) | set(G[v])) | |
146 if union_size == 0: | |
147 return 0 | |
148 return len(list(nx.common_neighbors(G, u, v))) / union_size | |
149 | |
150 return _apply_prediction(G, predict, ebunch) | |
151 | |
152 | |
153 @not_implemented_for("directed") | |
154 @not_implemented_for("multigraph") | |
155 def adamic_adar_index(G, ebunch=None): | |
156 r"""Compute the Adamic-Adar index of all node pairs in ebunch. | |
157 | |
158 Adamic-Adar index of `u` and `v` is defined as | |
159 | |
160 .. math:: | |
161 | |
162 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|} | |
163 | |
164 where $\Gamma(u)$ denotes the set of neighbors of $u$. | |
165 This index leads to zero-division for nodes only connected via self-loops. | |
166 It is intended to be used when no self-loops are present. | |
167 | |
168 Parameters | |
169 ---------- | |
170 G : graph | |
171 NetworkX undirected graph. | |
172 | |
173 ebunch : iterable of node pairs, optional (default = None) | |
174 Adamic-Adar index will be computed for each pair of nodes given | |
175 in the iterable. The pairs must be given as 2-tuples (u, v) | |
176 where u and v are nodes in the graph. If ebunch is None then all | |
177 non-existent edges in the graph will be used. | |
178 Default value: None. | |
179 | |
180 Returns | |
181 ------- | |
182 piter : iterator | |
183 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
184 pair of nodes and p is their Adamic-Adar index. | |
185 | |
186 Examples | |
187 -------- | |
188 >>> G = nx.complete_graph(5) | |
189 >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)]) | |
190 >>> for u, v, p in preds: | |
191 ... print(f"({u}, {v}) -> {p:.8f}") | |
192 (0, 1) -> 2.16404256 | |
193 (2, 3) -> 2.16404256 | |
194 | |
195 References | |
196 ---------- | |
197 .. [1] D. Liben-Nowell, J. Kleinberg. | |
198 The Link Prediction Problem for Social Networks (2004). | |
199 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf | |
200 """ | |
201 | |
202 def predict(u, v): | |
203 return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v)) | |
204 | |
205 return _apply_prediction(G, predict, ebunch) | |
206 | |
207 | |
208 @not_implemented_for("directed") | |
209 @not_implemented_for("multigraph") | |
210 def common_neighbor_centrality(G, ebunch=None, alpha=0.8): | |
211 r"""Return the CCPA score for each pair of nodes. | |
212 | |
213 Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA) | |
214 score of all node pairs in ebunch. | |
215 | |
216 CCPA score of `u` and `v` is defined as | |
217 | |
218 .. math:: | |
219 | |
220 \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}} | |
221 | |
222 where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the | |
223 set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes | |
224 total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance | |
225 between $u$ and $v$. | |
226 | |
227 This algorithm is based on two vital properties of nodes, namely the number | |
228 of common neighbors and their centrality. Common neighbor refers to the common | |
229 nodes between two nodes. Centrality refers to the prestige that a node enjoys | |
230 in a network. | |
231 | |
232 .. seealso:: | |
233 | |
234 :func:`common_neighbors` | |
235 | |
236 Parameters | |
237 ---------- | |
238 G : graph | |
239 NetworkX undirected graph. | |
240 | |
241 ebunch : iterable of node pairs, optional (default = None) | |
242 Preferential attachment score will be computed for each pair of | |
243 nodes given in the iterable. The pairs must be given as | |
244 2-tuples (u, v) where u and v are nodes in the graph. If ebunch | |
245 is None then all non-existent edges in the graph will be used. | |
246 Default value: None. | |
247 | |
248 alpha : Parameter defined for participation of Common Neighbor | |
249 and Centrality Algorithm share. Default value set to 0.8 | |
250 because author found better performance at 0.8 for all the | |
251 dataset. | |
252 Default value: 0.8 | |
253 | |
254 | |
255 Returns | |
256 ------- | |
257 piter : iterator | |
258 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
259 pair of nodes and p is their Common Neighbor and Centrality based | |
260 Parameterized Algorithm(CCPA) score. | |
261 | |
262 Examples | |
263 -------- | |
264 >>> G = nx.complete_graph(5) | |
265 >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)]) | |
266 >>> for u, v, p in preds: | |
267 ... print(f"({u}, {v}) -> {p}") | |
268 (0, 1) -> 3.4000000000000004 | |
269 (2, 3) -> 3.4000000000000004 | |
270 | |
271 References | |
272 ---------- | |
273 .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al. | |
274 Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm. | |
275 Sci Rep 10, 364 (2020). | |
276 https://doi.org/10.1038/s41598-019-57304-y | |
277 """ | |
278 shortest_path = nx.shortest_path(G) | |
279 | |
280 def predict(u, v): | |
281 return alpha * len(list(nx.common_neighbors(G, u, v))) + (1 - alpha) * ( | |
282 G.number_of_nodes() / (len(shortest_path[u][v]) - 1) | |
283 ) | |
284 | |
285 return _apply_prediction(G, predict, ebunch) | |
286 | |
287 | |
288 @not_implemented_for("directed") | |
289 @not_implemented_for("multigraph") | |
290 def preferential_attachment(G, ebunch=None): | |
291 r"""Compute the preferential attachment score of all node pairs in ebunch. | |
292 | |
293 Preferential attachment score of `u` and `v` is defined as | |
294 | |
295 .. math:: | |
296 | |
297 |\Gamma(u)| |\Gamma(v)| | |
298 | |
299 where $\Gamma(u)$ denotes the set of neighbors of $u$. | |
300 | |
301 Parameters | |
302 ---------- | |
303 G : graph | |
304 NetworkX undirected graph. | |
305 | |
306 ebunch : iterable of node pairs, optional (default = None) | |
307 Preferential attachment score will be computed for each pair of | |
308 nodes given in the iterable. The pairs must be given as | |
309 2-tuples (u, v) where u and v are nodes in the graph. If ebunch | |
310 is None then all non-existent edges in the graph will be used. | |
311 Default value: None. | |
312 | |
313 Returns | |
314 ------- | |
315 piter : iterator | |
316 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
317 pair of nodes and p is their preferential attachment score. | |
318 | |
319 Examples | |
320 -------- | |
321 >>> G = nx.complete_graph(5) | |
322 >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)]) | |
323 >>> for u, v, p in preds: | |
324 ... print(f"({u}, {v}) -> {p}") | |
325 (0, 1) -> 16 | |
326 (2, 3) -> 16 | |
327 | |
328 References | |
329 ---------- | |
330 .. [1] D. Liben-Nowell, J. Kleinberg. | |
331 The Link Prediction Problem for Social Networks (2004). | |
332 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf | |
333 """ | |
334 | |
335 def predict(u, v): | |
336 return G.degree(u) * G.degree(v) | |
337 | |
338 return _apply_prediction(G, predict, ebunch) | |
339 | |
340 | |
341 @not_implemented_for("directed") | |
342 @not_implemented_for("multigraph") | |
343 def cn_soundarajan_hopcroft(G, ebunch=None, community="community"): | |
344 r"""Count the number of common neighbors of all node pairs in ebunch | |
345 using community information. | |
346 | |
347 For two nodes $u$ and $v$, this function computes the number of | |
348 common neighbors and bonus one for each common neighbor belonging to | |
349 the same community as $u$ and $v$. Mathematically, | |
350 | |
351 .. math:: | |
352 | |
353 |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w) | |
354 | |
355 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ | |
356 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of | |
357 neighbors of $u$. | |
358 | |
359 Parameters | |
360 ---------- | |
361 G : graph | |
362 A NetworkX undirected graph. | |
363 | |
364 ebunch : iterable of node pairs, optional (default = None) | |
365 The score will be computed for each pair of nodes given in the | |
366 iterable. The pairs must be given as 2-tuples (u, v) where u | |
367 and v are nodes in the graph. If ebunch is None then all | |
368 non-existent edges in the graph will be used. | |
369 Default value: None. | |
370 | |
371 community : string, optional (default = 'community') | |
372 Nodes attribute name containing the community information. | |
373 G[u][community] identifies which community u belongs to. Each | |
374 node belongs to at most one community. Default value: 'community'. | |
375 | |
376 Returns | |
377 ------- | |
378 piter : iterator | |
379 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
380 pair of nodes and p is their score. | |
381 | |
382 Examples | |
383 -------- | |
384 >>> G = nx.path_graph(3) | |
385 >>> G.nodes[0]["community"] = 0 | |
386 >>> G.nodes[1]["community"] = 0 | |
387 >>> G.nodes[2]["community"] = 0 | |
388 >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)]) | |
389 >>> for u, v, p in preds: | |
390 ... print(f"({u}, {v}) -> {p}") | |
391 (0, 2) -> 2 | |
392 | |
393 References | |
394 ---------- | |
395 .. [1] Sucheta Soundarajan and John Hopcroft. | |
396 Using community information to improve the precision of link | |
397 prediction methods. | |
398 In Proceedings of the 21st international conference companion on | |
399 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. | |
400 http://doi.acm.org/10.1145/2187980.2188150 | |
401 """ | |
402 | |
403 def predict(u, v): | |
404 Cu = _community(G, u, community) | |
405 Cv = _community(G, v, community) | |
406 cnbors = list(nx.common_neighbors(G, u, v)) | |
407 neighbors = ( | |
408 sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0 | |
409 ) | |
410 return len(cnbors) + neighbors | |
411 | |
412 return _apply_prediction(G, predict, ebunch) | |
413 | |
414 | |
415 @not_implemented_for("directed") | |
416 @not_implemented_for("multigraph") | |
417 def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"): | |
418 r"""Compute the resource allocation index of all node pairs in | |
419 ebunch using community information. | |
420 | |
421 For two nodes $u$ and $v$, this function computes the resource | |
422 allocation index considering only common neighbors belonging to the | |
423 same community as $u$ and $v$. Mathematically, | |
424 | |
425 .. math:: | |
426 | |
427 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|} | |
428 | |
429 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ | |
430 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of | |
431 neighbors of $u$. | |
432 | |
433 Parameters | |
434 ---------- | |
435 G : graph | |
436 A NetworkX undirected graph. | |
437 | |
438 ebunch : iterable of node pairs, optional (default = None) | |
439 The score will be computed for each pair of nodes given in the | |
440 iterable. The pairs must be given as 2-tuples (u, v) where u | |
441 and v are nodes in the graph. If ebunch is None then all | |
442 non-existent edges in the graph will be used. | |
443 Default value: None. | |
444 | |
445 community : string, optional (default = 'community') | |
446 Nodes attribute name containing the community information. | |
447 G[u][community] identifies which community u belongs to. Each | |
448 node belongs to at most one community. Default value: 'community'. | |
449 | |
450 Returns | |
451 ------- | |
452 piter : iterator | |
453 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
454 pair of nodes and p is their score. | |
455 | |
456 Examples | |
457 -------- | |
458 >>> G = nx.Graph() | |
459 >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)]) | |
460 >>> G.nodes[0]["community"] = 0 | |
461 >>> G.nodes[1]["community"] = 0 | |
462 >>> G.nodes[2]["community"] = 1 | |
463 >>> G.nodes[3]["community"] = 0 | |
464 >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)]) | |
465 >>> for u, v, p in preds: | |
466 ... print(f"({u}, {v}) -> {p:.8f}") | |
467 (0, 3) -> 0.50000000 | |
468 | |
469 References | |
470 ---------- | |
471 .. [1] Sucheta Soundarajan and John Hopcroft. | |
472 Using community information to improve the precision of link | |
473 prediction methods. | |
474 In Proceedings of the 21st international conference companion on | |
475 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. | |
476 http://doi.acm.org/10.1145/2187980.2188150 | |
477 """ | |
478 | |
479 def predict(u, v): | |
480 Cu = _community(G, u, community) | |
481 Cv = _community(G, v, community) | |
482 if Cu != Cv: | |
483 return 0 | |
484 cnbors = nx.common_neighbors(G, u, v) | |
485 return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu) | |
486 | |
487 return _apply_prediction(G, predict, ebunch) | |
488 | |
489 | |
490 @not_implemented_for("directed") | |
491 @not_implemented_for("multigraph") | |
492 def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"): | |
493 """Compute the ratio of within- and inter-cluster common neighbors | |
494 of all node pairs in ebunch. | |
495 | |
496 For two nodes `u` and `v`, if a common neighbor `w` belongs to the | |
497 same community as them, `w` is considered as within-cluster common | |
498 neighbor of `u` and `v`. Otherwise, it is considered as | |
499 inter-cluster common neighbor of `u` and `v`. The ratio between the | |
500 size of the set of within- and inter-cluster common neighbors is | |
501 defined as the WIC measure. [1]_ | |
502 | |
503 Parameters | |
504 ---------- | |
505 G : graph | |
506 A NetworkX undirected graph. | |
507 | |
508 ebunch : iterable of node pairs, optional (default = None) | |
509 The WIC measure will be computed for each pair of nodes given in | |
510 the iterable. The pairs must be given as 2-tuples (u, v) where | |
511 u and v are nodes in the graph. If ebunch is None then all | |
512 non-existent edges in the graph will be used. | |
513 Default value: None. | |
514 | |
515 delta : float, optional (default = 0.001) | |
516 Value to prevent division by zero in case there is no | |
517 inter-cluster common neighbor between two nodes. See [1]_ for | |
518 details. Default value: 0.001. | |
519 | |
520 community : string, optional (default = 'community') | |
521 Nodes attribute name containing the community information. | |
522 G[u][community] identifies which community u belongs to. Each | |
523 node belongs to at most one community. Default value: 'community'. | |
524 | |
525 Returns | |
526 ------- | |
527 piter : iterator | |
528 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a | |
529 pair of nodes and p is their WIC measure. | |
530 | |
531 Examples | |
532 -------- | |
533 >>> G = nx.Graph() | |
534 >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)]) | |
535 >>> G.nodes[0]["community"] = 0 | |
536 >>> G.nodes[1]["community"] = 1 | |
537 >>> G.nodes[2]["community"] = 0 | |
538 >>> G.nodes[3]["community"] = 0 | |
539 >>> G.nodes[4]["community"] = 0 | |
540 >>> preds = nx.within_inter_cluster(G, [(0, 4)]) | |
541 >>> for u, v, p in preds: | |
542 ... print(f"({u}, {v}) -> {p:.8f}") | |
543 (0, 4) -> 1.99800200 | |
544 >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5) | |
545 >>> for u, v, p in preds: | |
546 ... print(f"({u}, {v}) -> {p:.8f}") | |
547 (0, 4) -> 1.33333333 | |
548 | |
549 References | |
550 ---------- | |
551 .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes. | |
552 Link prediction in complex networks based on cluster information. | |
553 In Proceedings of the 21st Brazilian conference on Advances in | |
554 Artificial Intelligence (SBIA'12) | |
555 https://doi.org/10.1007/978-3-642-34459-6_10 | |
556 """ | |
557 if delta <= 0: | |
558 raise nx.NetworkXAlgorithmError("Delta must be greater than zero") | |
559 | |
560 def predict(u, v): | |
561 Cu = _community(G, u, community) | |
562 Cv = _community(G, v, community) | |
563 if Cu != Cv: | |
564 return 0 | |
565 cnbors = set(nx.common_neighbors(G, u, v)) | |
566 within = {w for w in cnbors if _community(G, w, community) == Cu} | |
567 inter = cnbors - within | |
568 return len(within) / (len(inter) + delta) | |
569 | |
570 return _apply_prediction(G, predict, ebunch) | |
571 | |
572 | |
573 def _community(G, u, community): | |
574 """Get the community of the given node.""" | |
575 node_u = G.nodes[u] | |
576 try: | |
577 return node_u[community] | |
578 except KeyError as e: | |
579 raise nx.NetworkXAlgorithmError("No community information") from e |