comparison env/lib/python3.9/site-packages/networkx/algorithms/link_prediction.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Link prediction algorithms.
3 """
4
5
6 from math import log
7
8 import networkx as nx
9 from networkx.utils import not_implemented_for
10
11 __all__ = [
12 "resource_allocation_index",
13 "jaccard_coefficient",
14 "adamic_adar_index",
15 "preferential_attachment",
16 "cn_soundarajan_hopcroft",
17 "ra_index_soundarajan_hopcroft",
18 "within_inter_cluster",
19 "common_neighbor_centrality",
20 ]
21
22
23 def _apply_prediction(G, func, ebunch=None):
24 """Applies the given function to each edge in the specified iterable
25 of edges.
26
27 `G` is an instance of :class:`networkx.Graph`.
28
29 `func` is a function on two inputs, each of which is a node in the
30 graph. The function can return anything, but it should return a
31 value representing a prediction of the likelihood of a "link"
32 joining the two nodes.
33
34 `ebunch` is an iterable of pairs of nodes. If not specified, all
35 non-edges in the graph `G` will be used.
36
37 """
38 if ebunch is None:
39 ebunch = nx.non_edges(G)
40 return ((u, v, func(u, v)) for u, v in ebunch)
41
42
43 @not_implemented_for("directed")
44 @not_implemented_for("multigraph")
45 def resource_allocation_index(G, ebunch=None):
46 r"""Compute the resource allocation index of all node pairs in ebunch.
47
48 Resource allocation index of `u` and `v` is defined as
49
50 .. math::
51
52 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|}
53
54 where $\Gamma(u)$ denotes the set of neighbors of $u$.
55
56 Parameters
57 ----------
58 G : graph
59 A NetworkX undirected graph.
60
61 ebunch : iterable of node pairs, optional (default = None)
62 Resource allocation index will be computed for each pair of
63 nodes given in the iterable. The pairs must be given as
64 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
65 is None then all non-existent edges in the graph will be used.
66 Default value: None.
67
68 Returns
69 -------
70 piter : iterator
71 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
72 pair of nodes and p is their resource allocation index.
73
74 Examples
75 --------
76 >>> G = nx.complete_graph(5)
77 >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)])
78 >>> for u, v, p in preds:
79 ... print(f"({u}, {v}) -> {p:.8f}")
80 (0, 1) -> 0.75000000
81 (2, 3) -> 0.75000000
82
83 References
84 ----------
85 .. [1] T. Zhou, L. Lu, Y.-C. Zhang.
86 Predicting missing links via local information.
87 Eur. Phys. J. B 71 (2009) 623.
88 https://arxiv.org/pdf/0901.0553.pdf
89 """
90
91 def predict(u, v):
92 return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v))
93
94 return _apply_prediction(G, predict, ebunch)
95
96
97 @not_implemented_for("directed")
98 @not_implemented_for("multigraph")
99 def jaccard_coefficient(G, ebunch=None):
100 r"""Compute the Jaccard coefficient of all node pairs in ebunch.
101
102 Jaccard coefficient of nodes `u` and `v` is defined as
103
104 .. math::
105
106 \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|}
107
108 where $\Gamma(u)$ denotes the set of neighbors of $u$.
109
110 Parameters
111 ----------
112 G : graph
113 A NetworkX undirected graph.
114
115 ebunch : iterable of node pairs, optional (default = None)
116 Jaccard coefficient will be computed for each pair of nodes
117 given in the iterable. The pairs must be given as 2-tuples
118 (u, v) where u and v are nodes in the graph. If ebunch is None
119 then all non-existent edges in the graph will be used.
120 Default value: None.
121
122 Returns
123 -------
124 piter : iterator
125 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
126 pair of nodes and p is their Jaccard coefficient.
127
128 Examples
129 --------
130 >>> G = nx.complete_graph(5)
131 >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)])
132 >>> for u, v, p in preds:
133 ... print(f"({u}, {v}) -> {p:.8f}")
134 (0, 1) -> 0.60000000
135 (2, 3) -> 0.60000000
136
137 References
138 ----------
139 .. [1] D. Liben-Nowell, J. Kleinberg.
140 The Link Prediction Problem for Social Networks (2004).
141 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
142 """
143
144 def predict(u, v):
145 union_size = len(set(G[u]) | set(G[v]))
146 if union_size == 0:
147 return 0
148 return len(list(nx.common_neighbors(G, u, v))) / union_size
149
150 return _apply_prediction(G, predict, ebunch)
151
152
153 @not_implemented_for("directed")
154 @not_implemented_for("multigraph")
155 def adamic_adar_index(G, ebunch=None):
156 r"""Compute the Adamic-Adar index of all node pairs in ebunch.
157
158 Adamic-Adar index of `u` and `v` is defined as
159
160 .. math::
161
162 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|}
163
164 where $\Gamma(u)$ denotes the set of neighbors of $u$.
165 This index leads to zero-division for nodes only connected via self-loops.
166 It is intended to be used when no self-loops are present.
167
168 Parameters
169 ----------
170 G : graph
171 NetworkX undirected graph.
172
173 ebunch : iterable of node pairs, optional (default = None)
174 Adamic-Adar index will be computed for each pair of nodes given
175 in the iterable. The pairs must be given as 2-tuples (u, v)
176 where u and v are nodes in the graph. If ebunch is None then all
177 non-existent edges in the graph will be used.
178 Default value: None.
179
180 Returns
181 -------
182 piter : iterator
183 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
184 pair of nodes and p is their Adamic-Adar index.
185
186 Examples
187 --------
188 >>> G = nx.complete_graph(5)
189 >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)])
190 >>> for u, v, p in preds:
191 ... print(f"({u}, {v}) -> {p:.8f}")
192 (0, 1) -> 2.16404256
193 (2, 3) -> 2.16404256
194
195 References
196 ----------
197 .. [1] D. Liben-Nowell, J. Kleinberg.
198 The Link Prediction Problem for Social Networks (2004).
199 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
200 """
201
202 def predict(u, v):
203 return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v))
204
205 return _apply_prediction(G, predict, ebunch)
206
207
208 @not_implemented_for("directed")
209 @not_implemented_for("multigraph")
210 def common_neighbor_centrality(G, ebunch=None, alpha=0.8):
211 r"""Return the CCPA score for each pair of nodes.
212
213 Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA)
214 score of all node pairs in ebunch.
215
216 CCPA score of `u` and `v` is defined as
217
218 .. math::
219
220 \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}}
221
222 where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the
223 set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes
224 total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance
225 between $u$ and $v$.
226
227 This algorithm is based on two vital properties of nodes, namely the number
228 of common neighbors and their centrality. Common neighbor refers to the common
229 nodes between two nodes. Centrality refers to the prestige that a node enjoys
230 in a network.
231
232 .. seealso::
233
234 :func:`common_neighbors`
235
236 Parameters
237 ----------
238 G : graph
239 NetworkX undirected graph.
240
241 ebunch : iterable of node pairs, optional (default = None)
242 Preferential attachment score will be computed for each pair of
243 nodes given in the iterable. The pairs must be given as
244 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
245 is None then all non-existent edges in the graph will be used.
246 Default value: None.
247
248 alpha : Parameter defined for participation of Common Neighbor
249 and Centrality Algorithm share. Default value set to 0.8
250 because author found better performance at 0.8 for all the
251 dataset.
252 Default value: 0.8
253
254
255 Returns
256 -------
257 piter : iterator
258 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
259 pair of nodes and p is their Common Neighbor and Centrality based
260 Parameterized Algorithm(CCPA) score.
261
262 Examples
263 --------
264 >>> G = nx.complete_graph(5)
265 >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)])
266 >>> for u, v, p in preds:
267 ... print(f"({u}, {v}) -> {p}")
268 (0, 1) -> 3.4000000000000004
269 (2, 3) -> 3.4000000000000004
270
271 References
272 ----------
273 .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al.
274 Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm.
275 Sci Rep 10, 364 (2020).
276 https://doi.org/10.1038/s41598-019-57304-y
277 """
278 shortest_path = nx.shortest_path(G)
279
280 def predict(u, v):
281 return alpha * len(list(nx.common_neighbors(G, u, v))) + (1 - alpha) * (
282 G.number_of_nodes() / (len(shortest_path[u][v]) - 1)
283 )
284
285 return _apply_prediction(G, predict, ebunch)
286
287
288 @not_implemented_for("directed")
289 @not_implemented_for("multigraph")
290 def preferential_attachment(G, ebunch=None):
291 r"""Compute the preferential attachment score of all node pairs in ebunch.
292
293 Preferential attachment score of `u` and `v` is defined as
294
295 .. math::
296
297 |\Gamma(u)| |\Gamma(v)|
298
299 where $\Gamma(u)$ denotes the set of neighbors of $u$.
300
301 Parameters
302 ----------
303 G : graph
304 NetworkX undirected graph.
305
306 ebunch : iterable of node pairs, optional (default = None)
307 Preferential attachment score will be computed for each pair of
308 nodes given in the iterable. The pairs must be given as
309 2-tuples (u, v) where u and v are nodes in the graph. If ebunch
310 is None then all non-existent edges in the graph will be used.
311 Default value: None.
312
313 Returns
314 -------
315 piter : iterator
316 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
317 pair of nodes and p is their preferential attachment score.
318
319 Examples
320 --------
321 >>> G = nx.complete_graph(5)
322 >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)])
323 >>> for u, v, p in preds:
324 ... print(f"({u}, {v}) -> {p}")
325 (0, 1) -> 16
326 (2, 3) -> 16
327
328 References
329 ----------
330 .. [1] D. Liben-Nowell, J. Kleinberg.
331 The Link Prediction Problem for Social Networks (2004).
332 http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
333 """
334
335 def predict(u, v):
336 return G.degree(u) * G.degree(v)
337
338 return _apply_prediction(G, predict, ebunch)
339
340
341 @not_implemented_for("directed")
342 @not_implemented_for("multigraph")
343 def cn_soundarajan_hopcroft(G, ebunch=None, community="community"):
344 r"""Count the number of common neighbors of all node pairs in ebunch
345 using community information.
346
347 For two nodes $u$ and $v$, this function computes the number of
348 common neighbors and bonus one for each common neighbor belonging to
349 the same community as $u$ and $v$. Mathematically,
350
351 .. math::
352
353 |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w)
354
355 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
356 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
357 neighbors of $u$.
358
359 Parameters
360 ----------
361 G : graph
362 A NetworkX undirected graph.
363
364 ebunch : iterable of node pairs, optional (default = None)
365 The score will be computed for each pair of nodes given in the
366 iterable. The pairs must be given as 2-tuples (u, v) where u
367 and v are nodes in the graph. If ebunch is None then all
368 non-existent edges in the graph will be used.
369 Default value: None.
370
371 community : string, optional (default = 'community')
372 Nodes attribute name containing the community information.
373 G[u][community] identifies which community u belongs to. Each
374 node belongs to at most one community. Default value: 'community'.
375
376 Returns
377 -------
378 piter : iterator
379 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
380 pair of nodes and p is their score.
381
382 Examples
383 --------
384 >>> G = nx.path_graph(3)
385 >>> G.nodes[0]["community"] = 0
386 >>> G.nodes[1]["community"] = 0
387 >>> G.nodes[2]["community"] = 0
388 >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)])
389 >>> for u, v, p in preds:
390 ... print(f"({u}, {v}) -> {p}")
391 (0, 2) -> 2
392
393 References
394 ----------
395 .. [1] Sucheta Soundarajan and John Hopcroft.
396 Using community information to improve the precision of link
397 prediction methods.
398 In Proceedings of the 21st international conference companion on
399 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
400 http://doi.acm.org/10.1145/2187980.2188150
401 """
402
403 def predict(u, v):
404 Cu = _community(G, u, community)
405 Cv = _community(G, v, community)
406 cnbors = list(nx.common_neighbors(G, u, v))
407 neighbors = (
408 sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0
409 )
410 return len(cnbors) + neighbors
411
412 return _apply_prediction(G, predict, ebunch)
413
414
415 @not_implemented_for("directed")
416 @not_implemented_for("multigraph")
417 def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"):
418 r"""Compute the resource allocation index of all node pairs in
419 ebunch using community information.
420
421 For two nodes $u$ and $v$, this function computes the resource
422 allocation index considering only common neighbors belonging to the
423 same community as $u$ and $v$. Mathematically,
424
425 .. math::
426
427 \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|}
428
429 where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
430 and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
431 neighbors of $u$.
432
433 Parameters
434 ----------
435 G : graph
436 A NetworkX undirected graph.
437
438 ebunch : iterable of node pairs, optional (default = None)
439 The score will be computed for each pair of nodes given in the
440 iterable. The pairs must be given as 2-tuples (u, v) where u
441 and v are nodes in the graph. If ebunch is None then all
442 non-existent edges in the graph will be used.
443 Default value: None.
444
445 community : string, optional (default = 'community')
446 Nodes attribute name containing the community information.
447 G[u][community] identifies which community u belongs to. Each
448 node belongs to at most one community. Default value: 'community'.
449
450 Returns
451 -------
452 piter : iterator
453 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
454 pair of nodes and p is their score.
455
456 Examples
457 --------
458 >>> G = nx.Graph()
459 >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)])
460 >>> G.nodes[0]["community"] = 0
461 >>> G.nodes[1]["community"] = 0
462 >>> G.nodes[2]["community"] = 1
463 >>> G.nodes[3]["community"] = 0
464 >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)])
465 >>> for u, v, p in preds:
466 ... print(f"({u}, {v}) -> {p:.8f}")
467 (0, 3) -> 0.50000000
468
469 References
470 ----------
471 .. [1] Sucheta Soundarajan and John Hopcroft.
472 Using community information to improve the precision of link
473 prediction methods.
474 In Proceedings of the 21st international conference companion on
475 World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
476 http://doi.acm.org/10.1145/2187980.2188150
477 """
478
479 def predict(u, v):
480 Cu = _community(G, u, community)
481 Cv = _community(G, v, community)
482 if Cu != Cv:
483 return 0
484 cnbors = nx.common_neighbors(G, u, v)
485 return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu)
486
487 return _apply_prediction(G, predict, ebunch)
488
489
490 @not_implemented_for("directed")
491 @not_implemented_for("multigraph")
492 def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"):
493 """Compute the ratio of within- and inter-cluster common neighbors
494 of all node pairs in ebunch.
495
496 For two nodes `u` and `v`, if a common neighbor `w` belongs to the
497 same community as them, `w` is considered as within-cluster common
498 neighbor of `u` and `v`. Otherwise, it is considered as
499 inter-cluster common neighbor of `u` and `v`. The ratio between the
500 size of the set of within- and inter-cluster common neighbors is
501 defined as the WIC measure. [1]_
502
503 Parameters
504 ----------
505 G : graph
506 A NetworkX undirected graph.
507
508 ebunch : iterable of node pairs, optional (default = None)
509 The WIC measure will be computed for each pair of nodes given in
510 the iterable. The pairs must be given as 2-tuples (u, v) where
511 u and v are nodes in the graph. If ebunch is None then all
512 non-existent edges in the graph will be used.
513 Default value: None.
514
515 delta : float, optional (default = 0.001)
516 Value to prevent division by zero in case there is no
517 inter-cluster common neighbor between two nodes. See [1]_ for
518 details. Default value: 0.001.
519
520 community : string, optional (default = 'community')
521 Nodes attribute name containing the community information.
522 G[u][community] identifies which community u belongs to. Each
523 node belongs to at most one community. Default value: 'community'.
524
525 Returns
526 -------
527 piter : iterator
528 An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
529 pair of nodes and p is their WIC measure.
530
531 Examples
532 --------
533 >>> G = nx.Graph()
534 >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)])
535 >>> G.nodes[0]["community"] = 0
536 >>> G.nodes[1]["community"] = 1
537 >>> G.nodes[2]["community"] = 0
538 >>> G.nodes[3]["community"] = 0
539 >>> G.nodes[4]["community"] = 0
540 >>> preds = nx.within_inter_cluster(G, [(0, 4)])
541 >>> for u, v, p in preds:
542 ... print(f"({u}, {v}) -> {p:.8f}")
543 (0, 4) -> 1.99800200
544 >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5)
545 >>> for u, v, p in preds:
546 ... print(f"({u}, {v}) -> {p:.8f}")
547 (0, 4) -> 1.33333333
548
549 References
550 ----------
551 .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes.
552 Link prediction in complex networks based on cluster information.
553 In Proceedings of the 21st Brazilian conference on Advances in
554 Artificial Intelligence (SBIA'12)
555 https://doi.org/10.1007/978-3-642-34459-6_10
556 """
557 if delta <= 0:
558 raise nx.NetworkXAlgorithmError("Delta must be greater than zero")
559
560 def predict(u, v):
561 Cu = _community(G, u, community)
562 Cv = _community(G, v, community)
563 if Cu != Cv:
564 return 0
565 cnbors = set(nx.common_neighbors(G, u, v))
566 within = {w for w in cnbors if _community(G, w, community) == Cu}
567 inter = cnbors - within
568 return len(within) / (len(inter) + delta)
569
570 return _apply_prediction(G, predict, ebunch)
571
572
573 def _community(G, u, community):
574 """Get the community of the given node."""
575 node_u = G.nodes[u]
576 try:
577 return node_u[community]
578 except KeyError as e:
579 raise nx.NetworkXAlgorithmError("No community information") from e