Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/networkx/algorithms/link_prediction.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/link_prediction.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,579 @@ +""" +Link prediction algorithms. +""" + + +from math import log + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = [ + "resource_allocation_index", + "jaccard_coefficient", + "adamic_adar_index", + "preferential_attachment", + "cn_soundarajan_hopcroft", + "ra_index_soundarajan_hopcroft", + "within_inter_cluster", + "common_neighbor_centrality", +] + + +def _apply_prediction(G, func, ebunch=None): + """Applies the given function to each edge in the specified iterable + of edges. + + `G` is an instance of :class:`networkx.Graph`. + + `func` is a function on two inputs, each of which is a node in the + graph. The function can return anything, but it should return a + value representing a prediction of the likelihood of a "link" + joining the two nodes. + + `ebunch` is an iterable of pairs of nodes. If not specified, all + non-edges in the graph `G` will be used. + + """ + if ebunch is None: + ebunch = nx.non_edges(G) + return ((u, v, func(u, v)) for u, v in ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def resource_allocation_index(G, ebunch=None): + r"""Compute the resource allocation index of all node pairs in ebunch. + + Resource allocation index of `u` and `v` is defined as + + .. math:: + + \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|} + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Resource allocation index will be computed for each pair of + nodes given in the iterable. The pairs must be given as + 2-tuples (u, v) where u and v are nodes in the graph. If ebunch + is None then all non-existent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their resource allocation index. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 1) -> 0.75000000 + (2, 3) -> 0.75000000 + + References + ---------- + .. [1] T. Zhou, L. Lu, Y.-C. Zhang. + Predicting missing links via local information. + Eur. Phys. J. B 71 (2009) 623. + https://arxiv.org/pdf/0901.0553.pdf + """ + + def predict(u, v): + return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v)) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def jaccard_coefficient(G, ebunch=None): + r"""Compute the Jaccard coefficient of all node pairs in ebunch. + + Jaccard coefficient of nodes `u` and `v` is defined as + + .. math:: + + \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|} + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Jaccard coefficient will be computed for each pair of nodes + given in the iterable. The pairs must be given as 2-tuples + (u, v) where u and v are nodes in the graph. If ebunch is None + then all non-existent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their Jaccard coefficient. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 1) -> 0.60000000 + (2, 3) -> 0.60000000 + + References + ---------- + .. [1] D. Liben-Nowell, J. Kleinberg. + The Link Prediction Problem for Social Networks (2004). + http://www.cs.cornell.edu/home/kleinber/link-pred.pdf + """ + + def predict(u, v): + union_size = len(set(G[u]) | set(G[v])) + if union_size == 0: + return 0 + return len(list(nx.common_neighbors(G, u, v))) / union_size + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def adamic_adar_index(G, ebunch=None): + r"""Compute the Adamic-Adar index of all node pairs in ebunch. + + Adamic-Adar index of `u` and `v` is defined as + + .. math:: + + \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|} + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + This index leads to zero-division for nodes only connected via self-loops. + It is intended to be used when no self-loops are present. + + Parameters + ---------- + G : graph + NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Adamic-Adar index will be computed for each pair of nodes given + in the iterable. The pairs must be given as 2-tuples (u, v) + where u and v are nodes in the graph. If ebunch is None then all + non-existent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their Adamic-Adar index. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 1) -> 2.16404256 + (2, 3) -> 2.16404256 + + References + ---------- + .. [1] D. Liben-Nowell, J. Kleinberg. + The Link Prediction Problem for Social Networks (2004). + http://www.cs.cornell.edu/home/kleinber/link-pred.pdf + """ + + def predict(u, v): + return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v)) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def common_neighbor_centrality(G, ebunch=None, alpha=0.8): + r"""Return the CCPA score for each pair of nodes. + + Compute the Common Neighbor and Centrality based Parameterized Algorithm(CCPA) + score of all node pairs in ebunch. + + CCPA score of `u` and `v` is defined as + + .. math:: + + \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}} + + where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the + set of neighbors of $v$, $\alpha$ is parameter varies between [0,1], $N$ denotes + total number of nodes in the Graph and ${d}_{uv}$ denotes shortest distance + between $u$ and $v$. + + This algorithm is based on two vital properties of nodes, namely the number + of common neighbors and their centrality. Common neighbor refers to the common + nodes between two nodes. Centrality refers to the prestige that a node enjoys + in a network. + + .. seealso:: + + :func:`common_neighbors` + + Parameters + ---------- + G : graph + NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Preferential attachment score will be computed for each pair of + nodes given in the iterable. The pairs must be given as + 2-tuples (u, v) where u and v are nodes in the graph. If ebunch + is None then all non-existent edges in the graph will be used. + Default value: None. + + alpha : Parameter defined for participation of Common Neighbor + and Centrality Algorithm share. Default value set to 0.8 + because author found better performance at 0.8 for all the + dataset. + Default value: 0.8 + + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their Common Neighbor and Centrality based + Parameterized Algorithm(CCPA) score. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p}") + (0, 1) -> 3.4000000000000004 + (2, 3) -> 3.4000000000000004 + + References + ---------- + .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al. + Missing Link Prediction using Common Neighbor and Centrality based Parameterized Algorithm. + Sci Rep 10, 364 (2020). + https://doi.org/10.1038/s41598-019-57304-y + """ + shortest_path = nx.shortest_path(G) + + def predict(u, v): + return alpha * len(list(nx.common_neighbors(G, u, v))) + (1 - alpha) * ( + G.number_of_nodes() / (len(shortest_path[u][v]) - 1) + ) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def preferential_attachment(G, ebunch=None): + r"""Compute the preferential attachment score of all node pairs in ebunch. + + Preferential attachment score of `u` and `v` is defined as + + .. math:: + + |\Gamma(u)| |\Gamma(v)| + + where $\Gamma(u)$ denotes the set of neighbors of $u$. + + Parameters + ---------- + G : graph + NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + Preferential attachment score will be computed for each pair of + nodes given in the iterable. The pairs must be given as + 2-tuples (u, v) where u and v are nodes in the graph. If ebunch + is None then all non-existent edges in the graph will be used. + Default value: None. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their preferential attachment score. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p}") + (0, 1) -> 16 + (2, 3) -> 16 + + References + ---------- + .. [1] D. Liben-Nowell, J. Kleinberg. + The Link Prediction Problem for Social Networks (2004). + http://www.cs.cornell.edu/home/kleinber/link-pred.pdf + """ + + def predict(u, v): + return G.degree(u) * G.degree(v) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def cn_soundarajan_hopcroft(G, ebunch=None, community="community"): + r"""Count the number of common neighbors of all node pairs in ebunch + using community information. + + For two nodes $u$ and $v$, this function computes the number of + common neighbors and bonus one for each common neighbor belonging to + the same community as $u$ and $v$. Mathematically, + + .. math:: + + |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w) + + where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ + and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of + neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + The score will be computed for each pair of nodes given in the + iterable. The pairs must be given as 2-tuples (u, v) where u + and v are nodes in the graph. If ebunch is None then all + non-existent edges in the graph will be used. + Default value: None. + + community : string, optional (default = 'community') + Nodes attribute name containing the community information. + G[u][community] identifies which community u belongs to. Each + node belongs to at most one community. Default value: 'community'. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their score. + + Examples + -------- + >>> G = nx.path_graph(3) + >>> G.nodes[0]["community"] = 0 + >>> G.nodes[1]["community"] = 0 + >>> G.nodes[2]["community"] = 0 + >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p}") + (0, 2) -> 2 + + References + ---------- + .. [1] Sucheta Soundarajan and John Hopcroft. + Using community information to improve the precision of link + prediction methods. + In Proceedings of the 21st international conference companion on + World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. + http://doi.acm.org/10.1145/2187980.2188150 + """ + + def predict(u, v): + Cu = _community(G, u, community) + Cv = _community(G, v, community) + cnbors = list(nx.common_neighbors(G, u, v)) + neighbors = ( + sum(_community(G, w, community) == Cu for w in cnbors) if Cu == Cv else 0 + ) + return len(cnbors) + neighbors + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"): + r"""Compute the resource allocation index of all node pairs in + ebunch using community information. + + For two nodes $u$ and $v$, this function computes the resource + allocation index considering only common neighbors belonging to the + same community as $u$ and $v$. Mathematically, + + .. math:: + + \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|} + + where $f(w)$ equals 1 if $w$ belongs to the same community as $u$ + and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of + neighbors of $u$. + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + The score will be computed for each pair of nodes given in the + iterable. The pairs must be given as 2-tuples (u, v) where u + and v are nodes in the graph. If ebunch is None then all + non-existent edges in the graph will be used. + Default value: None. + + community : string, optional (default = 'community') + Nodes attribute name containing the community information. + G[u][community] identifies which community u belongs to. Each + node belongs to at most one community. Default value: 'community'. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their score. + + Examples + -------- + >>> G = nx.Graph() + >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)]) + >>> G.nodes[0]["community"] = 0 + >>> G.nodes[1]["community"] = 0 + >>> G.nodes[2]["community"] = 1 + >>> G.nodes[3]["community"] = 0 + >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 3) -> 0.50000000 + + References + ---------- + .. [1] Sucheta Soundarajan and John Hopcroft. + Using community information to improve the precision of link + prediction methods. + In Proceedings of the 21st international conference companion on + World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608. + http://doi.acm.org/10.1145/2187980.2188150 + """ + + def predict(u, v): + Cu = _community(G, u, community) + Cv = _community(G, v, community) + if Cu != Cv: + return 0 + cnbors = nx.common_neighbors(G, u, v) + return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu) + + return _apply_prediction(G, predict, ebunch) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"): + """Compute the ratio of within- and inter-cluster common neighbors + of all node pairs in ebunch. + + For two nodes `u` and `v`, if a common neighbor `w` belongs to the + same community as them, `w` is considered as within-cluster common + neighbor of `u` and `v`. Otherwise, it is considered as + inter-cluster common neighbor of `u` and `v`. The ratio between the + size of the set of within- and inter-cluster common neighbors is + defined as the WIC measure. [1]_ + + Parameters + ---------- + G : graph + A NetworkX undirected graph. + + ebunch : iterable of node pairs, optional (default = None) + The WIC measure will be computed for each pair of nodes given in + the iterable. The pairs must be given as 2-tuples (u, v) where + u and v are nodes in the graph. If ebunch is None then all + non-existent edges in the graph will be used. + Default value: None. + + delta : float, optional (default = 0.001) + Value to prevent division by zero in case there is no + inter-cluster common neighbor between two nodes. See [1]_ for + details. Default value: 0.001. + + community : string, optional (default = 'community') + Nodes attribute name containing the community information. + G[u][community] identifies which community u belongs to. Each + node belongs to at most one community. Default value: 'community'. + + Returns + ------- + piter : iterator + An iterator of 3-tuples in the form (u, v, p) where (u, v) is a + pair of nodes and p is their WIC measure. + + Examples + -------- + >>> G = nx.Graph() + >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)]) + >>> G.nodes[0]["community"] = 0 + >>> G.nodes[1]["community"] = 1 + >>> G.nodes[2]["community"] = 0 + >>> G.nodes[3]["community"] = 0 + >>> G.nodes[4]["community"] = 0 + >>> preds = nx.within_inter_cluster(G, [(0, 4)]) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 4) -> 1.99800200 + >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5) + >>> for u, v, p in preds: + ... print(f"({u}, {v}) -> {p:.8f}") + (0, 4) -> 1.33333333 + + References + ---------- + .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes. + Link prediction in complex networks based on cluster information. + In Proceedings of the 21st Brazilian conference on Advances in + Artificial Intelligence (SBIA'12) + https://doi.org/10.1007/978-3-642-34459-6_10 + """ + if delta <= 0: + raise nx.NetworkXAlgorithmError("Delta must be greater than zero") + + def predict(u, v): + Cu = _community(G, u, community) + Cv = _community(G, v, community) + if Cu != Cv: + return 0 + cnbors = set(nx.common_neighbors(G, u, v)) + within = {w for w in cnbors if _community(G, w, community) == Cu} + inter = cnbors - within + return len(within) / (len(inter) + delta) + + return _apply_prediction(G, predict, ebunch) + + +def _community(G, u, community): + """Get the community of the given node.""" + node_u = G.nodes[u] + try: + return node_u[community] + except KeyError as e: + raise nx.NetworkXAlgorithmError("No community information") from e