diff env/lib/python3.9/site-packages/networkx/algorithms/link_prediction.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/link_prediction.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,579 @@
+"""
+Link prediction algorithms.
+"""
+
+
+from math import log
+
+import networkx as nx
+from networkx.utils import not_implemented_for
+
+__all__ = [  # public names exported by a star-import of this module
+    "resource_allocation_index",
+    "jaccard_coefficient",
+    "adamic_adar_index",
+    "preferential_attachment",
+    "cn_soundarajan_hopcroft",
+    "ra_index_soundarajan_hopcroft",
+    "within_inter_cluster",
+    "common_neighbor_centrality",
+]
+
+
+def _apply_prediction(G, func, ebunch=None):
+    """Lazily score every node pair in `ebunch` with `func`.
+
+    `G` is the graph the pairs refer to. It is only consulted when
+    `ebunch` is None, to enumerate its non-edges via ``nx.non_edges``.
+
+    `func` is a callable taking two nodes; whatever it returns is used
+    as the prediction score for a "link" joining those two nodes.
+
+    `ebunch` is an iterable of node pairs. If it is None, every pair of
+    nodes that is not joined by an edge in `G` is scored instead.
+
+    The result is a generator of ``(u, v, func(u, v))`` 3-tuples, so
+    `func` only runs as the caller consumes the generator.
+
+    """
+    pairs = nx.non_edges(G) if ebunch is None else ebunch
+    return ((a, b, func(a, b)) for a, b in pairs)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def resource_allocation_index(G, ebunch=None):
+    r"""Return the resource allocation index of each node pair in ebunch.
+
+    For two nodes `u` and `v` the resource allocation index is
+
+    .. math::
+
+        \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|}
+
+    where $\Gamma(u)$ is the set of neighbors of $u$.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the resource allocation index is
+        computed. Each pair must be a 2-tuple (u, v) where u and v
+        are nodes in the graph. If ebunch is None, every pair of
+        nodes not joined by an edge in the graph is used instead.
+        Default value: None.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p), where (u, v) is a pair of
+        nodes and p is the resource allocation index of that pair.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p:.8f}")
+    (0, 1) -> 0.75000000
+    (2, 3) -> 0.75000000
+
+    References
+    ----------
+    .. [1] T. Zhou, L. Lu, Y.-C. Zhang.
+       Predicting missing links via local information.
+       Eur. Phys. J. B 71 (2009) 623.
+       https://arxiv.org/pdf/0901.0553.pdf
+    """
+    def score(a, b):
+        shared = nx.common_neighbors(G, a, b)
+        return sum(1 / G.degree(nbr) for nbr in shared)
+
+    return _apply_prediction(G, score, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def jaccard_coefficient(G, ebunch=None):
+    r"""Return the Jaccard coefficient of each node pair in ebunch.
+
+    For two nodes `u` and `v` the Jaccard coefficient is
+
+    .. math::
+
+        \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|}
+
+    where $\Gamma(u)$ is the set of neighbors of $u$.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the Jaccard coefficient is computed.
+        Each pair must be a 2-tuple (u, v) where u and v are nodes
+        in the graph. If ebunch is None, every pair of nodes not
+        joined by an edge in the graph is used instead.
+        Default value: None.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p) where p is the Jaccard coefficient
+        of the pair (u, v); p is 0 if neither u nor v has any neighbor.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p:.8f}")
+    (0, 1) -> 0.60000000
+    (2, 3) -> 0.60000000
+
+    References
+    ----------
+    .. [1] D. Liben-Nowell, J. Kleinberg.
+           The Link Prediction Problem for Social Networks (2004).
+           http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
+    """
+    def score(a, b):
+        reachable = set(G[a]).union(G[b])
+        if not reachable:
+            return 0
+        shared = sum(1 for _ in nx.common_neighbors(G, a, b))
+        return shared / len(reachable)
+
+    return _apply_prediction(G, score, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def adamic_adar_index(G, ebunch=None):
+    r"""Return the Adamic-Adar index of each node pair in ebunch.
+
+    For two nodes `u` and `v` the Adamic-Adar index is
+
+    .. math::
+
+        \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|}
+
+    where $\Gamma(u)$ is the set of neighbors of $u$.
+    This index leads to zero-division for nodes only connected via self-loops.
+    It is intended to be used on graphs that contain no self-loops.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the Adamic-Adar index is computed.
+        Each pair must be a 2-tuple (u, v) where u and v are nodes
+        in the graph. If ebunch is None, every pair of nodes not
+        joined by an edge in the graph is used instead.
+        Default value: None.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p), where (u, v) is a pair of
+        nodes and p is the Adamic-Adar index of that pair.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p:.8f}")
+    (0, 1) -> 2.16404256
+    (2, 3) -> 2.16404256
+
+    References
+    ----------
+    .. [1] D. Liben-Nowell, J. Kleinberg.
+           The Link Prediction Problem for Social Networks (2004).
+           http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
+    """
+    def score(a, b):
+        shared = nx.common_neighbors(G, a, b)
+        return sum(1 / log(G.degree(nbr)) for nbr in shared)
+
+    return _apply_prediction(G, score, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def common_neighbor_centrality(G, ebunch=None, alpha=0.8):
+    r"""Return the CCPA score for each pair of nodes.
+
+    Compute the Common Neighbor and Centrality based Parameterized
+    Algorithm (CCPA) score of all node pairs in ebunch.
+
+    CCPA score of `u` and `v` is defined as
+
+    .. math::
+
+        \alpha \cdot (|\Gamma (u){\cap }^{}\Gamma (v)|)+(1-\alpha )\cdot \frac{N}{{d}_{uv}}
+
+    where $\Gamma(u)$ denotes the set of neighbors of $u$, $\Gamma(v)$ denotes the
+    set of neighbors of $v$, $\alpha$ is a parameter that varies between [0,1], $N$
+    denotes the total number of nodes in the graph and ${d}_{uv}$ denotes the
+    shortest distance between $u$ and $v$. When no path joins $u$ and $v$ the
+    distance is treated as infinite, so the second term contributes zero.
+
+    This algorithm is based on two vital properties of nodes, namely the number
+    of common neighbors and their centrality. Common neighbor refers to the common
+    nodes between two nodes. Centrality refers to the prestige that a node enjoys
+    in a network.
+
+    .. seealso::
+
+        :func:`common_neighbors`
+
+    Parameters
+    ----------
+    G : graph
+        NetworkX undirected graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        CCPA score will be computed for each pair of nodes given in
+        the iterable. The pairs must be given as 2-tuples (u, v) where
+        u and v are nodes in the graph. If ebunch is None then all
+        non-existent edges in the graph will be used.
+        Default value: None.
+
+    alpha : float, optional (default = 0.8)
+        Weight of the common-neighbor term; the distance term is
+        weighted by ``1 - alpha``. The default is 0.8 because the
+        authors of [1]_ found better performance at 0.8 on all their
+        datasets.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
+        pair of nodes and p is their Common Neighbor and Centrality based
+        Parameterized Algorithm (CCPA) score.
+
+    Raises
+    ------
+    NetworkXAlgorithmError
+        Raised while the returned iterator is consumed if a pair has
+        ``u == v``: the distance is zero, so the score is undefined.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> preds = nx.common_neighbor_centrality(G, [(0, 1), (2, 3)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p}")
+    (0, 1) -> 3.4000000000000004
+    (2, 3) -> 3.4000000000000004
+
+    References
+    ----------
+    .. [1] Ahmad, I., Akhtar, M.U., Noor, S. et al.
+           Missing Link Prediction using Common Neighbor and Centrality
+           based Parameterized Algorithm.
+           Sci Rep 10, 364 (2020).
+           https://doi.org/10.1038/s41598-019-57304-y
+    """
+    # Only distances are needed, so compute path lengths rather than the
+    # full paths. dict() accepts the (node, lengths) pairs the call yields.
+    dist = dict(nx.shortest_path_length(G))
+    n_nodes = G.number_of_nodes()
+    inf = float("inf")
+
+    def predict(u, v):
+        if u == v:
+            # d_uv would be 0; fail clearly instead of ZeroDivisionError.
+            raise nx.NetworkXAlgorithmError("Self links are not supported")
+        # No entry for v means it is unreachable from u (different
+        # components); n_nodes / inf is 0.0, so only the first term counts.
+        d_uv = dist[u].get(v, inf)
+        n_common = sum(1 for _ in nx.common_neighbors(G, u, v))
+        return alpha * n_common + (1 - alpha) * (n_nodes / d_uv)
+
+    return _apply_prediction(G, predict, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def preferential_attachment(G, ebunch=None):
+    r"""Return the preferential attachment score of each node pair in ebunch.
+
+    For two nodes `u` and `v` the preferential attachment score is
+
+    .. math::
+
+        |\Gamma(u)| |\Gamma(v)|
+
+    where $\Gamma(u)$ is the set of neighbors of $u$.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the preferential attachment score is
+        computed. Each pair must be a 2-tuple (u, v) where u and v
+        are nodes in the graph. If ebunch is None, every pair of
+        nodes not joined by an edge in the graph is used instead.
+        Default value: None.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p), where (u, v) is a pair of
+        nodes and p is the preferential attachment score of that pair.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p}")
+    (0, 1) -> 16
+    (2, 3) -> 16
+
+    References
+    ----------
+    .. [1] D. Liben-Nowell, J. Kleinberg.
+           The Link Prediction Problem for Social Networks (2004).
+           http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
+    """
+    def score(a, b):
+        deg = G.degree
+        return deg(a) * deg(b)
+
+    return _apply_prediction(G, score, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def cn_soundarajan_hopcroft(G, ebunch=None, community="community"):
+    r"""Count the common neighbors of each node pair in ebunch, giving
+    extra weight to neighbors in the pair's own community.
+
+    For two nodes $u$ and $v$, the score is the number of common
+    neighbors plus a bonus of one for every common neighbor that lies
+    in the same community as both $u$ and $v$. Mathematically,
+
+    .. math::
+
+        |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w)
+
+    where $f(w)$ is 1 if $w$ belongs to the same community as $u$
+    and $v$ and 0 otherwise, and $\Gamma(u)$ is the set of
+    neighbors of $u$.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the score is computed. Each pair
+        must be a 2-tuple (u, v) where u and v are nodes in the
+        graph. If ebunch is None, every pair of nodes not joined by
+        an edge in the graph is used instead.
+        Default value: None.
+
+    community : string, optional (default = 'community')
+        Name of the node attribute holding the community label, i.e.
+        G.nodes[u][community] is the community of u. Each node
+        belongs to at most one community. Default value: 'community'.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p), where (u, v) is a pair of
+        nodes and p is the score of that pair.
+
+    Examples
+    --------
+    >>> G = nx.path_graph(3)
+    >>> G.nodes[0]["community"] = 0
+    >>> G.nodes[1]["community"] = 0
+    >>> G.nodes[2]["community"] = 0
+    >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p}")
+    (0, 2) -> 2
+
+    References
+    ----------
+    .. [1] Sucheta Soundarajan and John Hopcroft.
+       Using community information to improve the precision of link
+       prediction methods.
+       In Proceedings of the 21st international conference companion on
+       World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
+       http://doi.acm.org/10.1145/2187980.2188150
+    """
+
+    def score(a, b):
+        comm_a = _community(G, a, community)
+        comm_b = _community(G, b, community)
+        shared = list(nx.common_neighbors(G, a, b))
+        bonus = 0
+        if comm_a == comm_b:
+            bonus = sum(_community(G, w, community) == comm_a for w in shared)
+        return len(shared) + bonus
+
+    return _apply_prediction(G, score, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def ra_index_soundarajan_hopcroft(G, ebunch=None, community="community"):
+    r"""Return the community-aware resource allocation index of each
+    node pair in ebunch.
+
+    For two nodes $u$ and $v$, this is the resource allocation index
+    restricted to the common neighbors that lie in the same community
+    as both $u$ and $v$. Mathematically,
+
+    .. math::
+
+        \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|}
+
+    where $f(w)$ is 1 if $w$ belongs to the same community as $u$
+    and $v$ and 0 otherwise, and $\Gamma(u)$ is the set of
+    neighbors of $u$.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the score is computed. Each pair
+        must be a 2-tuple (u, v) where u and v are nodes in the
+        graph. If ebunch is None, every pair of nodes not joined by
+        an edge in the graph is used instead.
+
+    community : string, optional (default = 'community')
+        Name of the node attribute holding the community label, i.e.
+        G.nodes[u][community]. Each node belongs to at most one community.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p), where (u, v) is a pair of
+        nodes and p is the score of that pair.
+
+    Examples
+    --------
+    >>> G = nx.Graph()
+    >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)])
+    >>> G.nodes[0]["community"] = 0
+    >>> G.nodes[1]["community"] = 0
+    >>> G.nodes[2]["community"] = 1
+    >>> G.nodes[3]["community"] = 0
+    >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p:.8f}")
+    (0, 3) -> 0.50000000
+
+    References
+    ----------
+    .. [1] Sucheta Soundarajan and John Hopcroft.
+       Using community information to improve the precision of link
+       prediction methods.
+       In Proceedings of the 21st international conference companion on
+       World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
+       http://doi.acm.org/10.1145/2187980.2188150
+    """
+    def score(a, b):
+        comm_a = _community(G, a, community)
+        comm_b = _community(G, b, community)
+        if comm_a != comm_b:
+            return 0
+        return sum(
+            1 / G.degree(w)
+            for w in nx.common_neighbors(G, a, b)
+            if _community(G, w, community) == comm_a
+        )
+
+    return _apply_prediction(G, score, ebunch)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+def within_inter_cluster(G, ebunch=None, delta=0.001, community="community"):
+    """Return the within- to inter-cluster common neighbor ratio of
+    each node pair in ebunch.
+
+    For two nodes `u` and `v`, a common neighbor `w` that belongs to
+    the same community as them is a within-cluster common neighbor of
+    `u` and `v`; any other common neighbor is an inter-cluster common
+    neighbor of `u` and `v`. The WIC measure is the ratio between the
+    sizes of the within-cluster and inter-cluster sets. [1]_ Pairs
+    whose two nodes lie in different communities score 0.
+
+    Parameters
+    ----------
+    G : graph
+        An undirected NetworkX graph.
+
+    ebunch : iterable of node pairs, optional (default = None)
+        The node pairs for which the WIC measure is computed. Each
+        pair must be a 2-tuple (u, v) where u and v are nodes in the
+        graph. If ebunch is None, every pair of nodes not joined by
+        an edge in the graph is used instead.
+
+    delta : float, optional (default = 0.001)
+        Positive value added to the inter-cluster count so the ratio is
+        defined when there is no inter-cluster common neighbor; see
+        [1]_. Non-positive values raise NetworkXAlgorithmError.
+
+    community : string, optional (default = 'community')
+        Name of the node attribute holding the community label, i.e.
+        G.nodes[u][community]. Each node belongs to at most one community.
+
+    Returns
+    -------
+    piter : iterator
+        An iterator of 3-tuples (u, v, p), where (u, v) is a pair of
+        nodes and p is the WIC measure of that pair.
+
+    Examples
+    --------
+    >>> G = nx.Graph()
+    >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)])
+    >>> G.nodes[0]["community"] = 0
+    >>> G.nodes[1]["community"] = 1
+    >>> G.nodes[2]["community"] = 0
+    >>> G.nodes[3]["community"] = 0
+    >>> G.nodes[4]["community"] = 0
+    >>> preds = nx.within_inter_cluster(G, [(0, 4)])
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p:.8f}")
+    (0, 4) -> 1.99800200
+    >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5)
+    >>> for u, v, p in preds:
+    ...     print(f"({u}, {v}) -> {p:.8f}")
+    (0, 4) -> 1.33333333
+
+    References
+    ----------
+    .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes.
+       Link prediction in complex networks based on cluster information.
+       In Proceedings of the 21st Brazilian conference on Advances in
+       Artificial Intelligence (SBIA'12)
+       https://doi.org/10.1007/978-3-642-34459-6_10
+    """
+    if delta <= 0:
+        raise nx.NetworkXAlgorithmError("Delta must be greater than zero")
+
+    def score(a, b):
+        comm_a = _community(G, a, community)
+        if comm_a != _community(G, b, community):
+            return 0
+        n_within = n_inter = 0
+        for w in set(nx.common_neighbors(G, a, b)):
+            if _community(G, w, community) == comm_a:
+                n_within += 1
+            else:
+                n_inter += 1
+        return n_within / (n_inter + delta)
+
+    return _apply_prediction(G, score, ebunch)
+
+
+def _community(G, u, community):
+    """Return the value stored under `community` in the data of node `u`."""
+    attrs = G.nodes[u]  # outside the try: errors from this lookup propagate as-is
+    try:
+        return attrs[community]
+    except KeyError as err:
+        raise nx.NetworkXAlgorithmError("No community information") from err