diff env/lib/python3.9/site-packages/networkx/algorithms/centrality/current_flow_betweenness.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/centrality/current_flow_betweenness.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,342 @@
+"""Current-flow betweenness centrality measures."""
+import networkx as nx
+from networkx.algorithms.centrality.flow_matrix import (
+    CGInverseLaplacian,
+    flow_matrix_row,
+    FullInverseLaplacian,
+    laplacian_sparse_matrix,
+    SuperLUInverseLaplacian,
+)
+from networkx.utils import (
+    not_implemented_for,
+    reverse_cuthill_mckee_ordering,
+    py_random_state,
+)
+
+__all__ = [
+    "current_flow_betweenness_centrality",
+    "approximate_current_flow_betweenness_centrality",
+    "edge_current_flow_betweenness_centrality",
+]
+
+
+@py_random_state(7)
+@not_implemented_for("directed")
+def approximate_current_flow_betweenness_centrality(
+    G,
+    normalized=True,
+    weight=None,
+    dtype=float,
+    solver="full",
+    epsilon=0.5,
+    kmax=10000,
+    seed=None,
+):
+    r"""Compute the approximate current-flow betweenness centrality for nodes.
+
+    Approximates the current-flow betweenness centrality within absolute
+    error of epsilon with high probability [1]_.
+
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph
+
+    normalized : bool, optional (default=True)
+      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
+      n is the number of nodes in G.
+
+    weight : string or None, optional (default=None)
+      Key for edge data used as the edge weight.
+      If None, then use 1 as each edge weight.
+
+    dtype : data type (float)
+      Default data type for internal matrices.
+      Set to np.float32 for lower memory consumption.
+
+    solver : string (default='full')
+       Type of linear solver to use for computing the flow matrix.
+       Options are "full" (uses most memory), "lu" (recommended), and
+       "cg" (uses least memory).
+
+    epsilon: float
+        Absolute error tolerance.
+
+    kmax: int
+       Maximum number of sample node pairs to use for approximation.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    nodes : dictionary
+       Dictionary of nodes with betweenness centrality as the value.
+
+    See Also
+    --------
+    current_flow_betweenness_centrality
+
+    Notes
+    -----
+    The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
+    and the space required is $O(m)$ for $n$ nodes and $m$ edges.
+
+    If the edges have a 'weight' attribute they will be used as
+    weights in this algorithm.  Unspecified weights are set to 1.
+
+    References
+    ----------
+    .. [1] Ulrik Brandes and Daniel Fleischer:
+       Centrality Measures Based on Current Flow.
+       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
+       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
+       http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
+    """
+    try:
+        import numpy as np
+    except ImportError as e:
+        raise ImportError(
+            "current_flow_betweenness_centrality requires NumPy " "http://numpy.org/"
+        ) from e
+    if not nx.is_connected(G):
+        raise nx.NetworkXError("Graph not connected.")
+    solvername = {
+        "full": FullInverseLaplacian,
+        "lu": SuperLUInverseLaplacian,
+        "cg": CGInverseLaplacian,
+    }
+    n = G.number_of_nodes()
+    ordering = list(reverse_cuthill_mckee_ordering(G))
+    # make a copy with integer labels according to rcm ordering
+    # this could be done without a copy if we really wanted to
+    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
+    L = laplacian_sparse_matrix(
+        H, nodelist=range(n), weight=weight, dtype=dtype, format="csc"
+    )
+    C = solvername[solver](L, dtype=dtype)  # initialize solver
+    betweenness = dict.fromkeys(H, 0.0)
+    nb = (n - 1.0) * (n - 2.0)  # normalization factor
+    cstar = n * (n - 1) / nb
+    l = 1  # parameter in approximation, adjustable
+    k = l * int(np.ceil((cstar / epsilon) ** 2 * np.log(n)))
+    if k > kmax:
+        msg = f"Number random pairs k>kmax ({k}>{kmax}) "
+        raise nx.NetworkXError(msg, "Increase kmax or epsilon")
+    cstar2k = cstar / (2 * k)
+    for i in range(k):
+        s, t = seed.sample(range(n), 2)
+        b = np.zeros(n, dtype=dtype)
+        b[s] = 1
+        b[t] = -1
+        p = C.solve(b)
+        for v in H:
+            if v == s or v == t:
+                continue
+            for nbr in H[v]:
+                w = H[v][nbr].get(weight, 1.0)
+                betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
+    if normalized:
+        factor = 1.0
+    else:
+        factor = nb / 2.0
+    # remap to original node names and "unnormalize" if required
+    return {ordering[k]: float(v * factor) for k, v in betweenness.items()}
+
+
+@not_implemented_for("directed")
+def current_flow_betweenness_centrality(
+    G, normalized=True, weight=None, dtype=float, solver="full"
+):
+    r"""Compute current-flow betweenness centrality for nodes.
+
+    Current-flow betweenness centrality uses an electrical current
+    model for information spreading in contrast to betweenness
+    centrality which uses shortest paths.
+
+    Current-flow betweenness centrality is also known as
+    random-walk betweenness centrality [2]_.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph
+
+    normalized : bool, optional (default=True)
+      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
+      n is the number of nodes in G.
+
+    weight : string or None, optional (default=None)
+      Key for edge data used as the edge weight.
+      If None, then use 1 as each edge weight.
+
+    dtype : data type (float)
+      Default data type for internal matrices.
+      Set to np.float32 for lower memory consumption.
+
+    solver : string (default='full')
+       Type of linear solver to use for computing the flow matrix.
+       Options are "full" (uses most memory), "lu" (recommended), and
+       "cg" (uses least memory).
+
+    Returns
+    -------
+    nodes : dictionary
+       Dictionary of nodes with betweenness centrality as the value.
+
+    See Also
+    --------
+    approximate_current_flow_betweenness_centrality
+    betweenness_centrality
+    edge_betweenness_centrality
+    edge_current_flow_betweenness_centrality
+
+    Notes
+    -----
+    Current-flow betweenness can be computed in  $O(I(n-1)+mn \log n)$
+    time [1]_, where $I(n-1)$ is the time needed to compute the
+    inverse Laplacian.  For a full matrix this is $O(n^3)$ but using
+    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
+    Laplacian matrix condition number.
+
+    The space required is $O(nw)$ where $w$ is the width of the sparse
+    Laplacian matrix.  Worse case is $w=n$ for $O(n^2)$.
+
+    If the edges have a 'weight' attribute they will be used as
+    weights in this algorithm.  Unspecified weights are set to 1.
+
+    References
+    ----------
+    .. [1] Centrality Measures Based on Current Flow.
+       Ulrik Brandes and Daniel Fleischer,
+       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
+       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
+       http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
+
+    .. [2] A measure of betweenness centrality based on random walks,
+       M. E. J. Newman, Social Networks 27, 39-54 (2005).
+    """
+    if not nx.is_connected(G):
+        raise nx.NetworkXError("Graph not connected.")
+    n = G.number_of_nodes()
+    ordering = list(reverse_cuthill_mckee_ordering(G))
+    # make a copy with integer labels according to rcm ordering
+    # this could be done without a copy if we really wanted to
+    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
+    betweenness = dict.fromkeys(H, 0.0)  # b[v]=0 for v in H
+    for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
+        pos = dict(zip(row.argsort()[::-1], range(n)))
+        for i in range(n):
+            betweenness[s] += (i - pos[i]) * row[i]
+            betweenness[t] += (n - i - 1 - pos[i]) * row[i]
+    if normalized:
+        nb = (n - 1.0) * (n - 2.0)  # normalization factor
+    else:
+        nb = 2.0
+    for v in H:
+        betweenness[v] = float((betweenness[v] - v) * 2.0 / nb)
+    return {ordering[k]: v for k, v in betweenness.items()}
+
+
+@not_implemented_for("directed")
+def edge_current_flow_betweenness_centrality(
+    G, normalized=True, weight=None, dtype=float, solver="full"
+):
+    r"""Compute current-flow betweenness centrality for edges.
+
+    Current-flow betweenness centrality uses an electrical current
+    model for information spreading in contrast to betweenness
+    centrality which uses shortest paths.
+
+    Current-flow betweenness centrality is also known as
+    random-walk betweenness centrality [2]_.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph
+
+    normalized : bool, optional (default=True)
+      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
+      n is the number of nodes in G.
+
+    weight : string or None, optional (default=None)
+      Key for edge data used as the edge weight.
+      If None, then use 1 as each edge weight.
+
+    dtype : data type (default=float)
+      Default data type for internal matrices.
+      Set to np.float32 for lower memory consumption.
+
+    solver : string (default='full')
+       Type of linear solver to use for computing the flow matrix.
+       Options are "full" (uses most memory), "lu" (recommended), and
+       "cg" (uses least memory).
+
+    Returns
+    -------
+    nodes : dictionary
+       Dictionary of edge tuples with betweenness centrality as the value.
+
+    Raises
+    ------
+    NetworkXError
+        The algorithm does not support DiGraphs.
+        If the input graph is an instance of DiGraph class, NetworkXError
+        is raised.
+
+    See Also
+    --------
+    betweenness_centrality
+    edge_betweenness_centrality
+    current_flow_betweenness_centrality
+
+    Notes
+    -----
+    Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
+    time [1]_, where $I(n-1)$ is the time needed to compute the
+    inverse Laplacian.  For a full matrix this is $O(n^3)$ but using
+    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
+    Laplacian matrix condition number.
+
+    The space required is $O(nw)$ where $w$ is the width of the sparse
+    Laplacian matrix.  Worse case is $w=n$ for $O(n^2)$.
+
+    If the edges have a 'weight' attribute they will be used as
+    weights in this algorithm.  Unspecified weights are set to 1.
+
+    References
+    ----------
+    .. [1] Centrality Measures Based on Current Flow.
+       Ulrik Brandes and Daniel Fleischer,
+       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
+       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
+       http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
+
+    .. [2] A measure of betweenness centrality based on random walks,
+       M. E. J. Newman, Social Networks 27, 39-54 (2005).
+    """
+    from networkx.utils import reverse_cuthill_mckee_ordering
+
+    if not nx.is_connected(G):
+        raise nx.NetworkXError("Graph not connected.")
+    n = G.number_of_nodes()
+    ordering = list(reverse_cuthill_mckee_ordering(G))
+    # make a copy with integer labels according to rcm ordering
+    # this could be done without a copy if we really wanted to
+    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
+    edges = (tuple(sorted((u, v))) for u, v in H.edges())
+    betweenness = dict.fromkeys(edges, 0.0)
+    if normalized:
+        nb = (n - 1.0) * (n - 2.0)  # normalization factor
+    else:
+        nb = 2.0
+    for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
+        pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
+        for i in range(n):
+            betweenness[e] += (i + 1 - pos[i]) * row[i]
+            betweenness[e] += (n - i - pos[i]) * row[i]
+        betweenness[e] /= nb
+    return {(ordering[s], ordering[t]): float(v) for (s, t), v in betweenness.items()}