diff env/lib/python3.9/site-packages/networkx/algorithms/cluster.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/cluster.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,544 @@
+"""Algorithms to characterize the number of triangles in a graph."""
+
+from itertools import chain
+from itertools import combinations
+from collections import Counter
+
+from networkx.utils import not_implemented_for
+
+__all__ = [
+    "triangles",
+    "average_clustering",
+    "clustering",
+    "transitivity",
+    "square_clustering",
+    "generalized_degree",
+]
+
+
+@not_implemented_for("directed")
+def triangles(G, nodes=None):
+    """Compute the number of triangles.
+
+    Finds the number of triangles that include a node as one vertex.
+
+    Parameters
+    ----------
+    G : graph
+       A networkx graph
+    nodes : container of nodes, optional (default= all nodes in G)
+       Compute triangles for nodes in this container.
+
+    Returns
+    -------
+    out : dictionary
+       Number of triangles keyed by node label.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> print(nx.triangles(G, 0))
+    6
+    >>> print(nx.triangles(G))
+    {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
+    >>> print(list(nx.triangles(G, (0, 1)).values()))
+    [6, 6]
+
+    Notes
+    -----
+    When computing triangles for the entire graph each triangle is counted
+    three times, once at each node.  Self loops are ignored.
+
+    """
+    # If `nodes` represents a single node in the graph, return only its number
+    # of triangles.
+    if nodes in G:
+        return next(_triangles_and_degree_iter(G, nodes))[2] // 2
+    # Otherwise, `nodes` represents an iterable of nodes, so return a
+    # dictionary mapping node to number of triangles.
+    return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)}
+
+
+@not_implemented_for("multigraph")
+def _triangles_and_degree_iter(G, nodes=None):
+    """ Return an iterator of (node, degree, triangles, generalized degree).
+
+    This double counts triangles so you may want to divide by 2.
+    See degree(), triangles() and generalized_degree() for definitions
+    and details.
+
+    """
+    if nodes is None:
+        nodes_nbrs = G.adj.items()
+    else:
+        nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
+
+    for v, v_nbrs in nodes_nbrs:
+        vs = set(v_nbrs) - {v}
+        gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
+        ntriangles = sum(k * val for k, val in gen_degree.items())
+        yield (v, len(vs), ntriangles, gen_degree)
+
+
+@not_implemented_for("multigraph")
+def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
+    """ Return an iterator of (node, degree, weighted_triangles).
+
+    Used for weighted clustering.
+
+    """
+    if weight is None or G.number_of_edges() == 0:
+        max_weight = 1
+    else:
+        max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
+    if nodes is None:
+        nodes_nbrs = G.adj.items()
+    else:
+        nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
+
+    def wt(u, v):
+        return G[u][v].get(weight, 1) / max_weight
+
+    for i, nbrs in nodes_nbrs:
+        inbrs = set(nbrs) - {i}
+        weighted_triangles = 0
+        seen = set()
+        for j in inbrs:
+            seen.add(j)
+            # This prevents double counting.
+            jnbrs = set(G[j]) - seen
+            # Only compute the edge weight once, before the inner inner
+            # loop.
+            wij = wt(i, j)
+            weighted_triangles += sum(
+                (wij * wt(j, k) * wt(k, i)) ** (1 / 3) for k in inbrs & jnbrs
+            )
+        yield (i, len(inbrs), 2 * weighted_triangles)
+
+
+@not_implemented_for("multigraph")
+def _directed_triangles_and_degree_iter(G, nodes=None):
+    """ Return an iterator of
+    (node, total_degree, reciprocal_degree, directed_triangles).
+
+    Used for directed clustering.
+
+    """
+    nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
+
+    for i, preds, succs in nodes_nbrs:
+        ipreds = set(preds) - {i}
+        isuccs = set(succs) - {i}
+
+        directed_triangles = 0
+        for j in chain(ipreds, isuccs):
+            jpreds = set(G._pred[j]) - {j}
+            jsuccs = set(G._succ[j]) - {j}
+            directed_triangles += sum(
+                1
+                for k in chain(
+                    (ipreds & jpreds),
+                    (ipreds & jsuccs),
+                    (isuccs & jpreds),
+                    (isuccs & jsuccs),
+                )
+            )
+        dtotal = len(ipreds) + len(isuccs)
+        dbidirectional = len(ipreds & isuccs)
+        yield (i, dtotal, dbidirectional, directed_triangles)
+
+
+@not_implemented_for("multigraph")
+def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
+    """ Return an iterator of
+    (node, total_degree, reciprocal_degree, directed_weighted_triangles).
+
+    Used for directed weighted clustering.
+
+    """
+    if weight is None or G.number_of_edges() == 0:
+        max_weight = 1
+    else:
+        max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
+
+    nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
+
+    def wt(u, v):
+        return G[u][v].get(weight, 1) / max_weight
+
+    for i, preds, succs in nodes_nbrs:
+        ipreds = set(preds) - {i}
+        isuccs = set(succs) - {i}
+
+        directed_triangles = 0
+        for j in ipreds:
+            jpreds = set(G._pred[j]) - {j}
+            jsuccs = set(G._succ[j]) - {j}
+            directed_triangles += sum(
+                (wt(j, i) * wt(k, i) * wt(k, j)) ** (1 / 3) for k in ipreds & jpreds
+            )
+            directed_triangles += sum(
+                (wt(j, i) * wt(k, i) * wt(j, k)) ** (1 / 3) for k in ipreds & jsuccs
+            )
+            directed_triangles += sum(
+                (wt(j, i) * wt(i, k) * wt(k, j)) ** (1 / 3) for k in isuccs & jpreds
+            )
+            directed_triangles += sum(
+                (wt(j, i) * wt(i, k) * wt(j, k)) ** (1 / 3) for k in isuccs & jsuccs
+            )
+
+        for j in isuccs:
+            jpreds = set(G._pred[j]) - {j}
+            jsuccs = set(G._succ[j]) - {j}
+            directed_triangles += sum(
+                (wt(i, j) * wt(k, i) * wt(k, j)) ** (1 / 3) for k in ipreds & jpreds
+            )
+            directed_triangles += sum(
+                (wt(i, j) * wt(k, i) * wt(j, k)) ** (1 / 3) for k in ipreds & jsuccs
+            )
+            directed_triangles += sum(
+                (wt(i, j) * wt(i, k) * wt(k, j)) ** (1 / 3) for k in isuccs & jpreds
+            )
+            directed_triangles += sum(
+                (wt(i, j) * wt(i, k) * wt(j, k)) ** (1 / 3) for k in isuccs & jsuccs
+            )
+
+        dtotal = len(ipreds) + len(isuccs)
+        dbidirectional = len(ipreds & isuccs)
+        yield (i, dtotal, dbidirectional, directed_triangles)
+
+
+def average_clustering(G, nodes=None, weight=None, count_zeros=True):
+    r"""Compute the average clustering coefficient for the graph G.
+
+    The clustering coefficient for the graph is the average,
+
+    .. math::
+
+       C = \frac{1}{n}\sum_{v \in G} c_v,
+
+    where :math:`n` is the number of nodes in `G`.
+
+    Parameters
+    ----------
+    G : graph
+
+    nodes : container of nodes, optional (default=all nodes in G)
+       Compute average clustering for nodes in this container.
+
+    weight : string or None, optional (default=None)
+       The edge attribute that holds the numerical value used as a weight.
+       If None, then each edge has weight 1.
+
+    count_zeros : bool
+       If False include only the nodes with nonzero clustering in the average.
+
+    Returns
+    -------
+    avg : float
+       Average clustering
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> print(nx.average_clustering(G))
+    1.0
+
+    Notes
+    -----
+    This is a space saving routine; it might be faster
+    to use the clustering function to get a list and then take the average.
+
+    Self loops are ignored.
+
+    References
+    ----------
+    .. [1] Generalizations of the clustering coefficient to weighted
+       complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
+       K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
+       http://jponnela.com/web_documents/a9.pdf
+    .. [2] Marcus Kaiser,  Mean clustering coefficients: the role of isolated
+       nodes and leafs on clustering measures for small-world networks.
+       https://arxiv.org/abs/0802.2512
+    """
+    c = clustering(G, nodes, weight=weight).values()
+    if not count_zeros:
+        c = [v for v in c if v > 0]
+    return sum(c) / len(c)
+
+
+def clustering(G, nodes=None, weight=None):
+    r"""Compute the clustering coefficient for nodes.
+
+    For unweighted graphs, the clustering of a node :math:`u`
+    is the fraction of possible triangles through that node that exist,
+
+    .. math::
+
+      c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
+
+    where :math:`T(u)` is the number of triangles through node :math:`u` and
+    :math:`deg(u)` is the degree of :math:`u`.
+
+    For weighted graphs, there are several ways to define clustering [1]_.
+    the one used here is defined
+    as the geometric average of the subgraph edge weights [2]_,
+
+    .. math::
+
+       c_u = \frac{1}{deg(u)(deg(u)-1))}
+             \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
+
+    The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
+    in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
+
+    The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
+
+    For directed graphs, the clustering is similarly defined as the fraction
+    of all possible directed triangles or geometric average of the subgraph
+    edge weights for unweighted and weighted directed graph respectively [3]_.
+
+    .. math::
+
+       c_u = \frac{1}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)}
+             T(u),
+
+    where :math:`T(u)` is the number of directed triangles through node
+    :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
+    :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
+    :math:`u`.
+
+    Parameters
+    ----------
+    G : graph
+
+    nodes : container of nodes, optional (default=all nodes in G)
+       Compute clustering for nodes in this container.
+
+    weight : string or None, optional (default=None)
+       The edge attribute that holds the numerical value used as a weight.
+       If None, then each edge has weight 1.
+
+    Returns
+    -------
+    out : float, or dictionary
+       Clustering coefficient at specified nodes
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> print(nx.clustering(G, 0))
+    1.0
+    >>> print(nx.clustering(G))
+    {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
+
+    Notes
+    -----
+    Self loops are ignored.
+
+    References
+    ----------
+    .. [1] Generalizations of the clustering coefficient to weighted
+       complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
+       K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
+       http://jponnela.com/web_documents/a9.pdf
+    .. [2] Intensity and coherence of motifs in weighted complex
+       networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
+       Physical Review E, 71(6), 065103 (2005).
+    .. [3] Clustering in complex directed networks by G. Fagiolo,
+       Physical Review E, 76(2), 026107 (2007).
+    """
+    if G.is_directed():
+        if weight is not None:
+            td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight)
+            clusterc = {
+                v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
+                for v, dt, db, t in td_iter
+            }
+        else:
+            td_iter = _directed_triangles_and_degree_iter(G, nodes)
+            clusterc = {
+                v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
+                for v, dt, db, t in td_iter
+            }
+    else:
+        if weight is not None:
+            td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight)
+            clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter}
+        else:
+            td_iter = _triangles_and_degree_iter(G, nodes)
+            clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter}
+    if nodes in G:
+        # Return the value of the sole entry in the dictionary.
+        return clusterc[nodes]
+    return clusterc
+
+
+def transitivity(G):
+    r"""Compute graph transitivity, the fraction of all possible triangles
+    present in G.
+
+    Possible triangles are identified by the number of "triads"
+    (two edges with a shared vertex).
+
+    The transitivity is
+
+    .. math::
+
+        T = 3\frac{\#triangles}{\#triads}.
+
+    Parameters
+    ----------
+    G : graph
+
+    Returns
+    -------
+    out : float
+       Transitivity
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> print(nx.transitivity(G))
+    1.0
+    """
+    triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
+    contri = sum(d * (d - 1) for v, d, t, _ in _triangles_and_degree_iter(G))
+    return 0 if triangles == 0 else triangles / contri
+
+
+def square_clustering(G, nodes=None):
+    r""" Compute the squares clustering coefficient for nodes.
+
+    For each node return the fraction of possible squares that exist at
+    the node [1]_
+
+    .. math::
+       C_4(v) = \frac{ \sum_{u=1}^{k_v}
+       \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
+       \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},
+
+    where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and
+    :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u -
+    (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw}))`, where
+    :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0
+    otherwise.
+
+    Parameters
+    ----------
+    G : graph
+
+    nodes : container of nodes, optional (default=all nodes in G)
+       Compute clustering for nodes in this container.
+
+    Returns
+    -------
+    c4 : dictionary
+       A dictionary keyed by node with the square clustering coefficient value.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> print(nx.square_clustering(G, 0))
+    1.0
+    >>> print(nx.square_clustering(G))
+    {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
+
+    Notes
+    -----
+    While :math:`C_3(v)` (triangle clustering) gives the probability that
+    two neighbors of node v are connected with each other, :math:`C_4(v)` is
+    the probability that two neighbors of node v share a common
+    neighbor different from v. This algorithm can be applied to both
+    bipartite and unipartite networks.
+
+    References
+    ----------
+    .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
+        Cycles and clustering in bipartite networks.
+        Physical Review E (72) 056127.
+    """
+    if nodes is None:
+        node_iter = G
+    else:
+        node_iter = G.nbunch_iter(nodes)
+    clustering = {}
+    for v in node_iter:
+        clustering[v] = 0
+        potential = 0
+        for u, w in combinations(G[v], 2):
+            squares = len((set(G[u]) & set(G[w])) - {v})
+            clustering[v] += squares
+            degm = squares + 1
+            if w in G[u]:
+                degm += 1
+            potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares
+        if potential > 0:
+            clustering[v] /= potential
+    if nodes in G:
+        # Return the value of the sole entry in the dictionary.
+        return clustering[nodes]
+    return clustering
+
+
+@not_implemented_for("directed")
+def generalized_degree(G, nodes=None):
+    r""" Compute the generalized degree for nodes.
+
+    For each node, the generalized degree shows how many edges of given
+    triangle multiplicity the node is connected to. The triangle multiplicity
+    of an edge is the number of triangles an edge participates in. The
+    generalized degree of node :math:`i` can be written as a vector
+    :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where
+    :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that
+    participate in :math:`j` triangles.
+
+    Parameters
+    ----------
+    G : graph
+
+    nodes : container of nodes, optional (default=all nodes in G)
+       Compute the generalized degree for nodes in this container.
+
+    Returns
+    -------
+    out : Counter, or dictionary of Counters
+       Generalized degree of specified nodes. The Counter is keyed by edge
+       triangle multiplicity.
+
+    Examples
+    --------
+    >>> G = nx.complete_graph(5)
+    >>> print(nx.generalized_degree(G, 0))
+    Counter({3: 4})
+    >>> print(nx.generalized_degree(G))
+    {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})}
+
+    To recover the number of triangles attached to a node:
+
+    >>> k1 = nx.generalized_degree(G, 0)
+    >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0)
+    True
+
+    Notes
+    -----
+    In a network of N nodes, the highest triangle multiplicty an edge can have
+    is N-2.
+
+    The return value does not include a `zero` entry if no edges of a
+    particular triangle multiplicity are present.
+
+    The number of triangles node :math:`i` is attached to can be recovered from
+    the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc,
+    k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`.
+
+    References
+    ----------
+    .. [1] Networks with arbitrary edge multiplicities by V. Zlatić,
+        D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters),
+        Volume 97, Number 2 (2012).
+        https://iopscience.iop.org/article/10.1209/0295-5075/97/28005
+    """
+    if nodes in G:
+        return next(_triangles_and_degree_iter(G, nodes))[3]
+    return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}