diff env/lib/python3.9/site-packages/networkx/algorithms/community/quality.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/community/quality.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,334 @@
+"""Functions for measuring the quality of a partition (into
+communities).
+
+"""
+
+from functools import wraps
+from itertools import product
+
+import networkx as nx
+from networkx import NetworkXError
+from networkx.utils import not_implemented_for
+from networkx.algorithms.community.community_utils import is_partition
+
+__all__ = ["coverage", "modularity", "performance"]
+
+
+class NotAPartition(NetworkXError):
+    """Raised if a given collection is not a partition.
+
+    """
+
+    def __init__(self, G, collection):
+        msg = f"{G} is not a valid partition of the graph {collection}"
+        super().__init__(msg)
+
+
+def require_partition(func):
+    """Decorator to check that a valid partition is input to a function
+
+    Raises :exc:`networkx.NetworkXError` if the partition is not valid.
+
+    This decorator should be used on functions whose first two arguments
+    are a graph and a partition of the nodes of that graph (in that
+    order)::
+
+        >>> @require_partition
+        ... def foo(G, partition):
+        ...     print("partition is valid!")
+        ...
+        >>> G = nx.complete_graph(5)
+        >>> partition = [{0, 1}, {2, 3}, {4}]
+        >>> foo(G, partition)
+        partition is valid!
+        >>> partition = [{0}, {2, 3}, {4}]
+        >>> foo(G, partition)
+        Traceback (most recent call last):
+          ...
+        networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
+        >>> partition = [{0, 1}, {1, 2, 3}, {4}]
+        >>> foo(G, partition)
+        Traceback (most recent call last):
+          ...
+        networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
+
+    """
+
+    @wraps(func)
+    def new_func(*args, **kw):
+        # Here we assume that the first two arguments are (G, partition).
+        if not is_partition(*args[:2]):
+            raise nx.NetworkXError(
+                "`partition` is not a valid partition of" " the nodes of G"
+            )
+        return func(*args, **kw)
+
+    return new_func
+
+
+def intra_community_edges(G, partition):
+    """Returns the number of intra-community edges for a partition of `G`.
+
+    Parameters
+    ----------
+    G : NetworkX graph.
+
+    partition : iterable of sets of nodes
+        This must be a partition of the nodes of `G`.
+
+    The "intra-community edges" are those edges joining a pair of nodes
+    in the same block of the partition.
+
+    """
+    return sum(G.subgraph(block).size() for block in partition)
+
+
+def inter_community_edges(G, partition):
+    """Returns the number of inter-community edges for a prtition of `G`.
+    according to the given
+    partition of the nodes of `G`.
+
+    Parameters
+    ----------
+    G : NetworkX graph.
+
+    partition : iterable of sets of nodes
+        This must be a partition of the nodes of `G`.
+
+    The *inter-community edges* are those edges joining a pair of nodes
+    in different blocks of the partition.
+
+    Implementation note: this function creates an intermediate graph
+    that may require the same amount of memory as that of `G`.
+
+    """
+    # Alternate implementation that does not require constructing a new
+    # graph object (but does require constructing an affiliation
+    # dictionary):
+    #
+    #     aff = dict(chain.from_iterable(((v, block) for v in block)
+    #                                    for block in partition))
+    #     return sum(1 for u, v in G.edges() if aff[u] != aff[v])
+    #
+    MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
+    return nx.quotient_graph(G, partition, create_using=MG).size()
+
+
+def inter_community_non_edges(G, partition):
+    """Returns the number of inter-community non-edges according to the
+    given partition of the nodes of `G`.
+
+    `G` must be a NetworkX graph.
+
+    `partition` must be a partition of the nodes of `G`.
+
+    A *non-edge* is a pair of nodes (undirected if `G` is undirected)
+    that are not adjacent in `G`. The *inter-community non-edges* are
+    those non-edges on a pair of nodes in different blocks of the
+    partition.
+
+    Implementation note: this function creates two intermediate graphs,
+    which may require up to twice the amount of memory as required to
+    store `G`.
+
+    """
+    # Alternate implementation that does not require constructing two
+    # new graph objects (but does require constructing an affiliation
+    # dictionary):
+    #
+    #     aff = dict(chain.from_iterable(((v, block) for v in block)
+    #                                    for block in partition))
+    #     return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
+    #
+    return inter_community_edges(nx.complement(G), partition)
+
+
+@not_implemented_for("multigraph")
+@require_partition
+def performance(G, partition):
+    """Returns the performance of a partition.
+
+    The *performance* of a partition is the ratio of the number of
+    intra-community edges plus inter-community non-edges with the total
+    number of potential edges.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        A simple graph (directed or undirected).
+
+    partition : sequence
+        Partition of the nodes of `G`, represented as a sequence of
+        sets of nodes. Each block of the partition represents a
+        community.
+
+    Returns
+    -------
+    float
+        The performance of the partition, as defined above.
+
+    Raises
+    ------
+    NetworkXError
+        If `partition` is not a valid partition of the nodes of `G`.
+
+    References
+    ----------
+    .. [1] Santo Fortunato.
+           "Community Detection in Graphs".
+           *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
+           <https://arxiv.org/abs/0906.0612>
+
+    """
+    # Compute the number of intra-community edges and inter-community
+    # edges.
+    intra_edges = intra_community_edges(G, partition)
+    inter_edges = inter_community_non_edges(G, partition)
+    # Compute the number of edges in the complete graph (directed or
+    # undirected, as it depends on `G`) on `n` nodes.
+    #
+    # (If `G` is an undirected graph, we divide by two since we have
+    # double-counted each potential edge. We use integer division since
+    # `total_pairs` is guaranteed to be even.)
+    n = len(G)
+    total_pairs = n * (n - 1)
+    if not G.is_directed():
+        total_pairs //= 2
+    return (intra_edges + inter_edges) / total_pairs
+
+
+@require_partition
+def coverage(G, partition):
+    """Returns the coverage of a partition.
+
+    The *coverage* of a partition is the ratio of the number of
+    intra-community edges to the total number of edges in the graph.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+    partition : sequence
+        Partition of the nodes of `G`, represented as a sequence of
+        sets of nodes. Each block of the partition represents a
+        community.
+
+    Returns
+    -------
+    float
+        The coverage of the partition, as defined above.
+
+    Raises
+    ------
+    NetworkXError
+        If `partition` is not a valid partition of the nodes of `G`.
+
+    Notes
+    -----
+    If `G` is a multigraph, the multiplicity of edges is counted.
+
+    References
+    ----------
+    .. [1] Santo Fortunato.
+           "Community Detection in Graphs".
+           *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
+           <https://arxiv.org/abs/0906.0612>
+
+    """
+    intra_edges = intra_community_edges(G, partition)
+    total_edges = G.number_of_edges()
+    return intra_edges / total_edges
+
+
+def modularity(G, communities, weight="weight"):
+    r"""Returns the modularity of the given partition of the graph.
+
+    Modularity is defined in [1]_ as
+
+    .. math::
+
+        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m}\right)
+            \delta(c_i,c_j)
+
+    where $m$ is the number of edges, $A$ is the adjacency matrix of
+    `G`, $k_i$ is the degree of $i$ and $\delta(c_i, c_j)$
+    is 1 if $i$ and $j$ are in the same community and 0 otherwise.
+
+    According to [2]_ (and verified by some algebra) this can be reduced to
+
+    .. math::
+       Q = \sum_{c=1}^{n}
+       \left[ \frac{L_c}{m} - \left( \frac{k_c}{2m} \right) ^2 \right]
+
+    where the sum iterates over all communities $c$, $m$ is the number of edges,
+    $L_c$ is the number of intra-community links for community $c$,
+    $k_c$ is the sum of degrees of the nodes in community $c$.
+
+    The second formula is the one actually used in calculation of the modularity.
+
+    Parameters
+    ----------
+    G : NetworkX Graph
+
+    communities : list or iterable of set of nodes
+        These node sets must represent a partition of G's nodes.
+
+    weight : string or None, optional (default="weight")
+            The edge attribute that holds the numerical value used
+            as a weight. If None or an edge does not have that attribute,
+            then that edge has weight 1.
+
+    Returns
+    -------
+    Q : float
+        The modularity of the paritition.
+
+    Raises
+    ------
+    NotAPartition
+        If `communities` is not a partition of the nodes of `G`.
+
+    Examples
+    --------
+    >>> import networkx.algorithms.community as nx_comm
+    >>> G = nx.barbell_graph(3, 0)
+    >>> nx_comm.modularity(G, [{0, 1, 2}, {3, 4, 5}])
+    0.35714285714285715
+    >>> nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
+    0.35714285714285715
+
+    References
+    ----------
+    .. [1] M. E. J. Newman *Networks: An Introduction*, page 224.
+       Oxford University Press, 2011.
+    .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
+       "Finding community structure in very large networks."
+       Physical review E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
+    """
+    if not isinstance(communities, list):
+        communities = list(communities)
+    if not is_partition(G, communities):
+        raise NotAPartition(G, communities)
+
+    directed = G.is_directed()
+    if directed:
+        out_degree = dict(G.out_degree(weight=weight))
+        in_degree = dict(G.in_degree(weight=weight))
+        m = sum(out_degree.values())
+        norm = 1 / m ** 2
+    else:
+        out_degree = in_degree = dict(G.degree(weight=weight))
+        deg_sum = sum(out_degree.values())
+        m = deg_sum / 2
+        norm = 1 / deg_sum ** 2
+
+    def community_contribution(community):
+        comm = set(community)
+        L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
+
+        out_degree_sum = sum(out_degree[u] for u in comm)
+        in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
+
+        return L_c / m - out_degree_sum * in_degree_sum * norm
+
+    return sum(map(community_contribution, communities))