diff env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,265 @@
+# TODO:
+#   - Alter equations for weighted case
+#   - Write tests for weighted case
+"""Functions for detecting communities based on modularity.
+"""
+
+from networkx.algorithms.community.quality import modularity
+
+from networkx.utils.mapped_queue import MappedQueue
+
+__all__ = [
+    "greedy_modularity_communities",
+    "naive_greedy_modularity_communities",
+    "_naive_greedy_modularity_communities",
+]
+
+
+def greedy_modularity_communities(G, weight=None):
+    """Find communities in graph using Clauset-Newman-Moore greedy modularity
+    maximization. This method currently supports the Graph class and does not
+    consider edge weights.
+
+    Greedy modularity maximization begins with each node in its own community
+    and joins the pair of communities that most increases modularity until no
+    such pair exists.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+    Returns
+    -------
+    list
+        A list of frozensets of nodes, one for each community, sorted by
+        decreasing size.
+
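+    See Also
+    --------
+    naive_greedy_modularity_communities
+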
+    Examples
+    --------
+    >>> from networkx.algorithms.community import greedy_modularity_communities
+    >>> G = nx.karate_club_graph()
+    >>> c = list(greedy_modularity_communities(G))
+    >>> sorted(c[0])
+    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+
+    References
+    ----------
+    .. [1] M. E. J. Newman 'Networks: An Introduction', page 224
+       Oxford University Press 2011.
+    .. [2] Clauset, A., Newman, M. E., & Moore, C.
+       "Finding community structure in very large networks."
+       Physical Review E 70(6), 2004.
+    """
+
+    # Count nodes and edges (using the same ``weight`` keyword as the
+    # degree lookup below, so m and k stay consistent)
+    N = G.number_of_nodes()
+    m = G.size(weight=weight)
+    q0 = 1.0 / (2.0 * m)
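+    # q0 = 1/(2m) is the normalization constant from the modularity
+    # definition Q = sum_ij (A_ij/(2m) - k_i*k_j/(2m)^2) * delta(c_i, c_j)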
+
+    # Map node labels to contiguous integers
+    label_for_node = {i: v for i, v in enumerate(G.nodes())}
+    node_for_label = {label_for_node[i]: i for i in range(N)}
+
+    # Calculate degrees
+    k_for_label = G.degree(G.nodes(), weight=weight)
+    k = [k_for_label[label_for_node[i]] for i in range(N)]
+
+    # Initialize community and merge lists
+    communities = {i: frozenset([i]) for i in range(N)}
+    merges = []
+
+    # Initial modularity
+    partition = [[label_for_node[x] for x in c] for c in communities.values()]
+    q_cnm = modularity(G, partition, weight=weight)
+
+    # Initialize data structures
+    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 from A_ij + A_ji)
+    # a[i]: fraction of edges within community i
+    # dq_dict[i][j]: dQ for merging community i, j
+    # dq_heap[i][n] : (-dq, i, j) for community i's nth largest dQ
+    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
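+    # For adjacent i != j, the initial value is
+    #   dQ_ij = 1/m - k_i*k_j/(2*m^2) = 2*q0 - 2*k_i*k_j*q0*q0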
+    a = [k[i] * q0 for i in range(N)]
+    dq_dict = {
+        i: {
+            j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0
+            for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
+            if j != i
+        }
+        for i in range(N)
+    }
+    dq_heap = [
+        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N)
+    ]
+    H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])
+
+    # Merge communities until we can't improve modularity
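+    # Each iteration pops the best remaining merge from H and updates the
+    # affected rows of the sparse dQ matrix; at most N - 1 merges occur.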
+    while len(H) > 1:
+        # Find best merge
+        # Remove from heap of row maxes
+        # Ties will be broken by choosing the pair with lowest min community id
+        try:
+            dq, i, j = H.pop()
+        except IndexError:
+            break
+        dq = -dq
+        # Remove best merge from row i heap
+        dq_heap[i].pop()
+        # Push new row max onto H
+        if len(dq_heap[i]) > 0:
+            H.push(dq_heap[i].h[0])
+        # If this element was also at the root of row j, we need to remove the
+        # duplicate entry from H
+        if dq_heap[j].h[0] == (-dq, j, i):
+            H.remove((-dq, j, i))
+            # Remove best merge from row j heap
+            dq_heap[j].remove((-dq, j, i))
+            # Push new row max onto H
+            if len(dq_heap[j]) > 0:
+                H.push(dq_heap[j].h[0])
+        else:
+            # Duplicate wasn't in H, just remove from row j heap
+            dq_heap[j].remove((-dq, j, i))
+        # Stop when change is non-positive
+        if dq <= 0:
+            break
+
+        # Perform merge
+        communities[j] = frozenset(communities[i] | communities[j])
+        del communities[i]
+        merges.append((i, j, dq))
+        # New modularity
+        q_cnm += dq
+        # Get list of communities connected to merged communities
+        i_set = set(dq_dict[i].keys())
+        j_set = set(dq_dict[j].keys())
+        all_set = (i_set | j_set) - {i, j}
+        both_set = i_set & j_set
+        # Merge i into j and update dQ
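+        # CNM Eq 10a-c: the new dQ_jk depends on whether community k was
+        # connected to both i and j, to j only, or to i only.  (The loop
+        # variable k below shadows the degree list ``k``, which is no
+        # longer used at this point.)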
+        for k in all_set:
+            # Calculate new dq value
+            if k in both_set:
+                dq_jk = dq_dict[j][k] + dq_dict[i][k]
+            elif k in j_set:
+                dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
+            else:
+                # k in i_set
+                dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
+            # Update rows j and k
+            for row, col in [(j, k), (k, j)]:
+                # Save old value for finding heap index
+                if k in j_set:
+                    d_old = (-dq_dict[row][col], row, col)
+                else:
+                    d_old = None
+                # Update dict for j,k only (i is removed below)
+                dq_dict[row][col] = dq_jk
+                # Save old max of per-row heap
+                if len(dq_heap[row]) > 0:
+                    d_oldmax = dq_heap[row].h[0]
+                else:
+                    d_oldmax = None
+                # Add/update heaps
+                d = (-dq_jk, row, col)
+                if d_old is None:
+                    # We're creating a new nonzero element, add to heap
+                    dq_heap[row].push(d)
+                else:
+                    # Update existing element in per-row heap
+                    dq_heap[row].update(d_old, d)
+                # Update heap of row maxes if necessary
+                if d_oldmax is None:
+                    # No entries previously in this row, push new max
+                    H.push(d)
+                else:
+                    # We've updated an entry in this row, has the max changed?
+                    if dq_heap[row].h[0] != d_oldmax:
+                        H.update(d_oldmax, dq_heap[row].h[0])
+
+        # Remove row/col i from matrix
+        i_neighbors = dq_dict[i].keys()
+        for k in i_neighbors:
+            # Remove from dict
+            dq_old = dq_dict[k][i]
+            del dq_dict[k][i]
+            # Remove from heaps if we haven't already
+            if k != j:
+                # Remove both row and column
+                for row, col in [(k, i), (i, k)]:
+                    # Check if replaced dq is row max
+                    d_old = (-dq_old, row, col)
+                    if dq_heap[row].h[0] == d_old:
+                        # Update per-row heap and heap of row maxes
+                        dq_heap[row].remove(d_old)
+                        H.remove(d_old)
+                        # Update row max
+                        if len(dq_heap[row]) > 0:
+                            H.push(dq_heap[row].h[0])
+                    else:
+                        # Only update per-row heap
+                        dq_heap[row].remove(d_old)
+
+        del dq_dict[i]
+        # Mark row i as deleted, but keep placeholder
+        dq_heap[i] = MappedQueue()
+        # Merge i into j and update a
+        a[j] += a[i]
+        a[i] = 0
+
+    communities = [
+        frozenset([label_for_node[i] for i in c]) for c in communities.values()
+    ]
+    return sorted(communities, key=len, reverse=True)
+
+
+def naive_greedy_modularity_communities(G):
+    """Find communities in graph using the greedy modularity maximization.
+    This implementation is O(n^4), much slower than alternatives, but it is
+    provided as an easy-to-understand reference implementation.
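+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+    Yields
+    ------
+    frozenset
+        Sets of nodes, one for each community, from largest to smallest.
+
+    Examples
+    --------
+    >>> from networkx.algorithms.community import naive_greedy_modularity_communities
+    >>> G = nx.karate_club_graph()
+    >>> c = list(naive_greedy_modularity_communities(G))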
+    """
+    # First create one community for each node
+    communities = [frozenset([u]) for u in G.nodes()]
+    # Track merges
+    merges = []
+    # Greedily merge communities until no improvement is possible
+    old_modularity = None
+    new_modularity = modularity(G, communities)
+    while old_modularity is None or new_modularity > old_modularity:
+        # Save modularity for comparison
+        old_modularity = new_modularity
+        # Find best pair to merge
+        trial_communities = list(communities)
+        to_merge = None
+        for i, u in enumerate(communities):
+            for j, v in enumerate(communities):
+                # Consider each unordered pair once and skip empty communities
+                if j <= i or len(u) == 0 or len(v) == 0:
+                    continue
+                # Merge communities u and v
+                trial_communities[j] = u | v
+                trial_communities[i] = frozenset([])
+                trial_modularity = modularity(G, trial_communities)
+                if trial_modularity >= new_modularity:
+                    # Check if strictly better or tie
+                    if trial_modularity > new_modularity:
+                        # Found new best, save modularity and group indexes
+                        new_modularity = trial_modularity
+                        to_merge = (i, j, new_modularity - old_modularity)
+                    elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
+                        # Break ties by choosing pair with lowest min id
+                        new_modularity = trial_modularity
+                        to_merge = (i, j, new_modularity - old_modularity)
+                # Un-merge
+                trial_communities[i] = u
+                trial_communities[j] = v
+        if to_merge is not None:
+            # If the best merge improves modularity, use it
+            merges.append(to_merge)
+            i, j, dq = to_merge
+            u, v = communities[i], communities[j]
+            communities[j] = u | v
+            communities[i] = frozenset([])
+    # Remove empty communities and sort
+    communities = [c for c in communities if len(c) > 0]
+    yield from sorted(communities, key=len, reverse=True)
+
+
+# old name
+_naive_greedy_modularity_communities = naive_greedy_modularity_communities