Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,265 @@ +# TODO: +# - Alter equations for weighted case +# - Write tests for weighted case +"""Functions for detecting communities based on modularity. +""" + +from networkx.algorithms.community.quality import modularity + +from networkx.utils.mapped_queue import MappedQueue + +__all__ = [ + "greedy_modularity_communities", + "naive_greedy_modularity_communities", + "_naive_greedy_modularity_communities", +] + + +def greedy_modularity_communities(G, weight=None): + """Find communities in graph using Clauset-Newman-Moore greedy modularity + maximization. This method currently supports the Graph class and does not + consider edge weights. + + Greedy modularity maximization begins with each node in its own community + and joins the pair of communities that most increases modularity until no + such pair exists. + + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + Yields sets of nodes, one for each community. + + Examples + -------- + >>> from networkx.algorithms.community import greedy_modularity_communities + >>> G = nx.karate_club_graph() + >>> c = list(greedy_modularity_communities(G)) + >>> sorted(c[0]) + [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + + References + ---------- + .. [1] M. E. J Newman 'Networks: An Introduction', page 224 + Oxford University Press 2011. + .. [2] Clauset, A., Newman, M. E., & Moore, C. + "Finding community structure in very large networks." + Physical Review E 70(6), 2004. + """ + + # Count nodes and edges + N = len(G.nodes()) + m = sum([d.get("weight", 1) for u, v, d in G.edges(data=True)]) + q0 = 1.0 / (2.0 * m) + + # Map node labels to contiguous integers + label_for_node = {i: v for i, v in enumerate(G.nodes())} + node_for_label = {label_for_node[i]: i for i in range(N)} + + # Calculate degrees + k_for_label = G.degree(G.nodes(), weight=weight) + k = [k_for_label[label_for_node[i]] for i in range(N)] + + # Initialize community and merge lists + communities = {i: frozenset([i]) for i in range(N)} + merges = [] + + # Initial modularity + partition = [[label_for_node[x] for x in c] for c in communities.values()] + q_cnm = modularity(G, partition) + + # Initialize data structures + # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji) + # a[i]: fraction of edges within community i + # dq_dict[i][j]: dQ for merging community i, j + # dq_heap[i][n] : (-dq, i, j) for communitiy i nth largest dQ + # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij) + a = [k[i] * q0 for i in range(N)] + dq_dict = { + i: { + j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0 + for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])] + if j != i + } + for i in range(N) + } + dq_heap = [ + MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N) + ] + H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0]) + + # Merge communities until we can't improve modularity + while len(H) > 1: + # Find best merge + # Remove from heap of row maxes + # Ties will be broken by choosing the pair with lowest min community id + try: + dq, i, j = H.pop() + except IndexError: + break + dq = -dq + # Remove best merge from row i heap + dq_heap[i].pop() + # Push new row max onto H + if len(dq_heap[i]) > 0: + H.push(dq_heap[i].h[0]) + # If this element was also at the root of row j, we need to remove the + # duplicate entry from H + if dq_heap[j].h[0] == (-dq, j, i): + H.remove((-dq, j, i)) + # Remove best merge from row j heap + dq_heap[j].remove((-dq, j, i)) + # Push new row max onto H + if len(dq_heap[j]) > 0: + H.push(dq_heap[j].h[0]) + else: + # Duplicate wasn't in H, just remove from row j heap + dq_heap[j].remove((-dq, j, i)) + # Stop when change is non-positive + if dq <= 0: + break + + # Perform merge + communities[j] = frozenset(communities[i] | communities[j]) + del communities[i] + merges.append((i, j, dq)) + # New modularity + q_cnm += dq + # Get list of communities connected to merged communities + i_set = set(dq_dict[i].keys()) + j_set = set(dq_dict[j].keys()) + all_set = (i_set | j_set) - {i, j} + both_set = i_set & j_set + # Merge i into j and update dQ + for k in all_set: + # Calculate new dq value + if k in both_set: + dq_jk = dq_dict[j][k] + dq_dict[i][k] + elif k in j_set: + dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k] + else: + # k in i_set + dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k] + # Update rows j and k + for row, col in [(j, k), (k, j)]: + # Save old value for finding heap index + if k in j_set: + d_old = (-dq_dict[row][col], row, col) + else: + d_old = None + # Update dict for j,k only (i is removed below) + dq_dict[row][col] = dq_jk + # Save old max of per-row heap + if len(dq_heap[row]) > 0: + d_oldmax = dq_heap[row].h[0] + else: + d_oldmax = None + # Add/update heaps + d = (-dq_jk, row, col) + if d_old is None: + # We're creating a new nonzero element, add to heap + dq_heap[row].push(d) + else: + # Update existing element in per-row heap + dq_heap[row].update(d_old, d) + # Update heap of row maxes if necessary + if d_oldmax is None: + # No entries previously in this row, push new max + H.push(d) + else: + # We've updated an entry in this row, has the max changed? + if dq_heap[row].h[0] != d_oldmax: + H.update(d_oldmax, dq_heap[row].h[0]) + + # Remove row/col i from matrix + i_neighbors = dq_dict[i].keys() + for k in i_neighbors: + # Remove from dict + dq_old = dq_dict[k][i] + del dq_dict[k][i] + # Remove from heaps if we haven't already + if k != j: + # Remove both row and column + for row, col in [(k, i), (i, k)]: + # Check if replaced dq is row max + d_old = (-dq_old, row, col) + if dq_heap[row].h[0] == d_old: + # Update per-row heap and heap of row maxes + dq_heap[row].remove(d_old) + H.remove(d_old) + # Update row max + if len(dq_heap[row]) > 0: + H.push(dq_heap[row].h[0]) + else: + # Only update per-row heap + dq_heap[row].remove(d_old) + + del dq_dict[i] + # Mark row i as deleted, but keep placeholder + dq_heap[i] = MappedQueue() + # Merge i into j and update a + a[j] += a[i] + a[i] = 0 + + communities = [ + frozenset([label_for_node[i] for i in c]) for c in communities.values() + ] + return sorted(communities, key=len, reverse=True) + + +def naive_greedy_modularity_communities(G): + """Find communities in graph using the greedy modularity maximization. + This implementation is O(n^4), much slower than alternatives, but it is + provided as an easy-to-understand reference implementation. + """ + # First create one community for each node + communities = list([frozenset([u]) for u in G.nodes()]) + # Track merges + merges = [] + # Greedily merge communities until no improvement is possible + old_modularity = None + new_modularity = modularity(G, communities) + while old_modularity is None or new_modularity > old_modularity: + # Save modularity for comparison + old_modularity = new_modularity + # Find best pair to merge + trial_communities = list(communities) + to_merge = None + for i, u in enumerate(communities): + for j, v in enumerate(communities): + # Skip i=j and empty communities + if j <= i or len(u) == 0 or len(v) == 0: + continue + # Merge communities u and v + trial_communities[j] = u | v + trial_communities[i] = frozenset([]) + trial_modularity = modularity(G, trial_communities) + if trial_modularity >= new_modularity: + # Check if strictly better or tie + if trial_modularity > new_modularity: + # Found new best, save modularity and group indexes + new_modularity = trial_modularity + to_merge = (i, j, new_modularity - old_modularity) + elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]): + # Break ties by choosing pair with lowest min id + new_modularity = trial_modularity + to_merge = (i, j, new_modularity - old_modularity) + # Un-merge + trial_communities[i] = u + trial_communities[j] = v + if to_merge is not None: + # If the best merge improves modularity, use it + merges.append(to_merge) + i, j, dq = to_merge + u, v = communities[i], communities[j] + communities[j] = u | v + communities[i] = frozenset([]) + # Remove empty communities and sort + communities = [c for c in communities if len(c) > 0] + yield from sorted(communities, key=lambda x: len(x), reverse=True) + + +# old name +_naive_greedy_modularity_communities = naive_greedy_modularity_communities