Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # TODO: | |
2 # - Alter equations for weighted case | |
3 # - Write tests for weighted case | |
4 """Functions for detecting communities based on modularity. | |
5 """ | |
6 | |
7 from networkx.algorithms.community.quality import modularity | |
8 | |
9 from networkx.utils.mapped_queue import MappedQueue | |
10 | |
11 __all__ = [ | |
12 "greedy_modularity_communities", | |
13 "naive_greedy_modularity_communities", | |
14 "_naive_greedy_modularity_communities", | |
15 ] | |
16 | |
17 | |
def greedy_modularity_communities(G, weight=None):
    """Find communities in graph using Clauset-Newman-Moore greedy modularity
    maximization. This method currently supports the Graph class and does not
    consider edge weights.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional
        Edge attribute used when computing node degrees.
        NOTE(review): the edge total ``m`` below always reads the ``"weight"``
        attribute regardless of this parameter, so on weighted graphs the
        degrees and the normalizer can disagree when ``weight`` is None —
        confirm intended semantics (see the module-level TODO about the
        weighted case).

    Returns
    -------
    Yields sets of nodes, one for each community.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J Newman 'Networks: An Introduction', page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    """

    # Count nodes and edges
    N = len(G.nodes())
    # Total edge weight m; missing "weight" attributes count as 1.
    m = sum([d.get("weight", 1) for u, v, d in G.edges(data=True)])
    # q0 = 1 / (2m), the constant factor in the modularity formula.
    q0 = 1.0 / (2.0 * m)

    # Map node labels to contiguous integers 0..N-1 (and back), so nodes can
    # index plain lists below.
    label_for_node = {i: v for i, v in enumerate(G.nodes())}
    node_for_label = {label_for_node[i]: i for i in range(N)}

    # Calculate degrees, indexed by the contiguous integer labels.
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists: every node starts alone.
    communities = {i: frozenset([i]) for i in range(N)}
    merges = []

    # Initial modularity of the singleton partition.
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
    # a[i]: fraction of edge ends attached to community i
    # dq_dict[i][j]: dQ for merging community i, j (only for connected pairs)
    # dq_heap[i][n] : (-dq, i, j) for community i nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
    # Negated dq values are stored because MappedQueue is a min-heap.
    a = [k[i] * q0 for i in range(N)]
    dq_dict = {
        i: {
            j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0
            for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
            if j != i
        }
        for i in range(N)
    }
    dq_heap = [
        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N)
    ]
    H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge: community i is absorbed into community j.
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq
        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - {i, j}
        both_set = i_set & j_set
        # Merge i into j and update dQ per CNM Eq 10.
        # NOTE(review): this loop variable shadows the degree list ``k``
        # defined above; harmless because the list is no longer read after
        # initialization, but renaming would be clearer.
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                # Community k was connected to both i and j.
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                # Community k was connected to j only.
                dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)

        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    # Translate integer labels back to the original node labels and return
    # communities largest-first.
    communities = [
        frozenset([label_for_node[i] for i in c]) for c in communities.values()
    ]
    return sorted(communities, key=len, reverse=True)
210 | |
211 | |
def naive_greedy_modularity_communities(G):
    """Find communities in graph using the greedy modularity maximization.
    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.
    """
    # Start with every node in a community of its own.
    communities = [frozenset([node]) for node in G.nodes()]
    # Record of accepted merges as (i, j, delta_q) tuples.
    merges = []
    # Keep greedily merging pairs while modularity improves.
    old_modularity = None
    new_modularity = modularity(G, communities)
    while old_modularity is None or new_modularity > old_modularity:
        # Remember the score we must beat this round.
        old_modularity = new_modularity
        # Scratch copy used to score each candidate merge.
        trial_communities = list(communities)
        to_merge = None
        for idx_a, comm_a in enumerate(communities):
            for idx_b, comm_b in enumerate(communities):
                # Consider each unordered pair once; skip emptied slots.
                if idx_b <= idx_a or not comm_a or not comm_b:
                    continue
                # Tentatively merge the pair and score the partition.
                trial_communities[idx_b] = comm_a | comm_b
                trial_communities[idx_a] = frozenset([])
                trial_modularity = modularity(G, trial_communities)
                if trial_modularity >= new_modularity:
                    if trial_modularity > new_modularity:
                        # Strictly better: adopt this pair as the new best.
                        new_modularity = trial_modularity
                        to_merge = (idx_a, idx_b, new_modularity - old_modularity)
                    elif to_merge and min(idx_a, idx_b) < min(to_merge[0], to_merge[1]):
                        # Tie: prefer the pair with the lowest community id.
                        new_modularity = trial_modularity
                        to_merge = (idx_a, idx_b, new_modularity - old_modularity)
                # Undo the tentative merge before trying the next pair.
                trial_communities[idx_a] = comm_a
                trial_communities[idx_b] = comm_b
        if to_merge is not None:
            # Commit the best merge found this round.
            merges.append(to_merge)
            idx_a, idx_b, dq = to_merge
            comm_a, comm_b = communities[idx_a], communities[idx_b]
            communities[idx_b] = comm_a | comm_b
            communities[idx_a] = frozenset([])
    # Drop emptied slots and yield communities largest-first.
    communities = [c for c in communities if c]
    yield from sorted(communities, key=len, reverse=True)
262 | |
263 | |
# Backward-compatible alias for the old private name of
# naive_greedy_modularity_communities.
_naive_greedy_modularity_communities = naive_greedy_modularity_communities