Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # TODO: | |
2 # - Alter equations for weighted case | |
3 # - Write tests for weighted case | |
4 """Functions for detecting communities based on modularity. | |
5 """ | |
6 | |
7 from networkx.algorithms.community.quality import modularity | |
8 | |
9 from networkx.utils.mapped_queue import MappedQueue | |
10 | |
11 __all__ = [ | |
12 "greedy_modularity_communities", | |
13 "naive_greedy_modularity_communities", | |
14 "_naive_greedy_modularity_communities", | |
15 ] | |
16 | |
17 | |
def greedy_modularity_communities(G, weight=None):
    """Find communities in graph using Clauset-Newman-Moore greedy modularity
    maximization. This method currently supports the Graph class and does not
    consider edge weights.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional
        Edge attribute used when computing node degrees.
        NOTE(review): the edge total ``m`` below always reads the ``"weight"``
        attribute regardless of this parameter, so on weighted graphs the
        degrees and the normalizer can disagree when ``weight`` is None —
        confirm intended semantics (see the module-level TODO about the
        weighted case).

    Returns
    -------
    Yields sets of nodes, one for each community.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J Newman 'Networks: An Introduction', page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    """

    # Count nodes and edges
    N = len(G.nodes())
    # Total edge weight m; missing "weight" attributes count as 1.
    m = sum([d.get("weight", 1) for u, v, d in G.edges(data=True)])
    # q0 = 1 / (2m), the constant factor in the modularity formula.
    q0 = 1.0 / (2.0 * m)

    # Map node labels to contiguous integers 0..N-1 (and back), so nodes can
    # index plain lists below.
    label_for_node = {i: v for i, v in enumerate(G.nodes())}
    node_for_label = {label_for_node[i]: i for i in range(N)}

    # Calculate degrees, indexed by the contiguous integer labels.
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists: every node starts alone.
    communities = {i: frozenset([i]) for i in range(N)}
    merges = []

    # Initial modularity of the singleton partition.
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
    # a[i]: fraction of edge ends attached to community i
    # dq_dict[i][j]: dQ for merging community i, j (only for connected pairs)
    # dq_heap[i][n] : (-dq, i, j) for community i nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
    # Negated dq values are stored because MappedQueue is a min-heap.
    a = [k[i] * q0 for i in range(N)]
    dq_dict = {
        i: {
            j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0
            for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
            if j != i
        }
        for i in range(N)
    }
    dq_heap = [
        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N)
    ]
    H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge: community i is absorbed into community j.
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq
        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - {i, j}
        both_set = i_set & j_set
        # Merge i into j and update dQ per CNM Eq 10.
        # NOTE(review): this loop variable shadows the degree list ``k``
        # defined above; harmless because the list is no longer read after
        # initialization, but renaming would be clearer.
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                # Community k was connected to both i and j.
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                # Community k was connected to j only.
                dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)

        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    # Translate integer labels back to the original node labels and return
    # communities largest-first.
    communities = [
        frozenset([label_for_node[i] for i in c]) for c in communities.values()
    ]
    return sorted(communities, key=len, reverse=True)
210 | |
211 | |
def naive_greedy_modularity_communities(G):
    """Find communities in graph using the greedy modularity maximization.
    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.
    """
    # Start with every node in a community of its own.
    communities = [frozenset([node]) for node in G.nodes()]
    # Record of accepted merges as (i, j, delta_q) tuples.
    merges = []
    # Keep greedily merging pairs while modularity improves.
    old_modularity = None
    new_modularity = modularity(G, communities)
    while old_modularity is None or new_modularity > old_modularity:
        # Remember the score we must beat this round.
        old_modularity = new_modularity
        # Scratch copy used to score each candidate merge.
        trial_communities = list(communities)
        to_merge = None
        for idx_a, comm_a in enumerate(communities):
            for idx_b, comm_b in enumerate(communities):
                # Consider each unordered pair once; skip emptied slots.
                if idx_b <= idx_a or not comm_a or not comm_b:
                    continue
                # Tentatively merge the pair and score the partition.
                trial_communities[idx_b] = comm_a | comm_b
                trial_communities[idx_a] = frozenset([])
                trial_modularity = modularity(G, trial_communities)
                if trial_modularity >= new_modularity:
                    if trial_modularity > new_modularity:
                        # Strictly better: adopt this pair as the new best.
                        new_modularity = trial_modularity
                        to_merge = (idx_a, idx_b, new_modularity - old_modularity)
                    elif to_merge and min(idx_a, idx_b) < min(to_merge[0], to_merge[1]):
                        # Tie: prefer the pair with the lowest community id.
                        new_modularity = trial_modularity
                        to_merge = (idx_a, idx_b, new_modularity - old_modularity)
                # Undo the tentative merge before trying the next pair.
                trial_communities[idx_a] = comm_a
                trial_communities[idx_b] = comm_b
        if to_merge is not None:
            # Commit the best merge found this round.
            merges.append(to_merge)
            idx_a, idx_b, dq = to_merge
            comm_a, comm_b = communities[idx_a], communities[idx_b]
            communities[idx_b] = comm_a | comm_b
            communities[idx_a] = frozenset([])
    # Drop emptied slots and yield communities largest-first.
    communities = [c for c in communities if c]
    yield from sorted(communities, key=len, reverse=True)
262 | |
263 | |
# Backward-compatible alias for the old private name of
# naive_greedy_modularity_communities.
_naive_greedy_modularity_communities = naive_greedy_modularity_communities