comparison env/lib/python3.9/site-packages/networkx/algorithms/community/modularity_max.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
# TODO:
# - Alter equations for weighted case
# - Write tests for weighted case
"""Functions for detecting communities based on modularity.
"""

from networkx.algorithms.community.quality import modularity

from networkx.utils.mapped_queue import MappedQueue

__all__ = [
    "greedy_modularity_communities",
    "naive_greedy_modularity_communities",
    "_naive_greedy_modularity_communities",
]

def greedy_modularity_communities(G, weight=None):
    """Find communities in G using Clauset-Newman-Moore greedy modularity
    maximization. This method currently supports the Graph class; the
    ``weight`` keyword is honored when computing degrees and edge totals,
    but the update equations below still assume an unweighted graph (see
    the TODO at the top of this module).

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of the edge attribute to use as a weight. If None, every
        edge counts as 1.

    Returns
    -------
    list
        A list of frozensets of nodes, one for each community, sorted by
        decreasing size.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
       Oxford University Press, 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    """

    # Count nodes and total edge weight (G.size honors the ``weight`` kwarg,
    # keeping m consistent with the degrees computed below)
    N = len(G.nodes())
    m = G.size(weight=weight)
    q0 = 1.0 / (2.0 * m)
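    # q0 = 1/(2m) is the normalization that appears throughout modularity:
    #     Q = (1/(2m)) * sum_ij [A_ij - k_i*k_j/(2m)] * delta(c_i, c_j)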

    # Map node labels to contiguous integers
    label_for_node = {i: v for i, v in enumerate(G.nodes())}
    node_for_label = {label_for_node[i]: i for i in range(N)}

    # Calculate degrees
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists
    communities = {i: frozenset([i]) for i in range(N)}
    merges = []

    # Initial modularity
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 in the paper is missing a factor of 2, from A_ij + A_ji)
    # a[i]: fraction of edges within community i
    # dq_dict[i][j]: dQ for merging communities i and j
    # dq_heap[i][n]: (-dq, i, j) for community i's nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
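    # Explicitly, for singleton communities i and j joined by an edge:
    #     dQ_ij = 1/(2m) + 1/(2m) - 2*k_i*k_j/(2m)^2 = 2*q0 - 2*k_i*k_j*q0*q0
    # and a[i] = k_i/(2m) = k[i]*q0, matching the expressions below.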
    a = [k[i] * q0 for i in range(N)]
    dq_dict = {
        i: {
            j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0
            for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
            if j != i
        }
        for i in range(N)
    }
    dq_heap = [
        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N)
    ]
    H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
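        # Invariant: H holds one entry per nonempty row of dq_heap, namely
        # that row's best merge (dQ stored negated so the min-oriented
        # MappedQueue pops the max), so H.pop() yields the global best merge.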
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq
        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - {i, j}
        both_set = i_set & j_set
        # Merge i into j and update dQ
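        # Update rule (CNM Eq 10a-c): if k neighbors both i and j, the two dQ
        # entries simply add; if k neighbors only one of them, only the null
        # model term changes, subtracting 2*a*a[k] for the non-neighbor side.
        # (Note the loop variable k below shadows the degree list k, which is
        # no longer needed at this point.)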
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)

        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    communities = [
        frozenset([label_for_node[i] for i in c]) for c in communities.values()
    ]
    return sorted(communities, key=len, reverse=True)


def naive_greedy_modularity_communities(G):
    """Find communities in G using greedy modularity maximization.

    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.
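
    Examples
    --------
    A minimal usage sketch; like ``greedy_modularity_communities``, the
    communities come out sorted by decreasing size.

    >>> from networkx.algorithms.community import naive_greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> communities = list(naive_greedy_modularity_communities(G))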
216 """
    # First create one community for each node
    communities = [frozenset([u]) for u in G.nodes()]
    # Track merges
    merges = []
    # Greedily merge communities until no improvement is possible
    old_modularity = None
    new_modularity = modularity(G, communities)
    while old_modularity is None or new_modularity > old_modularity:
        # Save modularity for comparison
        old_modularity = new_modularity
        # Find best pair to merge
        trial_communities = list(communities)
        to_merge = None
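        # Each pass below tries all O(n^2) community pairs and recomputes
        # modularity from scratch for each trial merge; with up to n-1 merge
        # passes overall, this gives the O(n^4) cost noted in the docstring.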
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Skip i=j and empty communities
                if j <= i or len(u) == 0 or len(v) == 0:
                    continue
                # Merge communities u and v
                trial_communities[j] = u | v
                trial_communities[i] = frozenset([])
                trial_modularity = modularity(G, trial_communities)
                if trial_modularity >= new_modularity:
                    # Check if strictly better or tie
                    if trial_modularity > new_modularity:
                        # Found new best, save modularity and group indexes
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                    elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
                        # Break ties by choosing pair with lowest min id
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                # Un-merge
                trial_communities[i] = u
                trial_communities[j] = v
        if to_merge is not None:
            # If the best merge improves modularity, use it
            merges.append(to_merge)
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset([])
    # Remove empty communities and sort
    communities = [c for c in communities if len(c) > 0]
    yield from sorted(communities, key=lambda x: len(x), reverse=True)


# old name
_naive_greedy_modularity_communities = naive_greedy_modularity_communities
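

# A minimal usage sketch (illustrative addition, not part of the library):
# run both variants on Zachary's karate club graph and print the modularity
# each achieves. Tie-breaking may differ between the two implementations, so
# the scores are not guaranteed to be identical.
if __name__ == "__main__":
    import networkx as nx

    G = nx.karate_club_graph()
    fast = greedy_modularity_communities(G)
    naive = list(naive_greedy_modularity_communities(G))
    print("CNM modularity:  ", modularity(G, fast))
    print("naive modularity:", modularity(G, naive))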