Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/bipartite/cluster.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Functions for computing clustering of pairs | |
2 | |
3 """ | |
4 | |
5 import itertools | |
6 import networkx as nx | |
7 | |
8 __all__ = [ | |
9 "clustering", | |
10 "average_clustering", | |
11 "latapy_clustering", | |
12 "robins_alexander_clustering", | |
13 ] | |
14 | |
15 | |
16 def cc_dot(nu, nv): | |
17 return float(len(nu & nv)) / len(nu | nv) | |
18 | |
19 | |
20 def cc_max(nu, nv): | |
21 return float(len(nu & nv)) / max(len(nu), len(nv)) | |
22 | |
23 | |
24 def cc_min(nu, nv): | |
25 return float(len(nu & nv)) / min(len(nu), len(nv)) | |
26 | |
27 | |
28 modes = {"dot": cc_dot, "min": cc_min, "max": cc_max} | |
29 | |
30 | |
31 def latapy_clustering(G, nodes=None, mode="dot"): | |
32 r"""Compute a bipartite clustering coefficient for nodes. | |
33 | |
34 The bipartie clustering coefficient is a measure of local density | |
35 of connections defined as [1]_: | |
36 | |
37 .. math:: | |
38 | |
39 c_u = \frac{\sum_{v \in N(N(u))} c_{uv} }{|N(N(u))|} | |
40 | |
41 where `N(N(u))` are the second order neighbors of `u` in `G` excluding `u`, | |
42 and `c_{uv}` is the pairwise clustering coefficient between nodes | |
43 `u` and `v`. | |
44 | |
45 The mode selects the function for `c_{uv}` which can be: | |
46 | |
47 `dot`: | |
48 | |
49 .. math:: | |
50 | |
51 c_{uv}=\frac{|N(u)\cap N(v)|}{|N(u) \cup N(v)|} | |
52 | |
53 `min`: | |
54 | |
55 .. math:: | |
56 | |
57 c_{uv}=\frac{|N(u)\cap N(v)|}{min(|N(u)|,|N(v)|)} | |
58 | |
59 `max`: | |
60 | |
61 .. math:: | |
62 | |
63 c_{uv}=\frac{|N(u)\cap N(v)|}{max(|N(u)|,|N(v)|)} | |
64 | |
65 | |
66 Parameters | |
67 ---------- | |
68 G : graph | |
69 A bipartite graph | |
70 | |
71 nodes : list or iterable (optional) | |
72 Compute bipartite clustering for these nodes. The default | |
73 is all nodes in G. | |
74 | |
75 mode : string | |
76 The pariwise bipartite clustering method to be used in the computation. | |
77 It must be "dot", "max", or "min". | |
78 | |
79 Returns | |
80 ------- | |
81 clustering : dictionary | |
82 A dictionary keyed by node with the clustering coefficient value. | |
83 | |
84 | |
85 Examples | |
86 -------- | |
87 >>> from networkx.algorithms import bipartite | |
88 >>> G = nx.path_graph(4) # path graphs are bipartite | |
89 >>> c = bipartite.clustering(G) | |
90 >>> c[0] | |
91 0.5 | |
92 >>> c = bipartite.clustering(G, mode="min") | |
93 >>> c[0] | |
94 1.0 | |
95 | |
96 See Also | |
97 -------- | |
98 robins_alexander_clustering | |
99 square_clustering | |
100 average_clustering | |
101 | |
102 References | |
103 ---------- | |
104 .. [1] Latapy, Matthieu, Clémence Magnien, and Nathalie Del Vecchio (2008). | |
105 Basic notions for the analysis of large two-mode networks. | |
106 Social Networks 30(1), 31--48. | |
107 """ | |
108 if not nx.algorithms.bipartite.is_bipartite(G): | |
109 raise nx.NetworkXError("Graph is not bipartite") | |
110 | |
111 try: | |
112 cc_func = modes[mode] | |
113 except KeyError as e: | |
114 raise nx.NetworkXError( | |
115 "Mode for bipartite clustering must be: dot, min or max" | |
116 ) from e | |
117 | |
118 if nodes is None: | |
119 nodes = G | |
120 ccs = {} | |
121 for v in nodes: | |
122 cc = 0.0 | |
123 nbrs2 = {u for nbr in G[v] for u in G[nbr]} - {v} | |
124 for u in nbrs2: | |
125 cc += cc_func(set(G[u]), set(G[v])) | |
126 if cc > 0.0: # len(nbrs2)>0 | |
127 cc /= len(nbrs2) | |
128 ccs[v] = cc | |
129 return ccs | |
130 | |
131 | |
132 clustering = latapy_clustering | |
133 | |
134 | |
135 def average_clustering(G, nodes=None, mode="dot"): | |
136 r"""Compute the average bipartite clustering coefficient. | |
137 | |
138 A clustering coefficient for the whole graph is the average, | |
139 | |
140 .. math:: | |
141 | |
142 C = \frac{1}{n}\sum_{v \in G} c_v, | |
143 | |
144 where `n` is the number of nodes in `G`. | |
145 | |
146 Similar measures for the two bipartite sets can be defined [1]_ | |
147 | |
148 .. math:: | |
149 | |
150 C_X = \frac{1}{|X|}\sum_{v \in X} c_v, | |
151 | |
152 where `X` is a bipartite set of `G`. | |
153 | |
154 Parameters | |
155 ---------- | |
156 G : graph | |
157 a bipartite graph | |
158 | |
159 nodes : list or iterable, optional | |
160 A container of nodes to use in computing the average. | |
161 The nodes should be either the entire graph (the default) or one of the | |
162 bipartite sets. | |
163 | |
164 mode : string | |
165 The pariwise bipartite clustering method. | |
166 It must be "dot", "max", or "min" | |
167 | |
168 Returns | |
169 ------- | |
170 clustering : float | |
171 The average bipartite clustering for the given set of nodes or the | |
172 entire graph if no nodes are specified. | |
173 | |
174 Examples | |
175 -------- | |
176 >>> from networkx.algorithms import bipartite | |
177 >>> G = nx.star_graph(3) # star graphs are bipartite | |
178 >>> bipartite.average_clustering(G) | |
179 0.75 | |
180 >>> X, Y = bipartite.sets(G) | |
181 >>> bipartite.average_clustering(G, X) | |
182 0.0 | |
183 >>> bipartite.average_clustering(G, Y) | |
184 1.0 | |
185 | |
186 See Also | |
187 -------- | |
188 clustering | |
189 | |
190 Notes | |
191 ----- | |
192 The container of nodes passed to this function must contain all of the nodes | |
193 in one of the bipartite sets ("top" or "bottom") in order to compute | |
194 the correct average bipartite clustering coefficients. | |
195 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |
196 for further details on how bipartite graphs are handled in NetworkX. | |
197 | |
198 | |
199 References | |
200 ---------- | |
201 .. [1] Latapy, Matthieu, Clémence Magnien, and Nathalie Del Vecchio (2008). | |
202 Basic notions for the analysis of large two-mode networks. | |
203 Social Networks 30(1), 31--48. | |
204 """ | |
205 if nodes is None: | |
206 nodes = G | |
207 ccs = latapy_clustering(G, nodes=nodes, mode=mode) | |
208 return float(sum(ccs[v] for v in nodes)) / len(nodes) | |
209 | |
210 | |
211 def robins_alexander_clustering(G): | |
212 r"""Compute the bipartite clustering of G. | |
213 | |
214 Robins and Alexander [1]_ defined bipartite clustering coefficient as | |
215 four times the number of four cycles `C_4` divided by the number of | |
216 three paths `L_3` in a bipartite graph: | |
217 | |
218 .. math:: | |
219 | |
220 CC_4 = \frac{4 * C_4}{L_3} | |
221 | |
222 Parameters | |
223 ---------- | |
224 G : graph | |
225 a bipartite graph | |
226 | |
227 Returns | |
228 ------- | |
229 clustering : float | |
230 The Robins and Alexander bipartite clustering for the input graph. | |
231 | |
232 Examples | |
233 -------- | |
234 >>> from networkx.algorithms import bipartite | |
235 >>> G = nx.davis_southern_women_graph() | |
236 >>> print(round(bipartite.robins_alexander_clustering(G), 3)) | |
237 0.468 | |
238 | |
239 See Also | |
240 -------- | |
241 latapy_clustering | |
242 square_clustering | |
243 | |
244 References | |
245 ---------- | |
246 .. [1] Robins, G. and M. Alexander (2004). Small worlds among interlocking | |
247 directors: Network structure and distance in bipartite graphs. | |
248 Computational & Mathematical Organization Theory 10(1), 69–94. | |
249 | |
250 """ | |
251 if G.order() < 4 or G.size() < 3: | |
252 return 0 | |
253 L_3 = _threepaths(G) | |
254 if L_3 == 0: | |
255 return 0 | |
256 C_4 = _four_cycles(G) | |
257 return (4.0 * C_4) / L_3 | |
258 | |
259 | |
260 def _four_cycles(G): | |
261 cycles = 0 | |
262 for v in G: | |
263 for u, w in itertools.combinations(G[v], 2): | |
264 cycles += len((set(G[u]) & set(G[w])) - {v}) | |
265 return cycles / 4 | |
266 | |
267 | |
268 def _threepaths(G): | |
269 paths = 0 | |
270 for v in G: | |
271 for u in G[v]: | |
272 for w in set(G[u]) - {v}: | |
273 paths += len(set(G[w]) - {v, u}) | |
274 # Divide by two because we count each three path twice | |
275 # one for each possible starting point | |
276 return paths / 2 |