## Mercurial > repos > shellac > sam_consensus_v3

### comparison env/lib/python3.9/site-packages/networkx/algorithms/approximation/kcomponents.py @ 0:4f3585e2f14b draft default tip

Find changesets by keywords (author, files, the commit message), revision
number or hash, or revset expression.

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"

author | shellac |
---|---|

date | Mon, 22 Mar 2021 18:12:50 +0000 |

parents | |

children |

comparison

equal
deleted
inserted
replaced

-1:000000000000 | 0:4f3585e2f14b |
---|---|

1 """ Fast approximation for k-component structure | |

2 """ | |

3 import itertools | |

4 from collections import defaultdict | |

5 from collections.abc import Mapping | |

6 | |

7 import networkx as nx | |

8 from networkx.exception import NetworkXError | |

9 from networkx.utils import not_implemented_for | |

10 | |

11 from networkx.algorithms.approximation import local_node_connectivity | |

12 | |

13 | |

14 __all__ = ["k_components"] | |

15 | |

16 | |

17 not_implemented_for("directed") | |

18 | |

19 | |

20 def k_components(G, min_density=0.95): | |

21 r"""Returns the approximate k-component structure of a graph G. | |

22 | |

23 A `k`-component is a maximal subgraph of a graph G that has, at least, | |

24 node connectivity `k`: we need to remove at least `k` nodes to break it | |

25 into more components. `k`-components have an inherent hierarchical | |

26 structure because they are nested in terms of connectivity: a connected | |

27 graph can contain several 2-components, each of which can contain | |

28 one or more 3-components, and so forth. | |

29 | |

30 This implementation is based on the fast heuristics to approximate | |

31 the `k`-component structure of a graph [1]_. Which, in turn, it is based on | |

32 a fast approximation algorithm for finding good lower bounds of the number | |

33 of node independent paths between two nodes [2]_. | |

34 | |

35 Parameters | |

36 ---------- | |

37 G : NetworkX graph | |

38 Undirected graph | |

39 | |

40 min_density : Float | |

41 Density relaxation threshold. Default value 0.95 | |

42 | |

43 Returns | |

44 ------- | |

45 k_components : dict | |

46 Dictionary with connectivity level `k` as key and a list of | |

47 sets of nodes that form a k-component of level `k` as values. | |

48 | |

49 | |

50 Examples | |

51 -------- | |

52 >>> # Petersen graph has 10 nodes and it is triconnected, thus all | |

53 >>> # nodes are in a single component on all three connectivity levels | |

54 >>> from networkx.algorithms import approximation as apxa | |

55 >>> G = nx.petersen_graph() | |

56 >>> k_components = apxa.k_components(G) | |

57 | |

58 Notes | |

59 ----- | |

60 The logic of the approximation algorithm for computing the `k`-component | |

61 structure [1]_ is based on repeatedly applying simple and fast algorithms | |

62 for `k`-cores and biconnected components in order to narrow down the | |

63 number of pairs of nodes over which we have to compute White and Newman's | |

64 approximation algorithm for finding node independent paths [2]_. More | |

65 formally, this algorithm is based on Whitney's theorem, which states | |

66 an inclusion relation among node connectivity, edge connectivity, and | |

67 minimum degree for any graph G. This theorem implies that every | |

68 `k`-component is nested inside a `k`-edge-component, which in turn, | |

69 is contained in a `k`-core. Thus, this algorithm computes node independent | |

70 paths among pairs of nodes in each biconnected part of each `k`-core, | |

71 and repeats this procedure for each `k` from 3 to the maximal core number | |

72 of a node in the input graph. | |

73 | |

74 Because, in practice, many nodes of the core of level `k` inside a | |

75 bicomponent actually are part of a component of level k, the auxiliary | |

76 graph needed for the algorithm is likely to be very dense. Thus, we use | |

77 a complement graph data structure (see `AntiGraph`) to save memory. | |

78 AntiGraph only stores information of the edges that are *not* present | |

79 in the actual auxiliary graph. When applying algorithms to this | |

80 complement graph data structure, it behaves as if it were the dense | |

81 version. | |

82 | |

83 See also | |

84 -------- | |

85 k_components | |

86 | |

87 References | |

88 ---------- | |

89 .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion: | |

90 Visualization and Heuristics for Fast Computation. | |

91 https://arxiv.org/pdf/1503.04476v1 | |

92 | |

93 .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for | |

94 Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035 | |

95 http://eclectic.ss.uci.edu/~drwhite/working.pdf | |

96 | |

97 .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness: | |

98 A hierarchical conception of social groups. | |

99 American Sociological Review 68(1), 103--28. | |

100 http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf | |

101 | |

102 """ | |

103 # Dictionary with connectivity level (k) as keys and a list of | |

104 # sets of nodes that form a k-component as values | |

105 k_components = defaultdict(list) | |

106 # make a few functions local for speed | |

107 node_connectivity = local_node_connectivity | |

108 k_core = nx.k_core | |

109 core_number = nx.core_number | |

110 biconnected_components = nx.biconnected_components | |

111 density = nx.density | |

112 combinations = itertools.combinations | |

113 # Exact solution for k = {1,2} | |

114 # There is a linear time algorithm for triconnectivity, if we had an | |

115 # implementation available we could start from k = 4. | |

116 for component in nx.connected_components(G): | |

117 # isolated nodes have connectivity 0 | |

118 comp = set(component) | |

119 if len(comp) > 1: | |

120 k_components[1].append(comp) | |

121 for bicomponent in nx.biconnected_components(G): | |

122 # avoid considering dyads as bicomponents | |

123 bicomp = set(bicomponent) | |

124 if len(bicomp) > 2: | |

125 k_components[2].append(bicomp) | |

126 # There is no k-component of k > maximum core number | |

127 # \kappa(G) <= \lambda(G) <= \delta(G) | |

128 g_cnumber = core_number(G) | |

129 max_core = max(g_cnumber.values()) | |

130 for k in range(3, max_core + 1): | |

131 C = k_core(G, k, core_number=g_cnumber) | |

132 for nodes in biconnected_components(C): | |

133 # Build a subgraph SG induced by the nodes that are part of | |

134 # each biconnected component of the k-core subgraph C. | |

135 if len(nodes) < k: | |

136 continue | |

137 SG = G.subgraph(nodes) | |

138 # Build auxiliary graph | |

139 H = _AntiGraph() | |

140 H.add_nodes_from(SG.nodes()) | |

141 for u, v in combinations(SG, 2): | |

142 K = node_connectivity(SG, u, v, cutoff=k) | |

143 if k > K: | |

144 H.add_edge(u, v) | |

145 for h_nodes in biconnected_components(H): | |

146 if len(h_nodes) <= k: | |

147 continue | |

148 SH = H.subgraph(h_nodes) | |

149 for Gc in _cliques_heuristic(SG, SH, k, min_density): | |

150 for k_nodes in biconnected_components(Gc): | |

151 Gk = nx.k_core(SG.subgraph(k_nodes), k) | |

152 if len(Gk) <= k: | |

153 continue | |

154 k_components[k].append(set(Gk)) | |

155 return k_components | |

156 | |

157 | |

158 def _cliques_heuristic(G, H, k, min_density): | |

159 h_cnumber = nx.core_number(H) | |

160 for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)): | |

161 cands = {n for n, c in h_cnumber.items() if c == c_value} | |

162 # Skip checking for overlap for the highest core value | |

163 if i == 0: | |

164 overlap = False | |

165 else: | |

166 overlap = set.intersection( | |

167 *[{x for x in H[n] if x not in cands} for n in cands] | |

168 ) | |

169 if overlap and len(overlap) < k: | |

170 SH = H.subgraph(cands | overlap) | |

171 else: | |

172 SH = H.subgraph(cands) | |

173 sh_cnumber = nx.core_number(SH) | |

174 SG = nx.k_core(G.subgraph(SH), k) | |

175 while not (_same(sh_cnumber) and nx.density(SH) >= min_density): | |

176 # This subgraph must be writable => .copy() | |

177 SH = H.subgraph(SG).copy() | |

178 if len(SH) <= k: | |

179 break | |

180 sh_cnumber = nx.core_number(SH) | |

181 sh_deg = dict(SH.degree()) | |

182 min_deg = min(sh_deg.values()) | |

183 SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg) | |

184 SG = nx.k_core(G.subgraph(SH), k) | |

185 else: | |

186 yield SG | |

187 | |

188 | |

189 def _same(measure, tol=0): | |

190 vals = set(measure.values()) | |

191 if (max(vals) - min(vals)) <= tol: | |

192 return True | |

193 return False | |

194 | |

195 | |

196 class _AntiGraph(nx.Graph): | |

197 """ | |

198 Class for complement graphs. | |

199 | |

200 The main goal is to be able to work with big and dense graphs with | |

201 a low memory foodprint. | |

202 | |

203 In this class you add the edges that *do not exist* in the dense graph, | |

204 the report methods of the class return the neighbors, the edges and | |

205 the degree as if it was the dense graph. Thus it's possible to use | |

206 an instance of this class with some of NetworkX functions. In this | |

207 case we only use k-core, connected_components, and biconnected_components. | |

208 """ | |

209 | |

210 all_edge_dict = {"weight": 1} | |

211 | |

212 def single_edge_dict(self): | |

213 return self.all_edge_dict | |

214 | |

215 edge_attr_dict_factory = single_edge_dict | |

216 | |

217 def __getitem__(self, n): | |

218 """Returns a dict of neighbors of node n in the dense graph. | |

219 | |

220 Parameters | |

221 ---------- | |

222 n : node | |

223 A node in the graph. | |

224 | |

225 Returns | |

226 ------- | |

227 adj_dict : dictionary | |

228 The adjacency dictionary for nodes connected to n. | |

229 | |

230 """ | |

231 all_edge_dict = self.all_edge_dict | |

232 return { | |

233 node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n} | |

234 } | |

235 | |

236 def neighbors(self, n): | |

237 """Returns an iterator over all neighbors of node n in the | |

238 dense graph. | |

239 """ | |

240 try: | |

241 return iter(set(self._adj) - set(self._adj[n]) - {n}) | |

242 except KeyError as e: | |

243 raise NetworkXError(f"The node {n} is not in the graph.") from e | |

244 | |

245 class AntiAtlasView(Mapping): | |

246 """An adjacency inner dict for AntiGraph""" | |

247 | |

248 def __init__(self, graph, node): | |

249 self._graph = graph | |

250 self._atlas = graph._adj[node] | |

251 self._node = node | |

252 | |

253 def __len__(self): | |

254 return len(self._graph) - len(self._atlas) - 1 | |

255 | |

256 def __iter__(self): | |

257 return (n for n in self._graph if n not in self._atlas and n != self._node) | |

258 | |

259 def __getitem__(self, nbr): | |

260 nbrs = set(self._graph._adj) - set(self._atlas) - {self._node} | |

261 if nbr in nbrs: | |

262 return self._graph.all_edge_dict | |

263 raise KeyError(nbr) | |

264 | |

265 class AntiAdjacencyView(AntiAtlasView): | |

266 """An adjacency outer dict for AntiGraph""" | |

267 | |

268 def __init__(self, graph): | |

269 self._graph = graph | |

270 self._atlas = graph._adj | |

271 | |

272 def __len__(self): | |

273 return len(self._atlas) | |

274 | |

275 def __iter__(self): | |

276 return iter(self._graph) | |

277 | |

278 def __getitem__(self, node): | |

279 if node not in self._graph: | |

280 raise KeyError(node) | |

281 return self._graph.AntiAtlasView(self._graph, node) | |

282 | |

283 @property | |

284 def adj(self): | |

285 return self.AntiAdjacencyView(self) | |

286 | |

287 def subgraph(self, nodes): | |

288 """This subgraph method returns a full AntiGraph. Not a View""" | |

289 nodes = set(nodes) | |

290 G = _AntiGraph() | |

291 G.add_nodes_from(nodes) | |

292 for n in G: | |

293 Gnbrs = G.adjlist_inner_dict_factory() | |

294 G._adj[n] = Gnbrs | |

295 for nbr, d in self._adj[n].items(): | |

296 if nbr in G._adj: | |

297 Gnbrs[nbr] = d | |

298 G._adj[nbr][n] = d | |

299 G.graph = self.graph | |

300 return G | |

301 | |

302 class AntiDegreeView(nx.reportviews.DegreeView): | |

303 def __iter__(self): | |

304 all_nodes = set(self._succ) | |

305 for n in self._nodes: | |

306 nbrs = all_nodes - set(self._succ[n]) - {n} | |

307 yield (n, len(nbrs)) | |

308 | |

309 def __getitem__(self, n): | |

310 nbrs = set(self._succ) - set(self._succ[n]) - {n} | |

311 # AntiGraph is a ThinGraph so all edges have weight 1 | |

312 return len(nbrs) + (n in nbrs) | |

313 | |

314 @property | |

315 def degree(self): | |

316 """Returns an iterator for (node, degree) and degree for single node. | |

317 | |

318 The node degree is the number of edges adjacent to the node. | |

319 | |

320 Parameters | |

321 ---------- | |

322 nbunch : iterable container, optional (default=all nodes) | |

323 A container of nodes. The container will be iterated | |

324 through once. | |

325 | |

326 weight : string or None, optional (default=None) | |

327 The edge attribute that holds the numerical value used | |

328 as a weight. If None, then each edge has weight 1. | |

329 The degree is the sum of the edge weights adjacent to the node. | |

330 | |

331 Returns | |

332 ------- | |

333 deg: | |

334 Degree of the node, if a single node is passed as argument. | |

335 nd_iter : an iterator | |

336 The iterator returns two-tuples of (node, degree). | |

337 | |

338 See Also | |

339 -------- | |

340 degree | |

341 | |

342 Examples | |

343 -------- | |

344 >>> G = nx.path_graph(4) | |

345 >>> G.degree(0) # node 0 with degree 1 | |

346 1 | |

347 >>> list(G.degree([0, 1])) | |

348 [(0, 1), (1, 2)] | |

349 | |

350 """ | |

351 return self.AntiDegreeView(self) | |

352 | |

353 def adjacency(self): | |

354 """Returns an iterator of (node, adjacency set) tuples for all nodes | |

355 in the dense graph. | |

356 | |

357 This is the fastest way to look at every edge. | |

358 For directed graphs, only outgoing adjacencies are included. | |

359 | |

360 Returns | |

361 ------- | |

362 adj_iter : iterator | |

363 An iterator of (node, adjacency set) for all nodes in | |

364 the graph. | |

365 | |

366 """ | |

367 for n in self._adj: | |

368 yield (n, set(self._adj) - set(self._adj[n]) - {n}) |