Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/tree/mst.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 Algorithms for calculating min/max spanning trees/forests. | |
3 | |
4 """ | |
5 from heapq import heappop, heappush | |
6 from operator import itemgetter | |
7 from itertools import count | |
8 from math import isnan | |
9 | |
10 import networkx as nx | |
11 from networkx.utils import UnionFind, not_implemented_for | |
12 | |
13 __all__ = [ | |
14 "minimum_spanning_edges", | |
15 "maximum_spanning_edges", | |
16 "minimum_spanning_tree", | |
17 "maximum_spanning_tree", | |
18 ] | |
19 | |
20 | |
21 @not_implemented_for("multigraph") | |
22 def boruvka_mst_edges( | |
23 G, minimum=True, weight="weight", keys=False, data=True, ignore_nan=False | |
24 ): | |
25 """Iterate over edges of a Borůvka's algorithm min/max spanning tree. | |
26 | |
27 Parameters | |
28 ---------- | |
29 G : NetworkX Graph | |
30 The edges of `G` must have distinct weights, | |
31 otherwise the edges may not form a tree. | |
32 | |
33 minimum : bool (default: True) | |
34 Find the minimum (True) or maximum (False) spanning tree. | |
35 | |
36 weight : string (default: 'weight') | |
37 The name of the edge attribute holding the edge weights. | |
38 | |
39 keys : bool (default: True) | |
40 This argument is ignored since this function is not | |
41 implemented for multigraphs; it exists only for consistency | |
42 with the other minimum spanning tree functions. | |
43 | |
44 data : bool (default: True) | |
45 Flag for whether to yield edge attribute dicts. | |
46 If True, yield edges `(u, v, d)`, where `d` is the attribute dict. | |
47 If False, yield edges `(u, v)`. | |
48 | |
49 ignore_nan : bool (default: False) | |
50 If a NaN is found as an edge weight normally an exception is raised. | |
51 If `ignore_nan is True` then that edge is ignored instead. | |
52 | |
53 """ | |
54 # Initialize a forest, assuming initially that it is the discrete | |
55 # partition of the nodes of the graph. | |
56 forest = UnionFind(G) | |
57 | |
58 def best_edge(component): | |
59 """Returns the optimum (minimum or maximum) edge on the edge | |
60 boundary of the given set of nodes. | |
61 | |
62 A return value of ``None`` indicates an empty boundary. | |
63 | |
64 """ | |
65 sign = 1 if minimum else -1 | |
66 minwt = float("inf") | |
67 boundary = None | |
68 for e in nx.edge_boundary(G, component, data=True): | |
69 wt = e[-1].get(weight, 1) * sign | |
70 if isnan(wt): | |
71 if ignore_nan: | |
72 continue | |
73 msg = f"NaN found as an edge weight. Edge {e}" | |
74 raise ValueError(msg) | |
75 if wt < minwt: | |
76 minwt = wt | |
77 boundary = e | |
78 return boundary | |
79 | |
80 # Determine the optimum edge in the edge boundary of each component | |
81 # in the forest. | |
82 best_edges = (best_edge(component) for component in forest.to_sets()) | |
83 best_edges = [edge for edge in best_edges if edge is not None] | |
84 # If each entry was ``None``, that means the graph was disconnected, | |
85 # so we are done generating the forest. | |
86 while best_edges: | |
87 # Determine the optimum edge in the edge boundary of each | |
88 # component in the forest. | |
89 # | |
90 # This must be a sequence, not an iterator. In this list, the | |
91 # same edge may appear twice, in different orientations (but | |
92 # that's okay, since a union operation will be called on the | |
93 # endpoints the first time it is seen, but not the second time). | |
94 # | |
95 # Any ``None`` indicates that the edge boundary for that | |
96 # component was empty, so that part of the forest has been | |
97 # completed. | |
98 # | |
99 # TODO This can be parallelized, both in the outer loop over | |
100 # each component in the forest and in the computation of the | |
101 # minimum. (Same goes for the identical lines outside the loop.) | |
102 best_edges = (best_edge(component) for component in forest.to_sets()) | |
103 best_edges = [edge for edge in best_edges if edge is not None] | |
104 # Join trees in the forest using the best edges, and yield that | |
105 # edge, since it is part of the spanning tree. | |
106 # | |
107 # TODO This loop can be parallelized, to an extent (the union | |
108 # operation must be atomic). | |
109 for u, v, d in best_edges: | |
110 if forest[u] != forest[v]: | |
111 if data: | |
112 yield u, v, d | |
113 else: | |
114 yield u, v | |
115 forest.union(u, v) | |
116 | |
117 | |
118 def kruskal_mst_edges( | |
119 G, minimum, weight="weight", keys=True, data=True, ignore_nan=False | |
120 ): | |
121 """Iterate over edges of a Kruskal's algorithm min/max spanning tree. | |
122 | |
123 Parameters | |
124 ---------- | |
125 G : NetworkX Graph | |
126 The graph holding the tree of interest. | |
127 | |
128 minimum : bool (default: True) | |
129 Find the minimum (True) or maximum (False) spanning tree. | |
130 | |
131 weight : string (default: 'weight') | |
132 The name of the edge attribute holding the edge weights. | |
133 | |
134 keys : bool (default: True) | |
135 If `G` is a multigraph, `keys` controls whether edge keys ar yielded. | |
136 Otherwise `keys` is ignored. | |
137 | |
138 data : bool (default: True) | |
139 Flag for whether to yield edge attribute dicts. | |
140 If True, yield edges `(u, v, d)`, where `d` is the attribute dict. | |
141 If False, yield edges `(u, v)`. | |
142 | |
143 ignore_nan : bool (default: False) | |
144 If a NaN is found as an edge weight normally an exception is raised. | |
145 If `ignore_nan is True` then that edge is ignored instead. | |
146 | |
147 """ | |
148 subtrees = UnionFind() | |
149 if G.is_multigraph(): | |
150 edges = G.edges(keys=True, data=True) | |
151 | |
152 def filter_nan_edges(edges=edges, weight=weight): | |
153 sign = 1 if minimum else -1 | |
154 for u, v, k, d in edges: | |
155 wt = d.get(weight, 1) * sign | |
156 if isnan(wt): | |
157 if ignore_nan: | |
158 continue | |
159 msg = f"NaN found as an edge weight. Edge {(u, v, k, d)}" | |
160 raise ValueError(msg) | |
161 yield wt, u, v, k, d | |
162 | |
163 else: | |
164 edges = G.edges(data=True) | |
165 | |
166 def filter_nan_edges(edges=edges, weight=weight): | |
167 sign = 1 if minimum else -1 | |
168 for u, v, d in edges: | |
169 wt = d.get(weight, 1) * sign | |
170 if isnan(wt): | |
171 if ignore_nan: | |
172 continue | |
173 msg = f"NaN found as an edge weight. Edge {(u, v, d)}" | |
174 raise ValueError(msg) | |
175 yield wt, u, v, d | |
176 | |
177 edges = sorted(filter_nan_edges(), key=itemgetter(0)) | |
178 # Multigraphs need to handle edge keys in addition to edge data. | |
179 if G.is_multigraph(): | |
180 for wt, u, v, k, d in edges: | |
181 if subtrees[u] != subtrees[v]: | |
182 if keys: | |
183 if data: | |
184 yield u, v, k, d | |
185 else: | |
186 yield u, v, k | |
187 else: | |
188 if data: | |
189 yield u, v, d | |
190 else: | |
191 yield u, v | |
192 subtrees.union(u, v) | |
193 else: | |
194 for wt, u, v, d in edges: | |
195 if subtrees[u] != subtrees[v]: | |
196 if data: | |
197 yield (u, v, d) | |
198 else: | |
199 yield (u, v) | |
200 subtrees.union(u, v) | |
201 | |
202 | |
203 def prim_mst_edges(G, minimum, weight="weight", keys=True, data=True, ignore_nan=False): | |
204 """Iterate over edges of Prim's algorithm min/max spanning tree. | |
205 | |
206 Parameters | |
207 ---------- | |
208 G : NetworkX Graph | |
209 The graph holding the tree of interest. | |
210 | |
211 minimum : bool (default: True) | |
212 Find the minimum (True) or maximum (False) spanning tree. | |
213 | |
214 weight : string (default: 'weight') | |
215 The name of the edge attribute holding the edge weights. | |
216 | |
217 keys : bool (default: True) | |
218 If `G` is a multigraph, `keys` controls whether edge keys ar yielded. | |
219 Otherwise `keys` is ignored. | |
220 | |
221 data : bool (default: True) | |
222 Flag for whether to yield edge attribute dicts. | |
223 If True, yield edges `(u, v, d)`, where `d` is the attribute dict. | |
224 If False, yield edges `(u, v)`. | |
225 | |
226 ignore_nan : bool (default: False) | |
227 If a NaN is found as an edge weight normally an exception is raised. | |
228 If `ignore_nan is True` then that edge is ignored instead. | |
229 | |
230 """ | |
231 is_multigraph = G.is_multigraph() | |
232 push = heappush | |
233 pop = heappop | |
234 | |
235 nodes = set(G) | |
236 c = count() | |
237 | |
238 sign = 1 if minimum else -1 | |
239 | |
240 while nodes: | |
241 u = nodes.pop() | |
242 frontier = [] | |
243 visited = {u} | |
244 if is_multigraph: | |
245 for v, keydict in G.adj[u].items(): | |
246 for k, d in keydict.items(): | |
247 wt = d.get(weight, 1) * sign | |
248 if isnan(wt): | |
249 if ignore_nan: | |
250 continue | |
251 msg = f"NaN found as an edge weight. Edge {(u, v, k, d)}" | |
252 raise ValueError(msg) | |
253 push(frontier, (wt, next(c), u, v, k, d)) | |
254 else: | |
255 for v, d in G.adj[u].items(): | |
256 wt = d.get(weight, 1) * sign | |
257 if isnan(wt): | |
258 if ignore_nan: | |
259 continue | |
260 msg = f"NaN found as an edge weight. Edge {(u, v, d)}" | |
261 raise ValueError(msg) | |
262 push(frontier, (wt, next(c), u, v, d)) | |
263 while frontier: | |
264 if is_multigraph: | |
265 W, _, u, v, k, d = pop(frontier) | |
266 else: | |
267 W, _, u, v, d = pop(frontier) | |
268 if v in visited or v not in nodes: | |
269 continue | |
270 # Multigraphs need to handle edge keys in addition to edge data. | |
271 if is_multigraph and keys: | |
272 if data: | |
273 yield u, v, k, d | |
274 else: | |
275 yield u, v, k | |
276 else: | |
277 if data: | |
278 yield u, v, d | |
279 else: | |
280 yield u, v | |
281 # update frontier | |
282 visited.add(v) | |
283 nodes.discard(v) | |
284 if is_multigraph: | |
285 for w, keydict in G.adj[v].items(): | |
286 if w in visited: | |
287 continue | |
288 for k2, d2 in keydict.items(): | |
289 new_weight = d2.get(weight, 1) * sign | |
290 push(frontier, (new_weight, next(c), v, w, k2, d2)) | |
291 else: | |
292 for w, d2 in G.adj[v].items(): | |
293 if w in visited: | |
294 continue | |
295 new_weight = d2.get(weight, 1) * sign | |
296 push(frontier, (new_weight, next(c), v, w, d2)) | |
297 | |
298 | |
299 ALGORITHMS = { | |
300 "boruvka": boruvka_mst_edges, | |
301 "borůvka": boruvka_mst_edges, | |
302 "kruskal": kruskal_mst_edges, | |
303 "prim": prim_mst_edges, | |
304 } | |
305 | |
306 | |
307 @not_implemented_for("directed") | |
308 def minimum_spanning_edges( | |
309 G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False | |
310 ): | |
311 """Generate edges in a minimum spanning forest of an undirected | |
312 weighted graph. | |
313 | |
314 A minimum spanning tree is a subgraph of the graph (a tree) | |
315 with the minimum sum of edge weights. A spanning forest is a | |
316 union of the spanning trees for each connected component of the graph. | |
317 | |
318 Parameters | |
319 ---------- | |
320 G : undirected Graph | |
321 An undirected graph. If `G` is connected, then the algorithm finds a | |
322 spanning tree. Otherwise, a spanning forest is found. | |
323 | |
324 algorithm : string | |
325 The algorithm to use when finding a minimum spanning tree. Valid | |
326 choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. | |
327 | |
328 weight : string | |
329 Edge data key to use for weight (default 'weight'). | |
330 | |
331 keys : bool | |
332 Whether to yield edge key in multigraphs in addition to the edge. | |
333 If `G` is not a multigraph, this is ignored. | |
334 | |
335 data : bool, optional | |
336 If True yield the edge data along with the edge. | |
337 | |
338 ignore_nan : bool (default: False) | |
339 If a NaN is found as an edge weight normally an exception is raised. | |
340 If `ignore_nan is True` then that edge is ignored instead. | |
341 | |
342 Returns | |
343 ------- | |
344 edges : iterator | |
345 An iterator over edges in a maximum spanning tree of `G`. | |
346 Edges connecting nodes `u` and `v` are represented as tuples: | |
347 `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)` | |
348 | |
349 If `G` is a multigraph, `keys` indicates whether the edge key `k` will | |
350 be reported in the third position in the edge tuple. `data` indicates | |
351 whether the edge datadict `d` will appear at the end of the edge tuple. | |
352 | |
353 If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True | |
354 or `(u, v)` if `data` is False. | |
355 | |
356 Examples | |
357 -------- | |
358 >>> from networkx.algorithms import tree | |
359 | |
360 Find minimum spanning edges by Kruskal's algorithm | |
361 | |
362 >>> G = nx.cycle_graph(4) | |
363 >>> G.add_edge(0, 3, weight=2) | |
364 >>> mst = tree.minimum_spanning_edges(G, algorithm="kruskal", data=False) | |
365 >>> edgelist = list(mst) | |
366 >>> sorted(sorted(e) for e in edgelist) | |
367 [[0, 1], [1, 2], [2, 3]] | |
368 | |
369 Find minimum spanning edges by Prim's algorithm | |
370 | |
371 >>> G = nx.cycle_graph(4) | |
372 >>> G.add_edge(0, 3, weight=2) | |
373 >>> mst = tree.minimum_spanning_edges(G, algorithm="prim", data=False) | |
374 >>> edgelist = list(mst) | |
375 >>> sorted(sorted(e) for e in edgelist) | |
376 [[0, 1], [1, 2], [2, 3]] | |
377 | |
378 Notes | |
379 ----- | |
380 For Borůvka's algorithm, each edge must have a weight attribute, and | |
381 each edge weight must be distinct. | |
382 | |
383 For the other algorithms, if the graph edges do not have a weight | |
384 attribute a default weight of 1 will be used. | |
385 | |
386 Modified code from David Eppstein, April 2006 | |
387 http://www.ics.uci.edu/~eppstein/PADS/ | |
388 | |
389 """ | |
390 try: | |
391 algo = ALGORITHMS[algorithm] | |
392 except KeyError as e: | |
393 msg = f"{algorithm} is not a valid choice for an algorithm." | |
394 raise ValueError(msg) from e | |
395 | |
396 return algo( | |
397 G, minimum=True, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan | |
398 ) | |
399 | |
400 | |
401 @not_implemented_for("directed") | |
402 def maximum_spanning_edges( | |
403 G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False | |
404 ): | |
405 """Generate edges in a maximum spanning forest of an undirected | |
406 weighted graph. | |
407 | |
408 A maximum spanning tree is a subgraph of the graph (a tree) | |
409 with the maximum possible sum of edge weights. A spanning forest is a | |
410 union of the spanning trees for each connected component of the graph. | |
411 | |
412 Parameters | |
413 ---------- | |
414 G : undirected Graph | |
415 An undirected graph. If `G` is connected, then the algorithm finds a | |
416 spanning tree. Otherwise, a spanning forest is found. | |
417 | |
418 algorithm : string | |
419 The algorithm to use when finding a maximum spanning tree. Valid | |
420 choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. | |
421 | |
422 weight : string | |
423 Edge data key to use for weight (default 'weight'). | |
424 | |
425 keys : bool | |
426 Whether to yield edge key in multigraphs in addition to the edge. | |
427 If `G` is not a multigraph, this is ignored. | |
428 | |
429 data : bool, optional | |
430 If True yield the edge data along with the edge. | |
431 | |
432 ignore_nan : bool (default: False) | |
433 If a NaN is found as an edge weight normally an exception is raised. | |
434 If `ignore_nan is True` then that edge is ignored instead. | |
435 | |
436 Returns | |
437 ------- | |
438 edges : iterator | |
439 An iterator over edges in a maximum spanning tree of `G`. | |
440 Edges connecting nodes `u` and `v` are represented as tuples: | |
441 `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)` | |
442 | |
443 If `G` is a multigraph, `keys` indicates whether the edge key `k` will | |
444 be reported in the third position in the edge tuple. `data` indicates | |
445 whether the edge datadict `d` will appear at the end of the edge tuple. | |
446 | |
447 If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True | |
448 or `(u, v)` if `data` is False. | |
449 | |
450 Examples | |
451 -------- | |
452 >>> from networkx.algorithms import tree | |
453 | |
454 Find maximum spanning edges by Kruskal's algorithm | |
455 | |
456 >>> G = nx.cycle_graph(4) | |
457 >>> G.add_edge(0, 3, weight=2) | |
458 >>> mst = tree.maximum_spanning_edges(G, algorithm="kruskal", data=False) | |
459 >>> edgelist = list(mst) | |
460 >>> sorted(sorted(e) for e in edgelist) | |
461 [[0, 1], [0, 3], [1, 2]] | |
462 | |
463 Find maximum spanning edges by Prim's algorithm | |
464 | |
465 >>> G = nx.cycle_graph(4) | |
466 >>> G.add_edge(0, 3, weight=2) # assign weight 2 to edge 0-3 | |
467 >>> mst = tree.maximum_spanning_edges(G, algorithm="prim", data=False) | |
468 >>> edgelist = list(mst) | |
469 >>> sorted(sorted(e) for e in edgelist) | |
470 [[0, 1], [0, 3], [2, 3]] | |
471 | |
472 Notes | |
473 ----- | |
474 For Borůvka's algorithm, each edge must have a weight attribute, and | |
475 each edge weight must be distinct. | |
476 | |
477 For the other algorithms, if the graph edges do not have a weight | |
478 attribute a default weight of 1 will be used. | |
479 | |
480 Modified code from David Eppstein, April 2006 | |
481 http://www.ics.uci.edu/~eppstein/PADS/ | |
482 """ | |
483 try: | |
484 algo = ALGORITHMS[algorithm] | |
485 except KeyError as e: | |
486 msg = f"{algorithm} is not a valid choice for an algorithm." | |
487 raise ValueError(msg) from e | |
488 | |
489 return algo( | |
490 G, minimum=False, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan | |
491 ) | |
492 | |
493 | |
494 def minimum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False): | |
495 """Returns a minimum spanning tree or forest on an undirected graph `G`. | |
496 | |
497 Parameters | |
498 ---------- | |
499 G : undirected graph | |
500 An undirected graph. If `G` is connected, then the algorithm finds a | |
501 spanning tree. Otherwise, a spanning forest is found. | |
502 | |
503 weight : str | |
504 Data key to use for edge weights. | |
505 | |
506 algorithm : string | |
507 The algorithm to use when finding a minimum spanning tree. Valid | |
508 choices are 'kruskal', 'prim', or 'boruvka'. The default is | |
509 'kruskal'. | |
510 | |
511 ignore_nan : bool (default: False) | |
512 If a NaN is found as an edge weight normally an exception is raised. | |
513 If `ignore_nan is True` then that edge is ignored instead. | |
514 | |
515 Returns | |
516 ------- | |
517 G : NetworkX Graph | |
518 A minimum spanning tree or forest. | |
519 | |
520 Examples | |
521 -------- | |
522 >>> G = nx.cycle_graph(4) | |
523 >>> G.add_edge(0, 3, weight=2) | |
524 >>> T = nx.minimum_spanning_tree(G) | |
525 >>> sorted(T.edges(data=True)) | |
526 [(0, 1, {}), (1, 2, {}), (2, 3, {})] | |
527 | |
528 | |
529 Notes | |
530 ----- | |
531 For Borůvka's algorithm, each edge must have a weight attribute, and | |
532 each edge weight must be distinct. | |
533 | |
534 For the other algorithms, if the graph edges do not have a weight | |
535 attribute a default weight of 1 will be used. | |
536 | |
537 There may be more than one tree with the same minimum or maximum weight. | |
538 See :mod:`networkx.tree.recognition` for more detailed definitions. | |
539 | |
540 Isolated nodes with self-loops are in the tree as edgeless isolated nodes. | |
541 | |
542 """ | |
543 edges = minimum_spanning_edges( | |
544 G, algorithm, weight, keys=True, data=True, ignore_nan=ignore_nan | |
545 ) | |
546 T = G.__class__() # Same graph class as G | |
547 T.graph.update(G.graph) | |
548 T.add_nodes_from(G.nodes.items()) | |
549 T.add_edges_from(edges) | |
550 return T | |
551 | |
552 | |
553 def maximum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False): | |
554 """Returns a maximum spanning tree or forest on an undirected graph `G`. | |
555 | |
556 Parameters | |
557 ---------- | |
558 G : undirected graph | |
559 An undirected graph. If `G` is connected, then the algorithm finds a | |
560 spanning tree. Otherwise, a spanning forest is found. | |
561 | |
562 weight : str | |
563 Data key to use for edge weights. | |
564 | |
565 algorithm : string | |
566 The algorithm to use when finding a maximum spanning tree. Valid | |
567 choices are 'kruskal', 'prim', or 'boruvka'. The default is | |
568 'kruskal'. | |
569 | |
570 ignore_nan : bool (default: False) | |
571 If a NaN is found as an edge weight normally an exception is raised. | |
572 If `ignore_nan is True` then that edge is ignored instead. | |
573 | |
574 | |
575 Returns | |
576 ------- | |
577 G : NetworkX Graph | |
578 A maximum spanning tree or forest. | |
579 | |
580 | |
581 Examples | |
582 -------- | |
583 >>> G = nx.cycle_graph(4) | |
584 >>> G.add_edge(0, 3, weight=2) | |
585 >>> T = nx.maximum_spanning_tree(G) | |
586 >>> sorted(T.edges(data=True)) | |
587 [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})] | |
588 | |
589 | |
590 Notes | |
591 ----- | |
592 For Borůvka's algorithm, each edge must have a weight attribute, and | |
593 each edge weight must be distinct. | |
594 | |
595 For the other algorithms, if the graph edges do not have a weight | |
596 attribute a default weight of 1 will be used. | |
597 | |
598 There may be more than one tree with the same minimum or maximum weight. | |
599 See :mod:`networkx.tree.recognition` for more detailed definitions. | |
600 | |
601 Isolated nodes with self-loops are in the tree as edgeless isolated nodes. | |
602 | |
603 """ | |
604 edges = maximum_spanning_edges( | |
605 G, algorithm, weight, keys=True, data=True, ignore_nan=ignore_nan | |
606 ) | |
607 edges = list(edges) | |
608 T = G.__class__() # Same graph class as G | |
609 T.graph.update(G.graph) | |
610 T.add_nodes_from(G.nodes.items()) | |
611 T.add_edges_from(edges) | |
612 return T |