Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/networkx/algorithms/tree/mst.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/tree/mst.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,612 @@ +""" +Algorithms for calculating min/max spanning trees/forests. + +""" +from heapq import heappop, heappush +from operator import itemgetter +from itertools import count +from math import isnan + +import networkx as nx +from networkx.utils import UnionFind, not_implemented_for + +__all__ = [ + "minimum_spanning_edges", + "maximum_spanning_edges", + "minimum_spanning_tree", + "maximum_spanning_tree", +] + + +@not_implemented_for("multigraph") +def boruvka_mst_edges( + G, minimum=True, weight="weight", keys=False, data=True, ignore_nan=False +): + """Iterate over edges of a Borůvka's algorithm min/max spanning tree. + + Parameters + ---------- + G : NetworkX Graph + The edges of `G` must have distinct weights, + otherwise the edges may not form a tree. + + minimum : bool (default: True) + Find the minimum (True) or maximum (False) spanning tree. + + weight : string (default: 'weight') + The name of the edge attribute holding the edge weights. + + keys : bool (default: True) + This argument is ignored since this function is not + implemented for multigraphs; it exists only for consistency + with the other minimum spanning tree functions. + + data : bool (default: True) + Flag for whether to yield edge attribute dicts. + If True, yield edges `(u, v, d)`, where `d` is the attribute dict. + If False, yield edges `(u, v)`. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + """ + # Initialize a forest, assuming initially that it is the discrete + # partition of the nodes of the graph. + forest = UnionFind(G) + + def best_edge(component): + """Returns the optimum (minimum or maximum) edge on the edge + boundary of the given set of nodes. + + A return value of ``None`` indicates an empty boundary. + + """ + sign = 1 if minimum else -1 + minwt = float("inf") + boundary = None + for e in nx.edge_boundary(G, component, data=True): + wt = e[-1].get(weight, 1) * sign + if isnan(wt): + if ignore_nan: + continue + msg = f"NaN found as an edge weight. Edge {e}" + raise ValueError(msg) + if wt < minwt: + minwt = wt + boundary = e + return boundary + + # Determine the optimum edge in the edge boundary of each component + # in the forest. + best_edges = (best_edge(component) for component in forest.to_sets()) + best_edges = [edge for edge in best_edges if edge is not None] + # If each entry was ``None``, that means the graph was disconnected, + # so we are done generating the forest. + while best_edges: + # Determine the optimum edge in the edge boundary of each + # component in the forest. + # + # This must be a sequence, not an iterator. In this list, the + # same edge may appear twice, in different orientations (but + # that's okay, since a union operation will be called on the + # endpoints the first time it is seen, but not the second time). + # + # Any ``None`` indicates that the edge boundary for that + # component was empty, so that part of the forest has been + # completed. + # + # TODO This can be parallelized, both in the outer loop over + # each component in the forest and in the computation of the + # minimum. (Same goes for the identical lines outside the loop.) + best_edges = (best_edge(component) for component in forest.to_sets()) + best_edges = [edge for edge in best_edges if edge is not None] + # Join trees in the forest using the best edges, and yield that + # edge, since it is part of the spanning tree. + # + # TODO This loop can be parallelized, to an extent (the union + # operation must be atomic). + for u, v, d in best_edges: + if forest[u] != forest[v]: + if data: + yield u, v, d + else: + yield u, v + forest.union(u, v) + + +def kruskal_mst_edges( + G, minimum, weight="weight", keys=True, data=True, ignore_nan=False +): + """Iterate over edges of a Kruskal's algorithm min/max spanning tree. + + Parameters + ---------- + G : NetworkX Graph + The graph holding the tree of interest. + + minimum : bool (default: True) + Find the minimum (True) or maximum (False) spanning tree. + + weight : string (default: 'weight') + The name of the edge attribute holding the edge weights. + + keys : bool (default: True) + If `G` is a multigraph, `keys` controls whether edge keys ar yielded. + Otherwise `keys` is ignored. + + data : bool (default: True) + Flag for whether to yield edge attribute dicts. + If True, yield edges `(u, v, d)`, where `d` is the attribute dict. + If False, yield edges `(u, v)`. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + """ + subtrees = UnionFind() + if G.is_multigraph(): + edges = G.edges(keys=True, data=True) + + def filter_nan_edges(edges=edges, weight=weight): + sign = 1 if minimum else -1 + for u, v, k, d in edges: + wt = d.get(weight, 1) * sign + if isnan(wt): + if ignore_nan: + continue + msg = f"NaN found as an edge weight. Edge {(u, v, k, d)}" + raise ValueError(msg) + yield wt, u, v, k, d + + else: + edges = G.edges(data=True) + + def filter_nan_edges(edges=edges, weight=weight): + sign = 1 if minimum else -1 + for u, v, d in edges: + wt = d.get(weight, 1) * sign + if isnan(wt): + if ignore_nan: + continue + msg = f"NaN found as an edge weight. Edge {(u, v, d)}" + raise ValueError(msg) + yield wt, u, v, d + + edges = sorted(filter_nan_edges(), key=itemgetter(0)) + # Multigraphs need to handle edge keys in addition to edge data. + if G.is_multigraph(): + for wt, u, v, k, d in edges: + if subtrees[u] != subtrees[v]: + if keys: + if data: + yield u, v, k, d + else: + yield u, v, k + else: + if data: + yield u, v, d + else: + yield u, v + subtrees.union(u, v) + else: + for wt, u, v, d in edges: + if subtrees[u] != subtrees[v]: + if data: + yield (u, v, d) + else: + yield (u, v) + subtrees.union(u, v) + + +def prim_mst_edges(G, minimum, weight="weight", keys=True, data=True, ignore_nan=False): + """Iterate over edges of Prim's algorithm min/max spanning tree. + + Parameters + ---------- + G : NetworkX Graph + The graph holding the tree of interest. + + minimum : bool (default: True) + Find the minimum (True) or maximum (False) spanning tree. + + weight : string (default: 'weight') + The name of the edge attribute holding the edge weights. + + keys : bool (default: True) + If `G` is a multigraph, `keys` controls whether edge keys ar yielded. + Otherwise `keys` is ignored. + + data : bool (default: True) + Flag for whether to yield edge attribute dicts. + If True, yield edges `(u, v, d)`, where `d` is the attribute dict. + If False, yield edges `(u, v)`. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + """ + is_multigraph = G.is_multigraph() + push = heappush + pop = heappop + + nodes = set(G) + c = count() + + sign = 1 if minimum else -1 + + while nodes: + u = nodes.pop() + frontier = [] + visited = {u} + if is_multigraph: + for v, keydict in G.adj[u].items(): + for k, d in keydict.items(): + wt = d.get(weight, 1) * sign + if isnan(wt): + if ignore_nan: + continue + msg = f"NaN found as an edge weight. Edge {(u, v, k, d)}" + raise ValueError(msg) + push(frontier, (wt, next(c), u, v, k, d)) + else: + for v, d in G.adj[u].items(): + wt = d.get(weight, 1) * sign + if isnan(wt): + if ignore_nan: + continue + msg = f"NaN found as an edge weight. Edge {(u, v, d)}" + raise ValueError(msg) + push(frontier, (wt, next(c), u, v, d)) + while frontier: + if is_multigraph: + W, _, u, v, k, d = pop(frontier) + else: + W, _, u, v, d = pop(frontier) + if v in visited or v not in nodes: + continue + # Multigraphs need to handle edge keys in addition to edge data. + if is_multigraph and keys: + if data: + yield u, v, k, d + else: + yield u, v, k + else: + if data: + yield u, v, d + else: + yield u, v + # update frontier + visited.add(v) + nodes.discard(v) + if is_multigraph: + for w, keydict in G.adj[v].items(): + if w in visited: + continue + for k2, d2 in keydict.items(): + new_weight = d2.get(weight, 1) * sign + push(frontier, (new_weight, next(c), v, w, k2, d2)) + else: + for w, d2 in G.adj[v].items(): + if w in visited: + continue + new_weight = d2.get(weight, 1) * sign + push(frontier, (new_weight, next(c), v, w, d2)) + + +ALGORITHMS = { + "boruvka": boruvka_mst_edges, + "borůvka": boruvka_mst_edges, + "kruskal": kruskal_mst_edges, + "prim": prim_mst_edges, +} + + +@not_implemented_for("directed") +def minimum_spanning_edges( + G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False +): + """Generate edges in a minimum spanning forest of an undirected + weighted graph. + + A minimum spanning tree is a subgraph of the graph (a tree) + with the minimum sum of edge weights. A spanning forest is a + union of the spanning trees for each connected component of the graph. + + Parameters + ---------- + G : undirected Graph + An undirected graph. If `G` is connected, then the algorithm finds a + spanning tree. Otherwise, a spanning forest is found. + + algorithm : string + The algorithm to use when finding a minimum spanning tree. Valid + choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. + + weight : string + Edge data key to use for weight (default 'weight'). + + keys : bool + Whether to yield edge key in multigraphs in addition to the edge. + If `G` is not a multigraph, this is ignored. + + data : bool, optional + If True yield the edge data along with the edge. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + Returns + ------- + edges : iterator + An iterator over edges in a maximum spanning tree of `G`. + Edges connecting nodes `u` and `v` are represented as tuples: + `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)` + + If `G` is a multigraph, `keys` indicates whether the edge key `k` will + be reported in the third position in the edge tuple. `data` indicates + whether the edge datadict `d` will appear at the end of the edge tuple. + + If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True + or `(u, v)` if `data` is False. + + Examples + -------- + >>> from networkx.algorithms import tree + + Find minimum spanning edges by Kruskal's algorithm + + >>> G = nx.cycle_graph(4) + >>> G.add_edge(0, 3, weight=2) + >>> mst = tree.minimum_spanning_edges(G, algorithm="kruskal", data=False) + >>> edgelist = list(mst) + >>> sorted(sorted(e) for e in edgelist) + [[0, 1], [1, 2], [2, 3]] + + Find minimum spanning edges by Prim's algorithm + + >>> G = nx.cycle_graph(4) + >>> G.add_edge(0, 3, weight=2) + >>> mst = tree.minimum_spanning_edges(G, algorithm="prim", data=False) + >>> edgelist = list(mst) + >>> sorted(sorted(e) for e in edgelist) + [[0, 1], [1, 2], [2, 3]] + + Notes + ----- + For Borůvka's algorithm, each edge must have a weight attribute, and + each edge weight must be distinct. + + For the other algorithms, if the graph edges do not have a weight + attribute a default weight of 1 will be used. + + Modified code from David Eppstein, April 2006 + http://www.ics.uci.edu/~eppstein/PADS/ + + """ + try: + algo = ALGORITHMS[algorithm] + except KeyError as e: + msg = f"{algorithm} is not a valid choice for an algorithm." + raise ValueError(msg) from e + + return algo( + G, minimum=True, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan + ) + + +@not_implemented_for("directed") +def maximum_spanning_edges( + G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False +): + """Generate edges in a maximum spanning forest of an undirected + weighted graph. + + A maximum spanning tree is a subgraph of the graph (a tree) + with the maximum possible sum of edge weights. A spanning forest is a + union of the spanning trees for each connected component of the graph. + + Parameters + ---------- + G : undirected Graph + An undirected graph. If `G` is connected, then the algorithm finds a + spanning tree. Otherwise, a spanning forest is found. + + algorithm : string + The algorithm to use when finding a maximum spanning tree. Valid + choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. + + weight : string + Edge data key to use for weight (default 'weight'). + + keys : bool + Whether to yield edge key in multigraphs in addition to the edge. + If `G` is not a multigraph, this is ignored. + + data : bool, optional + If True yield the edge data along with the edge. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + Returns + ------- + edges : iterator + An iterator over edges in a maximum spanning tree of `G`. + Edges connecting nodes `u` and `v` are represented as tuples: + `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)` + + If `G` is a multigraph, `keys` indicates whether the edge key `k` will + be reported in the third position in the edge tuple. `data` indicates + whether the edge datadict `d` will appear at the end of the edge tuple. + + If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True + or `(u, v)` if `data` is False. + + Examples + -------- + >>> from networkx.algorithms import tree + + Find maximum spanning edges by Kruskal's algorithm + + >>> G = nx.cycle_graph(4) + >>> G.add_edge(0, 3, weight=2) + >>> mst = tree.maximum_spanning_edges(G, algorithm="kruskal", data=False) + >>> edgelist = list(mst) + >>> sorted(sorted(e) for e in edgelist) + [[0, 1], [0, 3], [1, 2]] + + Find maximum spanning edges by Prim's algorithm + + >>> G = nx.cycle_graph(4) + >>> G.add_edge(0, 3, weight=2) # assign weight 2 to edge 0-3 + >>> mst = tree.maximum_spanning_edges(G, algorithm="prim", data=False) + >>> edgelist = list(mst) + >>> sorted(sorted(e) for e in edgelist) + [[0, 1], [0, 3], [2, 3]] + + Notes + ----- + For Borůvka's algorithm, each edge must have a weight attribute, and + each edge weight must be distinct. + + For the other algorithms, if the graph edges do not have a weight + attribute a default weight of 1 will be used. + + Modified code from David Eppstein, April 2006 + http://www.ics.uci.edu/~eppstein/PADS/ + """ + try: + algo = ALGORITHMS[algorithm] + except KeyError as e: + msg = f"{algorithm} is not a valid choice for an algorithm." + raise ValueError(msg) from e + + return algo( + G, minimum=False, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan + ) + + +def minimum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False): + """Returns a minimum spanning tree or forest on an undirected graph `G`. + + Parameters + ---------- + G : undirected graph + An undirected graph. If `G` is connected, then the algorithm finds a + spanning tree. Otherwise, a spanning forest is found. + + weight : str + Data key to use for edge weights. + + algorithm : string + The algorithm to use when finding a minimum spanning tree. Valid + choices are 'kruskal', 'prim', or 'boruvka'. The default is + 'kruskal'. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + Returns + ------- + G : NetworkX Graph + A minimum spanning tree or forest. + + Examples + -------- + >>> G = nx.cycle_graph(4) + >>> G.add_edge(0, 3, weight=2) + >>> T = nx.minimum_spanning_tree(G) + >>> sorted(T.edges(data=True)) + [(0, 1, {}), (1, 2, {}), (2, 3, {})] + + + Notes + ----- + For Borůvka's algorithm, each edge must have a weight attribute, and + each edge weight must be distinct. + + For the other algorithms, if the graph edges do not have a weight + attribute a default weight of 1 will be used. + + There may be more than one tree with the same minimum or maximum weight. + See :mod:`networkx.tree.recognition` for more detailed definitions. + + Isolated nodes with self-loops are in the tree as edgeless isolated nodes. + + """ + edges = minimum_spanning_edges( + G, algorithm, weight, keys=True, data=True, ignore_nan=ignore_nan + ) + T = G.__class__() # Same graph class as G + T.graph.update(G.graph) + T.add_nodes_from(G.nodes.items()) + T.add_edges_from(edges) + return T + + +def maximum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False): + """Returns a maximum spanning tree or forest on an undirected graph `G`. + + Parameters + ---------- + G : undirected graph + An undirected graph. If `G` is connected, then the algorithm finds a + spanning tree. Otherwise, a spanning forest is found. + + weight : str + Data key to use for edge weights. + + algorithm : string + The algorithm to use when finding a maximum spanning tree. Valid + choices are 'kruskal', 'prim', or 'boruvka'. The default is + 'kruskal'. + + ignore_nan : bool (default: False) + If a NaN is found as an edge weight normally an exception is raised. + If `ignore_nan is True` then that edge is ignored instead. + + + Returns + ------- + G : NetworkX Graph + A maximum spanning tree or forest. + + + Examples + -------- + >>> G = nx.cycle_graph(4) + >>> G.add_edge(0, 3, weight=2) + >>> T = nx.maximum_spanning_tree(G) + >>> sorted(T.edges(data=True)) + [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})] + + + Notes + ----- + For Borůvka's algorithm, each edge must have a weight attribute, and + each edge weight must be distinct. + + For the other algorithms, if the graph edges do not have a weight + attribute a default weight of 1 will be used. + + There may be more than one tree with the same minimum or maximum weight. + See :mod:`networkx.tree.recognition` for more detailed definitions. + + Isolated nodes with self-loops are in the tree as edgeless isolated nodes. + + """ + edges = maximum_spanning_edges( + G, algorithm, weight, keys=True, data=True, ignore_nan=ignore_nan + ) + edges = list(edges) + T = G.__class__() # Same graph class as G + T.graph.update(G.graph) + T.add_nodes_from(G.nodes.items()) + T.add_edges_from(edges) + return T