Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/networkx/algorithms/lowest_common_ancestors.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/lowest_common_ancestors.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,365 @@ +"""Algorithms for finding the lowest common ancestor of trees and DAGs.""" +from collections import defaultdict +from collections.abc import Mapping, Set +from itertools import chain, count + +import networkx as nx +from networkx.utils import ( + arbitrary_element, + not_implemented_for, + UnionFind, + generate_unique_node, +) + +__all__ = [ + "all_pairs_lowest_common_ancestor", + "tree_all_pairs_lowest_common_ancestor", + "lowest_common_ancestor", +] + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None): + r"""Yield the lowest common ancestor for sets of pairs in a tree. + + Parameters + ---------- + G : NetworkX directed graph (must be a tree) + + root : node, optional (default: None) + The root of the subtree to operate on. + If None, assume the entire graph has exactly one source and use that. + + pairs : iterable or iterator of pairs of nodes, optional (default: None) + The pairs of interest. If None, Defaults to all pairs of nodes + under `root` that have a lowest common ancestor. + + Returns + ------- + lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes + in `pairs` and `lca` is their lowest common ancestor. + + Notes + ----- + Only defined on non-null trees represented with directed edges from + parents to children. Uses Tarjan's off-line lowest-common-ancestors + algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest + value of the inverse Ackermann function likely to ever come up in actual + use, and $P$ is the number of pairs requested (or $V^2$ if all are needed). + + Tarjan, R. E. (1979), "Applications of path compression on balanced trees", + Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161. + + See Also + -------- + all_pairs_lowest_common_ancestor (similar routine for general DAGs) + lowest_common_ancestor (just a single pair for general DAGs) + """ + if len(G) == 0: + raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") + elif None in G: + raise nx.NetworkXError("None is not a valid node.") + + # Index pairs of interest for efficient lookup from either side. + if pairs is not None: + pair_dict = defaultdict(set) + # See note on all_pairs_lowest_common_ancestor. + if not isinstance(pairs, (Mapping, Set)): + pairs = set(pairs) + for u, v in pairs: + for n in (u, v): + if n not in G: + msg = f"The node {str(n)} is not in the digraph." + raise nx.NodeNotFound(msg) + pair_dict[u].add(v) + pair_dict[v].add(u) + + # If root is not specified, find the exactly one node with in degree 0 and + # use it. Raise an error if none are found, or more than one is. Also check + # for any nodes with in degree larger than 1, which would imply G is not a + # tree. + if root is None: + for n, deg in G.in_degree: + if deg == 0: + if root is not None: + msg = "No root specified and tree has multiple sources." + raise nx.NetworkXError(msg) + root = n + elif deg > 1: + msg = "Tree LCA only defined on trees; use DAG routine." + raise nx.NetworkXError(msg) + if root is None: + raise nx.NetworkXError("Graph contains a cycle.") + + # Iterative implementation of Tarjan's offline lca algorithm + # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition) + uf = UnionFind() + ancestors = {} + for node in G: + ancestors[node] = uf[node] + + colors = defaultdict(bool) + for node in nx.dfs_postorder_nodes(G, root): + colors[node] = True + for v in pair_dict[node] if pairs is not None else G: + if colors[v]: + # If the user requested both directions of a pair, give it. + # Otherwise, just give one. + if pairs is not None and (node, v) in pairs: + yield (node, v), ancestors[uf[v]] + if pairs is None or (v, node) in pairs: + yield (v, node), ancestors[uf[v]] + if node != root: + parent = arbitrary_element(G.pred[node]) + uf.union(parent, node) + ancestors[uf[parent]] = parent + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def lowest_common_ancestor(G, node1, node2, default=None): + """Compute the lowest common ancestor of the given pair of nodes. + + Parameters + ---------- + G : NetworkX directed graph + + node1, node2 : nodes in the graph. + + default : object + Returned if no common ancestor between `node1` and `node2` + + Returns + ------- + The lowest common ancestor of node1 and node2, + or default if they have no common ancestors. + + Notes + ----- + Only defined on non-null directed acyclic graphs. + Takes n log(n) time in the size of the graph. + See `all_pairs_lowest_common_ancestor` when you have + more than one pair of nodes of interest. + + See Also + -------- + tree_all_pairs_lowest_common_ancestor + all_pairs_lowest_common_ancestor + """ + ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)])) + if ans: + assert len(ans) == 1 + return ans[0][1] + else: + return default + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def all_pairs_lowest_common_ancestor(G, pairs=None): + """Compute the lowest common ancestor for pairs of nodes. + + Parameters + ---------- + G : NetworkX directed graph + + pairs : iterable of pairs of nodes, optional (default: all pairs) + The pairs of nodes of interest. + If None, will find the LCA of all pairs of nodes. + + Returns + ------- + An iterator over ((node1, node2), lca) where (node1, node2) are + the pairs specified and lca is a lowest common ancestor of the pair. + Note that for the default of all pairs in G, we consider + unordered pairs, e.g. you will not get both (b, a) and (a, b). + + Notes + ----- + Only defined on non-null directed acyclic graphs. + + Uses the $O(n^3)$ ancestor-list algorithm from: + M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin. + "Lowest common ancestors in trees and directed acyclic graphs." + Journal of Algorithms, 57(2): 75-94, 2005. + + See Also + -------- + tree_all_pairs_lowest_common_ancestor + lowest_common_ancestor + """ + if not nx.is_directed_acyclic_graph(G): + raise nx.NetworkXError("LCA only defined on directed acyclic graphs.") + elif len(G) == 0: + raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") + elif None in G: + raise nx.NetworkXError("None is not a valid node.") + + # The copy isn't ideal, neither is the switch-on-type, but without it users + # passing an iterable will encounter confusing errors, and itertools.tee + # does not appear to handle builtin types efficiently (IE, it materializes + # another buffer rather than just creating listoperators at the same + # offset). The Python documentation notes use of tee is unadvised when one + # is consumed before the other. + # + # This will always produce correct results and avoid unnecessary + # copies in many common cases. + # + if not isinstance(pairs, (Mapping, Set)) and pairs is not None: + pairs = set(pairs) + + # Convert G into a dag with a single root by adding a node with edges to + # all sources iff necessary. + sources = [n for n, deg in G.in_degree if deg == 0] + if len(sources) == 1: + root = sources[0] + super_root = None + else: + G = G.copy() + super_root = root = generate_unique_node() + for source in sources: + G.add_edge(root, source) + + # Start by computing a spanning tree, and the DAG of all edges not in it. + # We will then use the tree lca algorithm on the spanning tree, and use + # the DAG to figure out the set of tree queries necessary. + spanning_tree = nx.dfs_tree(G, root) + dag = nx.DiGraph( + (u, v) + for u, v in G.edges + if u not in spanning_tree or v not in spanning_tree[u] + ) + + # Ensure that both the dag and the spanning tree contains all nodes in G, + # even nodes that are disconnected in the dag. + spanning_tree.add_nodes_from(G) + dag.add_nodes_from(G) + + counter = count() + + # Necessary to handle graphs consisting of a single node and no edges. + root_distance = {root: next(counter)} + + for edge in nx.bfs_edges(spanning_tree, root): + for node in edge: + if node not in root_distance: + root_distance[node] = next(counter) + + # Index the position of all nodes in the Euler tour so we can efficiently + # sort lists and merge in tour order. + euler_tour_pos = {} + for node in nx.depth_first_search.dfs_preorder_nodes(G, root): + if node not in euler_tour_pos: + euler_tour_pos[node] = next(counter) + + # Generate the set of all nodes of interest in the pairs. + pairset = set() + if pairs is not None: + pairset = set(chain.from_iterable(pairs)) + + for n in pairset: + if n not in G: + msg = f"The node {str(n)} is not in the digraph." + raise nx.NodeNotFound(msg) + + # Generate the transitive closure over the dag (not G) of all nodes, and + # sort each node's closure set by order of first appearance in the Euler + # tour. + ancestors = {} + for v in dag: + if pairs is None or v in pairset: + my_ancestors = nx.dag.ancestors(dag, v) + my_ancestors.add(v) + ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get) + + def _compute_dag_lca_from_tree_values(tree_lca, dry_run): + """Iterate through the in-order merge for each pair of interest. + + We do this to answer the user's query, but it is also used to + avoid generating unnecessary tree entries when the user only + needs some pairs. + """ + for (node1, node2) in pairs if pairs is not None else tree_lca: + best_root_distance = None + best = None + + indices = [0, 0] + ancestors_by_index = [ancestors[node1], ancestors[node2]] + + def get_next_in_merged_lists(indices): + """Returns index of the list containing the next item + + Next order refers to the merged order. + Index can be 0 or 1 (or None if exhausted). + """ + index1, index2 = indices + if index1 >= len(ancestors[node1]) and index2 >= len(ancestors[node2]): + return None + elif index1 >= len(ancestors[node1]): + return 1 + elif index2 >= len(ancestors[node2]): + return 0 + elif ( + euler_tour_pos[ancestors[node1][index1]] + < euler_tour_pos[ancestors[node2][index2]] + ): + return 0 + else: + return 1 + + # Find the LCA by iterating through the in-order merge of the two + # nodes of interests' ancestor sets. In principle, we need to + # consider all pairs in the Cartesian product of the ancestor sets, + # but by the restricted min range query reduction we are guaranteed + # that one of the pairs of interest is adjacent in the merged list + # iff one came from each list. + i = get_next_in_merged_lists(indices) + cur = ancestors_by_index[i][indices[i]], i + while i is not None: + prev = cur + indices[i] += 1 + i = get_next_in_merged_lists(indices) + if i is not None: + cur = ancestors_by_index[i][indices[i]], i + + # Two adjacent entries must not be from the same list + # in order for their tree LCA to be considered. + if cur[1] != prev[1]: + tree_node1, tree_node2 = prev[0], cur[0] + if (tree_node1, tree_node2) in tree_lca: + ans = tree_lca[tree_node1, tree_node2] + else: + ans = tree_lca[tree_node2, tree_node1] + if not dry_run and ( + best is None or root_distance[ans] > best_root_distance + ): + best_root_distance = root_distance[ans] + best = ans + + # If the LCA is super_root, there is no LCA in the user's graph. + if not dry_run and (super_root is None or best != super_root): + yield (node1, node2), best + + # Generate the spanning tree lca for all pairs. This doesn't make sense to + # do incrementally since we are using a linear time offline algorithm for + # tree lca. + if pairs is None: + # We want all pairs so we'll need the entire tree. + tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree, root)) + else: + # We only need the merged adjacent pairs by seeing which queries the + # algorithm needs then generating them in a single pass. + tree_lca = defaultdict(int) + for _ in _compute_dag_lca_from_tree_values(tree_lca, True): + pass + + # Replace the bogus default tree values with the real ones. + for (pair, lca) in tree_all_pairs_lowest_common_ancestor( + spanning_tree, root, tree_lca + ): + tree_lca[pair] = lca + + # All precomputations complete. Now we just need to give the user the pairs + # they asked for, or all pairs if they want them all. + return _compute_dag_lca_from_tree_values(tree_lca, False)