comparison env/lib/python3.9/site-packages/networkx/algorithms/lowest_common_ancestors.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Algorithms for finding the lowest common ancestor of trees and DAGs."""
2 from collections import defaultdict
3 from collections.abc import Mapping, Set
4 from itertools import chain, count
5
6 import networkx as nx
7 from networkx.utils import (
8 arbitrary_element,
9 not_implemented_for,
10 UnionFind,
11 generate_unique_node,
12 )
13
14 __all__ = [
15 "all_pairs_lowest_common_ancestor",
16 "tree_all_pairs_lowest_common_ancestor",
17 "lowest_common_ancestor",
18 ]
19
20
21 @not_implemented_for("undirected")
22 @not_implemented_for("multigraph")
23 def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None):
24 r"""Yield the lowest common ancestor for sets of pairs in a tree.
25
26 Parameters
27 ----------
28 G : NetworkX directed graph (must be a tree)
29
30 root : node, optional (default: None)
31 The root of the subtree to operate on.
32 If None, assume the entire graph has exactly one source and use that.
33
34 pairs : iterable or iterator of pairs of nodes, optional (default: None)
35 The pairs of interest. If None, Defaults to all pairs of nodes
36 under `root` that have a lowest common ancestor.
37
38 Returns
39 -------
40 lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes
41 in `pairs` and `lca` is their lowest common ancestor.
42
43 Notes
44 -----
45 Only defined on non-null trees represented with directed edges from
46 parents to children. Uses Tarjan's off-line lowest-common-ancestors
47 algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest
48 value of the inverse Ackermann function likely to ever come up in actual
49 use, and $P$ is the number of pairs requested (or $V^2$ if all are needed).
50
51 Tarjan, R. E. (1979), "Applications of path compression on balanced trees",
52 Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161.
53
54 See Also
55 --------
56 all_pairs_lowest_common_ancestor (similar routine for general DAGs)
57 lowest_common_ancestor (just a single pair for general DAGs)
58 """
59 if len(G) == 0:
60 raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
61 elif None in G:
62 raise nx.NetworkXError("None is not a valid node.")
63
64 # Index pairs of interest for efficient lookup from either side.
65 if pairs is not None:
66 pair_dict = defaultdict(set)
67 # See note on all_pairs_lowest_common_ancestor.
68 if not isinstance(pairs, (Mapping, Set)):
69 pairs = set(pairs)
70 for u, v in pairs:
71 for n in (u, v):
72 if n not in G:
73 msg = f"The node {str(n)} is not in the digraph."
74 raise nx.NodeNotFound(msg)
75 pair_dict[u].add(v)
76 pair_dict[v].add(u)
77
78 # If root is not specified, find the exactly one node with in degree 0 and
79 # use it. Raise an error if none are found, or more than one is. Also check
80 # for any nodes with in degree larger than 1, which would imply G is not a
81 # tree.
82 if root is None:
83 for n, deg in G.in_degree:
84 if deg == 0:
85 if root is not None:
86 msg = "No root specified and tree has multiple sources."
87 raise nx.NetworkXError(msg)
88 root = n
89 elif deg > 1:
90 msg = "Tree LCA only defined on trees; use DAG routine."
91 raise nx.NetworkXError(msg)
92 if root is None:
93 raise nx.NetworkXError("Graph contains a cycle.")
94
95 # Iterative implementation of Tarjan's offline lca algorithm
96 # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition)
97 uf = UnionFind()
98 ancestors = {}
99 for node in G:
100 ancestors[node] = uf[node]
101
102 colors = defaultdict(bool)
103 for node in nx.dfs_postorder_nodes(G, root):
104 colors[node] = True
105 for v in pair_dict[node] if pairs is not None else G:
106 if colors[v]:
107 # If the user requested both directions of a pair, give it.
108 # Otherwise, just give one.
109 if pairs is not None and (node, v) in pairs:
110 yield (node, v), ancestors[uf[v]]
111 if pairs is None or (v, node) in pairs:
112 yield (v, node), ancestors[uf[v]]
113 if node != root:
114 parent = arbitrary_element(G.pred[node])
115 uf.union(parent, node)
116 ancestors[uf[parent]] = parent
117
118
119 @not_implemented_for("undirected")
120 @not_implemented_for("multigraph")
121 def lowest_common_ancestor(G, node1, node2, default=None):
122 """Compute the lowest common ancestor of the given pair of nodes.
123
124 Parameters
125 ----------
126 G : NetworkX directed graph
127
128 node1, node2 : nodes in the graph.
129
130 default : object
131 Returned if no common ancestor between `node1` and `node2`
132
133 Returns
134 -------
135 The lowest common ancestor of node1 and node2,
136 or default if they have no common ancestors.
137
138 Notes
139 -----
140 Only defined on non-null directed acyclic graphs.
141 Takes n log(n) time in the size of the graph.
142 See `all_pairs_lowest_common_ancestor` when you have
143 more than one pair of nodes of interest.
144
145 See Also
146 --------
147 tree_all_pairs_lowest_common_ancestor
148 all_pairs_lowest_common_ancestor
149 """
150 ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)]))
151 if ans:
152 assert len(ans) == 1
153 return ans[0][1]
154 else:
155 return default
156
157
158 @not_implemented_for("undirected")
159 @not_implemented_for("multigraph")
160 def all_pairs_lowest_common_ancestor(G, pairs=None):
161 """Compute the lowest common ancestor for pairs of nodes.
162
163 Parameters
164 ----------
165 G : NetworkX directed graph
166
167 pairs : iterable of pairs of nodes, optional (default: all pairs)
168 The pairs of nodes of interest.
169 If None, will find the LCA of all pairs of nodes.
170
171 Returns
172 -------
173 An iterator over ((node1, node2), lca) where (node1, node2) are
174 the pairs specified and lca is a lowest common ancestor of the pair.
175 Note that for the default of all pairs in G, we consider
176 unordered pairs, e.g. you will not get both (b, a) and (a, b).
177
178 Notes
179 -----
180 Only defined on non-null directed acyclic graphs.
181
182 Uses the $O(n^3)$ ancestor-list algorithm from:
183 M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin.
184 "Lowest common ancestors in trees and directed acyclic graphs."
185 Journal of Algorithms, 57(2): 75-94, 2005.
186
187 See Also
188 --------
189 tree_all_pairs_lowest_common_ancestor
190 lowest_common_ancestor
191 """
192 if not nx.is_directed_acyclic_graph(G):
193 raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
194 elif len(G) == 0:
195 raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
196 elif None in G:
197 raise nx.NetworkXError("None is not a valid node.")
198
199 # The copy isn't ideal, neither is the switch-on-type, but without it users
200 # passing an iterable will encounter confusing errors, and itertools.tee
201 # does not appear to handle builtin types efficiently (IE, it materializes
202 # another buffer rather than just creating listoperators at the same
203 # offset). The Python documentation notes use of tee is unadvised when one
204 # is consumed before the other.
205 #
206 # This will always produce correct results and avoid unnecessary
207 # copies in many common cases.
208 #
209 if not isinstance(pairs, (Mapping, Set)) and pairs is not None:
210 pairs = set(pairs)
211
212 # Convert G into a dag with a single root by adding a node with edges to
213 # all sources iff necessary.
214 sources = [n for n, deg in G.in_degree if deg == 0]
215 if len(sources) == 1:
216 root = sources[0]
217 super_root = None
218 else:
219 G = G.copy()
220 super_root = root = generate_unique_node()
221 for source in sources:
222 G.add_edge(root, source)
223
224 # Start by computing a spanning tree, and the DAG of all edges not in it.
225 # We will then use the tree lca algorithm on the spanning tree, and use
226 # the DAG to figure out the set of tree queries necessary.
227 spanning_tree = nx.dfs_tree(G, root)
228 dag = nx.DiGraph(
229 (u, v)
230 for u, v in G.edges
231 if u not in spanning_tree or v not in spanning_tree[u]
232 )
233
234 # Ensure that both the dag and the spanning tree contains all nodes in G,
235 # even nodes that are disconnected in the dag.
236 spanning_tree.add_nodes_from(G)
237 dag.add_nodes_from(G)
238
239 counter = count()
240
241 # Necessary to handle graphs consisting of a single node and no edges.
242 root_distance = {root: next(counter)}
243
244 for edge in nx.bfs_edges(spanning_tree, root):
245 for node in edge:
246 if node not in root_distance:
247 root_distance[node] = next(counter)
248
249 # Index the position of all nodes in the Euler tour so we can efficiently
250 # sort lists and merge in tour order.
251 euler_tour_pos = {}
252 for node in nx.depth_first_search.dfs_preorder_nodes(G, root):
253 if node not in euler_tour_pos:
254 euler_tour_pos[node] = next(counter)
255
256 # Generate the set of all nodes of interest in the pairs.
257 pairset = set()
258 if pairs is not None:
259 pairset = set(chain.from_iterable(pairs))
260
261 for n in pairset:
262 if n not in G:
263 msg = f"The node {str(n)} is not in the digraph."
264 raise nx.NodeNotFound(msg)
265
266 # Generate the transitive closure over the dag (not G) of all nodes, and
267 # sort each node's closure set by order of first appearance in the Euler
268 # tour.
269 ancestors = {}
270 for v in dag:
271 if pairs is None or v in pairset:
272 my_ancestors = nx.dag.ancestors(dag, v)
273 my_ancestors.add(v)
274 ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get)
275
276 def _compute_dag_lca_from_tree_values(tree_lca, dry_run):
277 """Iterate through the in-order merge for each pair of interest.
278
279 We do this to answer the user's query, but it is also used to
280 avoid generating unnecessary tree entries when the user only
281 needs some pairs.
282 """
283 for (node1, node2) in pairs if pairs is not None else tree_lca:
284 best_root_distance = None
285 best = None
286
287 indices = [0, 0]
288 ancestors_by_index = [ancestors[node1], ancestors[node2]]
289
290 def get_next_in_merged_lists(indices):
291 """Returns index of the list containing the next item
292
293 Next order refers to the merged order.
294 Index can be 0 or 1 (or None if exhausted).
295 """
296 index1, index2 = indices
297 if index1 >= len(ancestors[node1]) and index2 >= len(ancestors[node2]):
298 return None
299 elif index1 >= len(ancestors[node1]):
300 return 1
301 elif index2 >= len(ancestors[node2]):
302 return 0
303 elif (
304 euler_tour_pos[ancestors[node1][index1]]
305 < euler_tour_pos[ancestors[node2][index2]]
306 ):
307 return 0
308 else:
309 return 1
310
311 # Find the LCA by iterating through the in-order merge of the two
312 # nodes of interests' ancestor sets. In principle, we need to
313 # consider all pairs in the Cartesian product of the ancestor sets,
314 # but by the restricted min range query reduction we are guaranteed
315 # that one of the pairs of interest is adjacent in the merged list
316 # iff one came from each list.
317 i = get_next_in_merged_lists(indices)
318 cur = ancestors_by_index[i][indices[i]], i
319 while i is not None:
320 prev = cur
321 indices[i] += 1
322 i = get_next_in_merged_lists(indices)
323 if i is not None:
324 cur = ancestors_by_index[i][indices[i]], i
325
326 # Two adjacent entries must not be from the same list
327 # in order for their tree LCA to be considered.
328 if cur[1] != prev[1]:
329 tree_node1, tree_node2 = prev[0], cur[0]
330 if (tree_node1, tree_node2) in tree_lca:
331 ans = tree_lca[tree_node1, tree_node2]
332 else:
333 ans = tree_lca[tree_node2, tree_node1]
334 if not dry_run and (
335 best is None or root_distance[ans] > best_root_distance
336 ):
337 best_root_distance = root_distance[ans]
338 best = ans
339
340 # If the LCA is super_root, there is no LCA in the user's graph.
341 if not dry_run and (super_root is None or best != super_root):
342 yield (node1, node2), best
343
344 # Generate the spanning tree lca for all pairs. This doesn't make sense to
345 # do incrementally since we are using a linear time offline algorithm for
346 # tree lca.
347 if pairs is None:
348 # We want all pairs so we'll need the entire tree.
349 tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree, root))
350 else:
351 # We only need the merged adjacent pairs by seeing which queries the
352 # algorithm needs then generating them in a single pass.
353 tree_lca = defaultdict(int)
354 for _ in _compute_dag_lca_from_tree_values(tree_lca, True):
355 pass
356
357 # Replace the bogus default tree values with the real ones.
358 for (pair, lca) in tree_all_pairs_lowest_common_ancestor(
359 spanning_tree, root, tree_lca
360 ):
361 tree_lca[pair] = lca
362
363 # All precomputations complete. Now we just need to give the user the pairs
364 # they asked for, or all pairs if they want them all.
365 return _compute_dag_lca_from_tree_values(tree_lca, False)