Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/lowest_common_ancestors.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Algorithms for finding the lowest common ancestor of trees and DAGs.""" | |
2 from collections import defaultdict | |
3 from collections.abc import Mapping, Set | |
4 from itertools import chain, count | |
5 | |
6 import networkx as nx | |
7 from networkx.utils import ( | |
8 arbitrary_element, | |
9 not_implemented_for, | |
10 UnionFind, | |
11 generate_unique_node, | |
12 ) | |
13 | |
14 __all__ = [ | |
15 "all_pairs_lowest_common_ancestor", | |
16 "tree_all_pairs_lowest_common_ancestor", | |
17 "lowest_common_ancestor", | |
18 ] | |
19 | |
20 | |
21 @not_implemented_for("undirected") | |
22 @not_implemented_for("multigraph") | |
23 def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None): | |
24 r"""Yield the lowest common ancestor for sets of pairs in a tree. | |
25 | |
26 Parameters | |
27 ---------- | |
28 G : NetworkX directed graph (must be a tree) | |
29 | |
30 root : node, optional (default: None) | |
31 The root of the subtree to operate on. | |
32 If None, assume the entire graph has exactly one source and use that. | |
33 | |
34 pairs : iterable or iterator of pairs of nodes, optional (default: None) | |
35 The pairs of interest. If None, Defaults to all pairs of nodes | |
36 under `root` that have a lowest common ancestor. | |
37 | |
38 Returns | |
39 ------- | |
40 lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes | |
41 in `pairs` and `lca` is their lowest common ancestor. | |
42 | |
43 Notes | |
44 ----- | |
45 Only defined on non-null trees represented with directed edges from | |
46 parents to children. Uses Tarjan's off-line lowest-common-ancestors | |
47 algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest | |
48 value of the inverse Ackermann function likely to ever come up in actual | |
49 use, and $P$ is the number of pairs requested (or $V^2$ if all are needed). | |
50 | |
51 Tarjan, R. E. (1979), "Applications of path compression on balanced trees", | |
52 Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161. | |
53 | |
54 See Also | |
55 -------- | |
56 all_pairs_lowest_common_ancestor (similar routine for general DAGs) | |
57 lowest_common_ancestor (just a single pair for general DAGs) | |
58 """ | |
59 if len(G) == 0: | |
60 raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") | |
61 elif None in G: | |
62 raise nx.NetworkXError("None is not a valid node.") | |
63 | |
64 # Index pairs of interest for efficient lookup from either side. | |
65 if pairs is not None: | |
66 pair_dict = defaultdict(set) | |
67 # See note on all_pairs_lowest_common_ancestor. | |
68 if not isinstance(pairs, (Mapping, Set)): | |
69 pairs = set(pairs) | |
70 for u, v in pairs: | |
71 for n in (u, v): | |
72 if n not in G: | |
73 msg = f"The node {str(n)} is not in the digraph." | |
74 raise nx.NodeNotFound(msg) | |
75 pair_dict[u].add(v) | |
76 pair_dict[v].add(u) | |
77 | |
78 # If root is not specified, find the exactly one node with in degree 0 and | |
79 # use it. Raise an error if none are found, or more than one is. Also check | |
80 # for any nodes with in degree larger than 1, which would imply G is not a | |
81 # tree. | |
82 if root is None: | |
83 for n, deg in G.in_degree: | |
84 if deg == 0: | |
85 if root is not None: | |
86 msg = "No root specified and tree has multiple sources." | |
87 raise nx.NetworkXError(msg) | |
88 root = n | |
89 elif deg > 1: | |
90 msg = "Tree LCA only defined on trees; use DAG routine." | |
91 raise nx.NetworkXError(msg) | |
92 if root is None: | |
93 raise nx.NetworkXError("Graph contains a cycle.") | |
94 | |
95 # Iterative implementation of Tarjan's offline lca algorithm | |
96 # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition) | |
97 uf = UnionFind() | |
98 ancestors = {} | |
99 for node in G: | |
100 ancestors[node] = uf[node] | |
101 | |
102 colors = defaultdict(bool) | |
103 for node in nx.dfs_postorder_nodes(G, root): | |
104 colors[node] = True | |
105 for v in pair_dict[node] if pairs is not None else G: | |
106 if colors[v]: | |
107 # If the user requested both directions of a pair, give it. | |
108 # Otherwise, just give one. | |
109 if pairs is not None and (node, v) in pairs: | |
110 yield (node, v), ancestors[uf[v]] | |
111 if pairs is None or (v, node) in pairs: | |
112 yield (v, node), ancestors[uf[v]] | |
113 if node != root: | |
114 parent = arbitrary_element(G.pred[node]) | |
115 uf.union(parent, node) | |
116 ancestors[uf[parent]] = parent | |
117 | |
118 | |
119 @not_implemented_for("undirected") | |
120 @not_implemented_for("multigraph") | |
121 def lowest_common_ancestor(G, node1, node2, default=None): | |
122 """Compute the lowest common ancestor of the given pair of nodes. | |
123 | |
124 Parameters | |
125 ---------- | |
126 G : NetworkX directed graph | |
127 | |
128 node1, node2 : nodes in the graph. | |
129 | |
130 default : object | |
131 Returned if no common ancestor between `node1` and `node2` | |
132 | |
133 Returns | |
134 ------- | |
135 The lowest common ancestor of node1 and node2, | |
136 or default if they have no common ancestors. | |
137 | |
138 Notes | |
139 ----- | |
140 Only defined on non-null directed acyclic graphs. | |
141 Takes n log(n) time in the size of the graph. | |
142 See `all_pairs_lowest_common_ancestor` when you have | |
143 more than one pair of nodes of interest. | |
144 | |
145 See Also | |
146 -------- | |
147 tree_all_pairs_lowest_common_ancestor | |
148 all_pairs_lowest_common_ancestor | |
149 """ | |
150 ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)])) | |
151 if ans: | |
152 assert len(ans) == 1 | |
153 return ans[0][1] | |
154 else: | |
155 return default | |
156 | |
157 | |
158 @not_implemented_for("undirected") | |
159 @not_implemented_for("multigraph") | |
160 def all_pairs_lowest_common_ancestor(G, pairs=None): | |
161 """Compute the lowest common ancestor for pairs of nodes. | |
162 | |
163 Parameters | |
164 ---------- | |
165 G : NetworkX directed graph | |
166 | |
167 pairs : iterable of pairs of nodes, optional (default: all pairs) | |
168 The pairs of nodes of interest. | |
169 If None, will find the LCA of all pairs of nodes. | |
170 | |
171 Returns | |
172 ------- | |
173 An iterator over ((node1, node2), lca) where (node1, node2) are | |
174 the pairs specified and lca is a lowest common ancestor of the pair. | |
175 Note that for the default of all pairs in G, we consider | |
176 unordered pairs, e.g. you will not get both (b, a) and (a, b). | |
177 | |
178 Notes | |
179 ----- | |
180 Only defined on non-null directed acyclic graphs. | |
181 | |
182 Uses the $O(n^3)$ ancestor-list algorithm from: | |
183 M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin. | |
184 "Lowest common ancestors in trees and directed acyclic graphs." | |
185 Journal of Algorithms, 57(2): 75-94, 2005. | |
186 | |
187 See Also | |
188 -------- | |
189 tree_all_pairs_lowest_common_ancestor | |
190 lowest_common_ancestor | |
191 """ | |
192 if not nx.is_directed_acyclic_graph(G): | |
193 raise nx.NetworkXError("LCA only defined on directed acyclic graphs.") | |
194 elif len(G) == 0: | |
195 raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") | |
196 elif None in G: | |
197 raise nx.NetworkXError("None is not a valid node.") | |
198 | |
199 # The copy isn't ideal, neither is the switch-on-type, but without it users | |
200 # passing an iterable will encounter confusing errors, and itertools.tee | |
201 # does not appear to handle builtin types efficiently (IE, it materializes | |
202 # another buffer rather than just creating listoperators at the same | |
203 # offset). The Python documentation notes use of tee is unadvised when one | |
204 # is consumed before the other. | |
205 # | |
206 # This will always produce correct results and avoid unnecessary | |
207 # copies in many common cases. | |
208 # | |
209 if not isinstance(pairs, (Mapping, Set)) and pairs is not None: | |
210 pairs = set(pairs) | |
211 | |
212 # Convert G into a dag with a single root by adding a node with edges to | |
213 # all sources iff necessary. | |
214 sources = [n for n, deg in G.in_degree if deg == 0] | |
215 if len(sources) == 1: | |
216 root = sources[0] | |
217 super_root = None | |
218 else: | |
219 G = G.copy() | |
220 super_root = root = generate_unique_node() | |
221 for source in sources: | |
222 G.add_edge(root, source) | |
223 | |
224 # Start by computing a spanning tree, and the DAG of all edges not in it. | |
225 # We will then use the tree lca algorithm on the spanning tree, and use | |
226 # the DAG to figure out the set of tree queries necessary. | |
227 spanning_tree = nx.dfs_tree(G, root) | |
228 dag = nx.DiGraph( | |
229 (u, v) | |
230 for u, v in G.edges | |
231 if u not in spanning_tree or v not in spanning_tree[u] | |
232 ) | |
233 | |
234 # Ensure that both the dag and the spanning tree contains all nodes in G, | |
235 # even nodes that are disconnected in the dag. | |
236 spanning_tree.add_nodes_from(G) | |
237 dag.add_nodes_from(G) | |
238 | |
239 counter = count() | |
240 | |
241 # Necessary to handle graphs consisting of a single node and no edges. | |
242 root_distance = {root: next(counter)} | |
243 | |
244 for edge in nx.bfs_edges(spanning_tree, root): | |
245 for node in edge: | |
246 if node not in root_distance: | |
247 root_distance[node] = next(counter) | |
248 | |
249 # Index the position of all nodes in the Euler tour so we can efficiently | |
250 # sort lists and merge in tour order. | |
251 euler_tour_pos = {} | |
252 for node in nx.depth_first_search.dfs_preorder_nodes(G, root): | |
253 if node not in euler_tour_pos: | |
254 euler_tour_pos[node] = next(counter) | |
255 | |
256 # Generate the set of all nodes of interest in the pairs. | |
257 pairset = set() | |
258 if pairs is not None: | |
259 pairset = set(chain.from_iterable(pairs)) | |
260 | |
261 for n in pairset: | |
262 if n not in G: | |
263 msg = f"The node {str(n)} is not in the digraph." | |
264 raise nx.NodeNotFound(msg) | |
265 | |
266 # Generate the transitive closure over the dag (not G) of all nodes, and | |
267 # sort each node's closure set by order of first appearance in the Euler | |
268 # tour. | |
269 ancestors = {} | |
270 for v in dag: | |
271 if pairs is None or v in pairset: | |
272 my_ancestors = nx.dag.ancestors(dag, v) | |
273 my_ancestors.add(v) | |
274 ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get) | |
275 | |
276 def _compute_dag_lca_from_tree_values(tree_lca, dry_run): | |
277 """Iterate through the in-order merge for each pair of interest. | |
278 | |
279 We do this to answer the user's query, but it is also used to | |
280 avoid generating unnecessary tree entries when the user only | |
281 needs some pairs. | |
282 """ | |
283 for (node1, node2) in pairs if pairs is not None else tree_lca: | |
284 best_root_distance = None | |
285 best = None | |
286 | |
287 indices = [0, 0] | |
288 ancestors_by_index = [ancestors[node1], ancestors[node2]] | |
289 | |
290 def get_next_in_merged_lists(indices): | |
291 """Returns index of the list containing the next item | |
292 | |
293 Next order refers to the merged order. | |
294 Index can be 0 or 1 (or None if exhausted). | |
295 """ | |
296 index1, index2 = indices | |
297 if index1 >= len(ancestors[node1]) and index2 >= len(ancestors[node2]): | |
298 return None | |
299 elif index1 >= len(ancestors[node1]): | |
300 return 1 | |
301 elif index2 >= len(ancestors[node2]): | |
302 return 0 | |
303 elif ( | |
304 euler_tour_pos[ancestors[node1][index1]] | |
305 < euler_tour_pos[ancestors[node2][index2]] | |
306 ): | |
307 return 0 | |
308 else: | |
309 return 1 | |
310 | |
311 # Find the LCA by iterating through the in-order merge of the two | |
312 # nodes of interests' ancestor sets. In principle, we need to | |
313 # consider all pairs in the Cartesian product of the ancestor sets, | |
314 # but by the restricted min range query reduction we are guaranteed | |
315 # that one of the pairs of interest is adjacent in the merged list | |
316 # iff one came from each list. | |
317 i = get_next_in_merged_lists(indices) | |
318 cur = ancestors_by_index[i][indices[i]], i | |
319 while i is not None: | |
320 prev = cur | |
321 indices[i] += 1 | |
322 i = get_next_in_merged_lists(indices) | |
323 if i is not None: | |
324 cur = ancestors_by_index[i][indices[i]], i | |
325 | |
326 # Two adjacent entries must not be from the same list | |
327 # in order for their tree LCA to be considered. | |
328 if cur[1] != prev[1]: | |
329 tree_node1, tree_node2 = prev[0], cur[0] | |
330 if (tree_node1, tree_node2) in tree_lca: | |
331 ans = tree_lca[tree_node1, tree_node2] | |
332 else: | |
333 ans = tree_lca[tree_node2, tree_node1] | |
334 if not dry_run and ( | |
335 best is None or root_distance[ans] > best_root_distance | |
336 ): | |
337 best_root_distance = root_distance[ans] | |
338 best = ans | |
339 | |
340 # If the LCA is super_root, there is no LCA in the user's graph. | |
341 if not dry_run and (super_root is None or best != super_root): | |
342 yield (node1, node2), best | |
343 | |
344 # Generate the spanning tree lca for all pairs. This doesn't make sense to | |
345 # do incrementally since we are using a linear time offline algorithm for | |
346 # tree lca. | |
347 if pairs is None: | |
348 # We want all pairs so we'll need the entire tree. | |
349 tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree, root)) | |
350 else: | |
351 # We only need the merged adjacent pairs by seeing which queries the | |
352 # algorithm needs then generating them in a single pass. | |
353 tree_lca = defaultdict(int) | |
354 for _ in _compute_dag_lca_from_tree_values(tree_lca, True): | |
355 pass | |
356 | |
357 # Replace the bogus default tree values with the real ones. | |
358 for (pair, lca) in tree_all_pairs_lowest_common_ancestor( | |
359 spanning_tree, root, tree_lca | |
360 ): | |
361 tree_lca[pair] = lca | |
362 | |
363 # All precomputations complete. Now we just need to give the user the pairs | |
364 # they asked for, or all pairs if they want them all. | |
365 return _compute_dag_lca_from_tree_values(tree_lca, False) |