### annotate env/lib/python3.9/site-packages/galaxy/util/topsort.py @ 0:4f3585e2f14bdraftdefaulttip

author shellac Mon, 22 Mar 2021 18:12:50 +0000
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
shellac
parents:
diff changeset
1 """
shellac
parents:
diff changeset
2 Topological sort.
shellac
parents:
diff changeset
3
shellac
parents:
diff changeset
4 From Tim Peters, see:
shellac
parents:
diff changeset
5 http://mail.python.org/pipermail/python-list/1999-July/006660.html
shellac
parents:
diff changeset
6
shellac
parents:
diff changeset
7 topsort takes a list of pairs, where each pair (x, y) is taken to
shellac
parents:
diff changeset
8 mean that x <= y wrt some abstract partial ordering. The return
shellac
parents:
diff changeset
9 value is a list, representing a total ordering that respects all
shellac
parents:
diff changeset
10 the input constraints.
shellac
parents:
diff changeset
11 E.g.,
shellac
parents:
diff changeset
12
shellac
parents:
diff changeset
13 topsort( [(1,2), (3,3)] )
shellac
parents:
diff changeset
14
shellac
parents:
diff changeset
15 Valid topological sorts would be any of (but nothing other than)
shellac
parents:
diff changeset
16
shellac
parents:
diff changeset
17 [3, 1, 2]
shellac
parents:
diff changeset
18 [1, 3, 2]
shellac
parents:
diff changeset
19 [1, 2, 3]
shellac
parents:
diff changeset
20
shellac
parents:
diff changeset
21 ... however this variant ensures that 'key' order (first element of
shellac
parents:
diff changeset
22 tuple) is preserved so the following will be result returned:
shellac
parents:
diff changeset
23
shellac
parents:
diff changeset
24 [1, 3, 2]
shellac
parents:
diff changeset
25
shellac
parents:
diff changeset
26 because those are the permutations of the input elements that
shellac
parents:
diff changeset
27 respect the "1 precedes 2" and "3 precedes 3" input constraints.
shellac
parents:
diff changeset
28 Note that a constraint of the form (x, x) is really just a trick
shellac
parents:
diff changeset
29 to make sure x appears *somewhere* in the output list.
shellac
parents:
diff changeset
30
shellac
parents:
diff changeset
31 If there's a cycle in the constraints, say
shellac
parents:
diff changeset
32
shellac
parents:
diff changeset
33 topsort( [(1,2), (2,1)] )
shellac
parents:
diff changeset
34
shellac
parents:
diff changeset
35 then CycleError is raised, and the exception object supports
shellac
parents:
diff changeset
36 many methods to help analyze and break the cycles. This requires
shellac
parents:
diff changeset
37 a good deal more code than topsort itself!
shellac
parents:
diff changeset
38 """
shellac
parents:
diff changeset
39
shellac
parents:
diff changeset
40
shellac
parents:
diff changeset
41 class CycleError(Exception):
shellac
parents:
diff changeset
42 def __init__(self, sofar, numpreds, succs):
shellac
parents:
diff changeset
43 Exception.__init__(self, "cycle in constraints",
shellac
parents:
diff changeset
44 sofar, numpreds, succs)
shellac
parents:
diff changeset
45 self.preds = None
shellac
parents:
diff changeset
46
shellac
parents:
diff changeset
47 # return as much of the total ordering as topsort was able to
shellac
parents:
diff changeset
48 # find before it hit a cycle
shellac
parents:
diff changeset
49 def get_partial(self):
shellac
parents:
diff changeset
50 return self[1]
shellac
parents:
diff changeset
51
shellac
parents:
diff changeset
52 # return remaining elt -> count of predecessors map
shellac
parents:
diff changeset
53 def get_pred_counts(self):
shellac
parents:
diff changeset
54 return self[2]
shellac
parents:
diff changeset
55
shellac
parents:
diff changeset
56 # return remaining elt -> list of successors map
shellac
parents:
diff changeset
57 def get_succs(self):
shellac
parents:
diff changeset
58 return self[3]
shellac
parents:
diff changeset
59
shellac
parents:
diff changeset
60 # return remaining elements (== those that don't appear in
shellac
parents:
diff changeset
61 # get_partial())
shellac
parents:
diff changeset
62 def get_elements(self):
shellac
parents:
diff changeset
63 return self.get_pred_counts().keys()
shellac
parents:
diff changeset
64
shellac
parents:
diff changeset
65 # Return a list of pairs representing the full state of what's
shellac
parents:
diff changeset
shellac
parents:
diff changeset
67 # CycleError again, and if you invoke get_pairlist on *that*
shellac
parents:
diff changeset
68 # exception object, the result will be isomorphic to *this*
shellac
parents:
diff changeset
69 # invocation of get_pairlist).
shellac
parents:
diff changeset
70 # The idea is that you can use pick_a_cycle to find a cycle,
shellac
parents:
diff changeset
71 # through some means or another pick an (x,y) pair in the cycle
shellac
parents:
diff changeset
72 # you no longer want to respect, then remove that pair from the
shellac
parents:
diff changeset
73 # output of get_pairlist and try topsort again.
shellac
parents:
diff changeset
74 def get_pairlist(self):
shellac
parents:
diff changeset
75 succs = self.get_succs()
shellac
parents:
diff changeset
shellac
parents:
diff changeset
77 for x in self.get_elements():
shellac
parents:
diff changeset
78 if x in succs:
shellac
parents:
diff changeset
79 for y in succs[x]:
shellac
parents:
diff changeset
shellac
parents:
diff changeset
81 else:
shellac
parents:
diff changeset
82 # make sure x appears in topsort's output!
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
85
shellac
parents:
diff changeset
86 # return remaining elt -> list of predecessors map
shellac
parents:
diff changeset
87 def get_preds(self):
shellac
parents:
diff changeset
88 if self.preds is not None:
shellac
parents:
diff changeset
89 return self.preds
shellac
parents:
diff changeset
90 self.preds = preds = {}
shellac
parents:
diff changeset
91 remaining_elts = self.get_elements()
shellac
parents:
diff changeset
92 for x in remaining_elts:
shellac
parents:
diff changeset
93 preds[x] = []
shellac
parents:
diff changeset
94 succs = self.get_succs()
shellac
parents:
diff changeset
95
shellac
parents:
diff changeset
96 for x in remaining_elts:
shellac
parents:
diff changeset
97 if x in succs:
shellac
parents:
diff changeset
98 for y in succs[x]:
shellac
parents:
diff changeset
99 preds[y].append(x)
shellac
parents:
diff changeset
100
shellac
parents:
diff changeset
101 if __debug__:
shellac
parents:
diff changeset
102 for x in remaining_elts:
shellac
parents:
diff changeset
103 assert len(preds[x]) > 0
shellac
parents:
diff changeset
104 return preds
shellac
parents:
diff changeset
105
shellac
parents:
diff changeset
106 # return a cycle [x, ..., x] at random
shellac
parents:
diff changeset
107 def pick_a_cycle(self):
shellac
parents:
diff changeset
108 remaining_elts = self.get_elements()
shellac
parents:
diff changeset
109
shellac
parents:
diff changeset
110 # We know that everything in remaining_elts has a predecessor,
shellac
parents:
diff changeset
111 # but don't know that everything in it has a successor. So
shellac
parents:
diff changeset
112 # crawling forward over succs may hit a dead end. Instead we
shellac
parents:
diff changeset
113 # crawl backward over the preds until we hit a duplicate, then
shellac
parents:
diff changeset
114 # reverse the path.
shellac
parents:
diff changeset
115 preds = self.get_preds()
shellac
parents:
diff changeset
116 from random import choice
shellac
parents:
diff changeset
117 x = choice(remaining_elts)
shellac
parents:
diff changeset
shellac
parents:
diff changeset
119 index = {}
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
124 x = choice(preds[x])
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
129
shellac
parents:
diff changeset
130
shellac
parents:
diff changeset
131 def _numpreds_and_successors_from_pairlist(pairlist):
shellac
parents:
diff changeset
132 numpreds = {} # elt -> # of predecessors
shellac
parents:
diff changeset
133 successors = {} # elt -> list of successors
shellac
parents:
diff changeset
134 for first, second in pairlist:
shellac
parents:
diff changeset
135 # make sure every elt is a key in numpreds
shellac
parents:
diff changeset
136 if first not in numpreds:
shellac
parents:
diff changeset
137 numpreds[first] = 0
shellac
parents:
diff changeset
138 if second not in numpreds:
shellac
parents:
diff changeset
139 numpreds[second] = 0
shellac
parents:
diff changeset
140
shellac
parents:
diff changeset
141 # if they're the same, there's no real dependence
shellac
parents:
diff changeset
142 if first == second:
shellac
parents:
diff changeset
143 continue
shellac
parents:
diff changeset
144
shellac
parents:
diff changeset
145 # since first < second, second gains a pred ...
shellac
parents:
diff changeset
146 numpreds[second] = numpreds[second] + 1
shellac
parents:
diff changeset
147
shellac
parents:
diff changeset
148 # ... and first gains a succ
shellac
parents:
diff changeset
149 if first in successors:
shellac
parents:
diff changeset
150 successors[first].append(second)
shellac
parents:
diff changeset
151 else:
shellac
parents:
diff changeset
152 successors[first] = [second]
shellac
parents:
diff changeset
153 return numpreds, successors
shellac
parents:
diff changeset
154
shellac
parents:
diff changeset
155
shellac
parents:
diff changeset
156 def topsort(pairlist):
shellac
parents:
diff changeset
157 numpreds, successors = _numpreds_and_successors_from_pairlist(pairlist)
shellac
parents:
diff changeset
158
shellac
parents:
diff changeset
159 # suck up everything without a predecessor
shellac
parents:
diff changeset
160 answer = [x for x in numpreds.keys() if numpreds[x] == 0]
shellac
parents:
diff changeset
161
shellac
parents:
diff changeset
162 # for everything in answer, knock down the pred count on
shellac
parents:
diff changeset
163 # its successors; note that answer grows *in* the loop
shellac
parents:
diff changeset
shellac
parents:
diff changeset
165 assert numpreds[x] == 0
shellac
parents:
diff changeset
166 del numpreds[x]
shellac
parents:
diff changeset
167 if x in successors:
shellac
parents:
diff changeset
168 for y in successors[x]:
shellac
parents:
diff changeset
169 numpreds[y] = numpreds[y] - 1
shellac
parents:
diff changeset
170 if numpreds[y] == 0:
shellac
parents:
diff changeset
shellac
parents:
diff changeset
172 # following "del" isn't needed; just makes
shellac
parents:
diff changeset
173 # CycleError details easier to grasp
shellac
parents:
diff changeset
174 del successors[x]
shellac
parents:
diff changeset
175
shellac
parents:
diff changeset
176 if numpreds:
shellac
parents:
diff changeset
177 # everything in numpreds has at least one predecessor ->
shellac
parents:
diff changeset
178 # there's a cycle
shellac
parents:
diff changeset
179 if __debug__:
shellac
parents:
diff changeset
180 for x in numpreds.keys():
shellac
parents:
diff changeset
181 assert numpreds[x] > 0
shellac
parents:
diff changeset
shellac
parents:
diff changeset
shellac
parents:
diff changeset
184
shellac
parents:
diff changeset
185
shellac
parents:
diff changeset
186 def topsort_levels(pairlist):
shellac
parents:
diff changeset
187 numpreds, successors = _numpreds_and_successors_from_pairlist(pairlist)
shellac
parents:
diff changeset
188
shellac
parents:
diff changeset
shellac
parents:
diff changeset
190
shellac
parents:
diff changeset
191 while 1:
shellac
parents:
diff changeset
192 # Suck up everything without a predecessor.
shellac
parents:
diff changeset
193 levparents = [x for x in numpreds.keys() if numpreds[x] == 0]
shellac
parents:
diff changeset
194 if not levparents:
shellac
parents:
diff changeset
195 break
shellac
parents:
diff changeset
shellac
parents:
diff changeset
197 for levparent in levparents:
shellac
parents:
diff changeset
198 del numpreds[levparent]
shellac
parents:
diff changeset
199 if levparent in successors:
shellac
parents:
diff changeset
200 for levparentsucc in successors[levparent]:
shellac
parents:
diff changeset
201 numpreds[levparentsucc] -= 1
shellac
parents:
diff changeset
202 del successors[levparent]
shellac
parents:
diff changeset
203
shellac
parents:
diff changeset
204 if numpreds:
shellac
parents:
diff changeset
205 # Everything in num_parents has at least one child ->
shellac
parents:
diff changeset
206 # there's a cycle.
shellac
parents:
diff changeset