comparison env/lib/python3.9/site-packages/networkx/algorithms/tree/branchings.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Algorithms for finding optimum branchings and spanning arborescences.
3
4 This implementation is based on:
5
6 J. Edmonds, Optimum branchings, J. Res. Natl. Bur. Standards 71B (1967),
7 233–240. URL: http://archive.org/details/jresv71Bn4p233
8
9 """
10 # TODO: Implement method from Gabow, Galil, Spence and Tarjan:
11 #
12 # @article{
13 # year={1986},
14 # issn={0209-9683},
15 # journal={Combinatorica},
16 # volume={6},
17 # number={2},
18 # doi={10.1007/BF02579168},
19 # title={Efficient algorithms for finding minimum spanning trees in
20 # undirected and directed graphs},
21 # url={https://doi.org/10.1007/BF02579168},
22 # publisher={Springer-Verlag},
23 # keywords={68 B 15; 68 C 05},
24 # author={Gabow, Harold N. and Galil, Zvi and Spencer, Thomas and Tarjan,
25 # Robert E.},
26 # pages={109-122},
27 # language={English}
28 # }
29
30
31 import string
32 from operator import itemgetter
33
34 import networkx as nx
35 from networkx.utils import py_random_state
36
37 from .recognition import is_arborescence, is_branching
38
39
40 __all__ = [
41 "branching_weight",
42 "greedy_branching",
43 "maximum_branching",
44 "minimum_branching",
45 "maximum_spanning_arborescence",
46 "minimum_spanning_arborescence",
47 "Edmonds",
48 ]
49
50 KINDS = {"max", "min"}
51
52 STYLES = {
53 "branching": "branching",
54 "arborescence": "arborescence",
55 "spanning arborescence": "arborescence",
56 }
57
58 INF = float("inf")
59
60
61 @py_random_state(1)
62 def random_string(L=15, seed=None):
63 return "".join([seed.choice(string.ascii_letters) for n in range(L)])
64
65
66 def _min_weight(weight):
67 return -weight
68
69
70 def _max_weight(weight):
71 return weight
72
73
74 def branching_weight(G, attr="weight", default=1):
75 """
76 Returns the total weight of a branching.
77
78 """
79 return sum(edge[2].get(attr, default) for edge in G.edges(data=True))
80
81
82 @py_random_state(4)
83 def greedy_branching(G, attr="weight", default=1, kind="max", seed=None):
84 """
85 Returns a branching obtained through a greedy algorithm.
86
87 This algorithm is wrong, and cannot give a proper optimal branching.
88 However, we include it for pedagogical reasons, as it can be helpful to
89 see what its outputs are.
90
91 The output is a branching, and possibly, a spanning arborescence. However,
92 it is not guaranteed to be optimal in either case.
93
94 Parameters
95 ----------
96 G : DiGraph
97 The directed graph to scan.
98 attr : str
99 The attribute to use as weights. If None, then each edge will be
100 treated equally with a weight of 1.
101 default : float
102 When `attr` is not None, then if an edge does not have that attribute,
103 `default` specifies what value it should take.
104 kind : str
105 The type of optimum to search for: 'min' or 'max' greedy branching.
106 seed : integer, random_state, or None (default)
107 Indicator of random number generation state.
108 See :ref:`Randomness<randomness>`.
109
110 Returns
111 -------
112 B : directed graph
113 The greedily obtained branching.
114
115 """
116 if kind not in KINDS:
117 raise nx.NetworkXException("Unknown value for `kind`.")
118
119 if kind == "min":
120 reverse = False
121 else:
122 reverse = True
123
124 if attr is None:
125 # Generate a random string the graph probably won't have.
126 attr = random_string(seed=seed)
127
128 edges = [(u, v, data.get(attr, default)) for (u, v, data) in G.edges(data=True)]
129
130 # We sort by weight, but also by nodes to normalize behavior across runs.
131 try:
132 edges.sort(key=itemgetter(2, 0, 1), reverse=reverse)
133 except TypeError:
134 # This will fail in Python 3.x if the nodes are of varying types.
135 # In that case, we use the arbitrary order.
136 edges.sort(key=itemgetter(2), reverse=reverse)
137
138 # The branching begins with a forest of no edges.
139 B = nx.DiGraph()
140 B.add_nodes_from(G)
141
142 # Now we add edges greedily so long we maintain the branching.
143 uf = nx.utils.UnionFind()
144 for i, (u, v, w) in enumerate(edges):
145 if uf[u] == uf[v]:
146 # Adding this edge would form a directed cycle.
147 continue
148 elif B.in_degree(v) == 1:
149 # The edge would increase the degree to be greater than one.
150 continue
151 else:
152 # If attr was None, then don't insert weights...
153 data = {}
154 if attr is not None:
155 data[attr] = w
156 B.add_edge(u, v, **data)
157 uf.union(u, v)
158
159 return B
160
161
162 class MultiDiGraph_EdgeKey(nx.MultiDiGraph):
163 """
164 MultiDiGraph which assigns unique keys to every edge.
165
166 Adds a dictionary edge_index which maps edge keys to (u, v, data) tuples.
167
168 This is not a complete implementation. For Edmonds algorithm, we only use
169 add_node and add_edge, so that is all that is implemented here. During
170 additions, any specified keys are ignored---this means that you also
171 cannot update edge attributes through add_node and add_edge.
172
173 Why do we need this? Edmonds algorithm requires that we track edges, even
174 as we change the head and tail of an edge, and even changing the weight
175 of edges. We must reliably track edges across graph mutations.
176
177 """
178
179 def __init__(self, incoming_graph_data=None, **attr):
180 cls = super()
181 cls.__init__(incoming_graph_data=incoming_graph_data, **attr)
182
183 self._cls = cls
184 self.edge_index = {}
185
186 def remove_node(self, n):
187 keys = set()
188 for keydict in self.pred[n].values():
189 keys.update(keydict)
190 for keydict in self.succ[n].values():
191 keys.update(keydict)
192
193 for key in keys:
194 del self.edge_index[key]
195
196 self._cls.remove_node(n)
197
198 def remove_nodes_from(self, nbunch):
199 for n in nbunch:
200 self.remove_node(n)
201
202 def add_edge(self, u_for_edge, v_for_edge, key_for_edge, **attr):
203 """
204 Key is now required.
205
206 """
207 u, v, key = u_for_edge, v_for_edge, key_for_edge
208 if key in self.edge_index:
209 uu, vv, _ = self.edge_index[key]
210 if (u != uu) or (v != vv):
211 raise Exception(f"Key {key!r} is already in use.")
212
213 self._cls.add_edge(u, v, key, **attr)
214 self.edge_index[key] = (u, v, self.succ[u][v][key])
215
216 def add_edges_from(self, ebunch_to_add, **attr):
217 for u, v, k, d in ebunch_to_add:
218 self.add_edge(u, v, k, **d)
219
220 def remove_edge_with_key(self, key):
221 try:
222 u, v, _ = self.edge_index[key]
223 except KeyError as e:
224 raise KeyError(f"Invalid edge key {key!r}") from e
225 else:
226 del self.edge_index[key]
227 self._cls.remove_edge(u, v, key)
228
229 def remove_edges_from(self, ebunch):
230 raise NotImplementedError
231
232
233 def get_path(G, u, v):
234 """
235 Returns the edge keys of the unique path between u and v.
236
237 This is not a generic function. G must be a branching and an instance of
238 MultiDiGraph_EdgeKey.
239
240 """
241 nodes = nx.shortest_path(G, u, v)
242 # We are guaranteed that there is only one edge connected every node
243 # in the shortest path.
244
245 def first_key(i, vv):
246 # Needed for 2.x/3.x compatibilitity
247 keys = G[nodes[i]][vv].keys()
248 # Normalize behavior
249 keys = list(keys)
250 return keys[0]
251
252 edges = [first_key(i, vv) for i, vv in enumerate(nodes[1:])]
253 return nodes, edges
254
255
256 class Edmonds:
257 """
258 Edmonds algorithm for finding optimal branchings and spanning arborescences.
259
260 """
261
262 def __init__(self, G, seed=None):
263 self.G_original = G
264
265 # Need to fix this. We need the whole tree.
266 self.store = True
267
268 # The final answer.
269 self.edges = []
270
271 # Since we will be creating graphs with new nodes, we need to make
272 # sure that our node names do not conflict with the real node names.
273 self.template = random_string(seed=seed) + "_{0}"
274
275 def _init(self, attr, default, kind, style, preserve_attrs, seed):
276 if kind not in KINDS:
277 raise nx.NetworkXException("Unknown value for `kind`.")
278
279 # Store inputs.
280 self.attr = attr
281 self.default = default
282 self.kind = kind
283 self.style = style
284
285 # Determine how we are going to transform the weights.
286 if kind == "min":
287 self.trans = trans = _min_weight
288 else:
289 self.trans = trans = _max_weight
290
291 if attr is None:
292 # Generate a random attr the graph probably won't have.
293 attr = random_string(seed=seed)
294
295 # This is the actual attribute used by the algorithm.
296 self._attr = attr
297
298 # This attribute is used to store whether a particular edge is still
299 # a candidate. We generate a random attr to remove clashes with
300 # preserved edges
301 self.candidate_attr = "candidate_" + random_string(seed=seed)
302
303 # The object we manipulate at each step is a multidigraph.
304 self.G = G = MultiDiGraph_EdgeKey()
305 for key, (u, v, data) in enumerate(self.G_original.edges(data=True)):
306 d = {attr: trans(data.get(attr, default))}
307
308 if preserve_attrs:
309 for (d_k, d_v) in data.items():
310 if d_k != attr:
311 d[d_k] = d_v
312
313 G.add_edge(u, v, key, **d)
314
315 self.level = 0
316
317 # These are the "buckets" from the paper.
318 #
319 # As in the paper, G^i are modified versions of the original graph.
320 # D^i and E^i are nodes and edges of the maximal edges that are
321 # consistent with G^i. These are dashed edges in figures A-F of the
322 # paper. In this implementation, we store D^i and E^i together as a
323 # graph B^i. So we will have strictly more B^i than the paper does.
324 self.B = MultiDiGraph_EdgeKey()
325 self.B.edge_index = {}
326 self.graphs = [] # G^i
327 self.branchings = [] # B^i
328 self.uf = nx.utils.UnionFind()
329
330 # A list of lists of edge indexes. Each list is a circuit for graph G^i.
331 # Note the edge list will not, in general, be a circuit in graph G^0.
332 self.circuits = []
333 # Stores the index of the minimum edge in the circuit found in G^i and B^i.
334 # The ordering of the edges seems to preserve the weight ordering from G^0.
335 # So even if the circuit does not form a circuit in G^0, it is still true
336 # that the minimum edge of the circuit in G^i is still the minimum edge
337 # in circuit G^0 (depsite their weights being different).
338 self.minedge_circuit = []
339
340 def find_optimum(
341 self,
342 attr="weight",
343 default=1,
344 kind="max",
345 style="branching",
346 preserve_attrs=False,
347 seed=None,
348 ):
349 """
350 Returns a branching from G.
351
352 Parameters
353 ----------
354 attr : str
355 The edge attribute used to in determining optimality.
356 default : float
357 The value of the edge attribute used if an edge does not have
358 the attribute `attr`.
359 kind : {'min', 'max'}
360 The type of optimum to search for, either 'min' or 'max'.
361 style : {'branching', 'arborescence'}
362 If 'branching', then an optimal branching is found. If `style` is
363 'arborescence', then a branching is found, such that if the
364 branching is also an arborescence, then the branching is an
365 optimal spanning arborescences. A given graph G need not have
366 an optimal spanning arborescence.
367 preserve_attrs : bool
368 If True, preserve the other edge attributes of the original
369 graph (that are not the one passed to `attr`)
370 seed : integer, random_state, or None (default)
371 Indicator of random number generation state.
372 See :ref:`Randomness<randomness>`.
373
374 Returns
375 -------
376 H : (multi)digraph
377 The branching.
378
379 """
380 self._init(attr, default, kind, style, preserve_attrs, seed)
381 uf = self.uf
382
383 # This enormous while loop could use some refactoring...
384
385 G, B = self.G, self.B
386 D = set()
387 nodes = iter(list(G.nodes()))
388 attr = self._attr
389 G_pred = G.pred
390
391 def desired_edge(v):
392 """
393 Find the edge directed toward v with maximal weight.
394
395 """
396 edge = None
397 weight = -INF
398 for u, _, key, data in G.in_edges(v, data=True, keys=True):
399 new_weight = data[attr]
400 if new_weight > weight:
401 weight = new_weight
402 edge = (u, v, key, new_weight)
403
404 return edge, weight
405
406 while True:
407 # (I1): Choose a node v in G^i not in D^i.
408 try:
409 v = next(nodes)
410 except StopIteration:
411 # If there are no more new nodes to consider, then we *should*
412 # meet the break condition (b) from the paper:
413 # (b) every node of G^i is in D^i and E^i is a branching
414 # Construction guarantees that it's a branching.
415 assert len(G) == len(B)
416 if len(B):
417 assert is_branching(B)
418
419 if self.store:
420 self.graphs.append(G.copy())
421 self.branchings.append(B.copy())
422
423 # Add these to keep the lengths equal. Element i is the
424 # circuit at level i that was merged to form branching i+1.
425 # There is no circuit for the last level.
426 self.circuits.append([])
427 self.minedge_circuit.append(None)
428 break
429 else:
430 if v in D:
431 # print("v in D", v)
432 continue
433
434 # Put v into bucket D^i.
435 # print(f"Adding node {v}")
436 D.add(v)
437 B.add_node(v)
438
439 edge, weight = desired_edge(v)
440 # print(f"Max edge is {edge!r}")
441 if edge is None:
442 # If there is no edge, continue with a new node at (I1).
443 continue
444 else:
445 # Determine if adding the edge to E^i would mean its no longer
446 # a branching. Presently, v has indegree 0 in B---it is a root.
447 u = edge[0]
448
449 if uf[u] == uf[v]:
450 # Then adding the edge will create a circuit. Then B
451 # contains a unique path P from v to u. So condition (a)
452 # from the paper does hold. We need to store the circuit
453 # for future reference.
454 Q_nodes, Q_edges = get_path(B, v, u)
455 Q_edges.append(edge[2])
456 else:
457 # Then B with the edge is still a branching and condition
458 # (a) from the paper does not hold.
459 Q_nodes, Q_edges = None, None
460
461 # Conditions for adding the edge.
462 # If weight < 0, then it cannot help in finding a maximum branching.
463 if self.style == "branching" and weight <= 0:
464 acceptable = False
465 else:
466 acceptable = True
467
468 # print(f"Edge is acceptable: {acceptable}")
469 if acceptable:
470 dd = {attr: weight}
471 B.add_edge(u, v, edge[2], **dd)
472 G[u][v][edge[2]][self.candidate_attr] = True
473 uf.union(u, v)
474 if Q_edges is not None:
475 # print("Edge introduced a simple cycle:")
476 # print(Q_nodes, Q_edges)
477
478 # Move to method
479 # Previous meaning of u and v is no longer important.
480
481 # Apply (I2).
482 # Get the edge in the cycle with the minimum weight.
483 # Also, save the incoming weights for each node.
484 minweight = INF
485 minedge = None
486 Q_incoming_weight = {}
487 for edge_key in Q_edges:
488 u, v, data = B.edge_index[edge_key]
489 w = data[attr]
490 Q_incoming_weight[v] = w
491 if w < minweight:
492 minweight = w
493 minedge = edge_key
494
495 self.circuits.append(Q_edges)
496 self.minedge_circuit.append(minedge)
497
498 if self.store:
499 self.graphs.append(G.copy())
500 # Always need the branching with circuits.
501 self.branchings.append(B.copy())
502
503 # Now we mutate it.
504 new_node = self.template.format(self.level)
505
506 # print(minweight, minedge, Q_incoming_weight)
507
508 G.add_node(new_node)
509 new_edges = []
510 for u, v, key, data in G.edges(data=True, keys=True):
511 if u in Q_incoming_weight:
512 if v in Q_incoming_weight:
513 # Circuit edge, do nothing for now.
514 # Eventually delete it.
515 continue
516 else:
517 # Outgoing edge. Make it from new node
518 dd = data.copy()
519 new_edges.append((new_node, v, key, dd))
520 else:
521 if v in Q_incoming_weight:
522 # Incoming edge. Change its weight
523 w = data[attr]
524 w += minweight - Q_incoming_weight[v]
525 dd = data.copy()
526 dd[attr] = w
527 new_edges.append((u, new_node, key, dd))
528 else:
529 # Outside edge. No modification necessary.
530 continue
531
532 G.remove_nodes_from(Q_nodes)
533 B.remove_nodes_from(Q_nodes)
534 D.difference_update(set(Q_nodes))
535
536 for u, v, key, data in new_edges:
537 G.add_edge(u, v, key, **data)
538 if self.candidate_attr in data:
539 del data[self.candidate_attr]
540 B.add_edge(u, v, key, **data)
541 uf.union(u, v)
542
543 nodes = iter(list(G.nodes()))
544 self.level += 1
545
546 # (I3) Branch construction.
547 # print(self.level)
548 H = self.G_original.__class__()
549
550 def is_root(G, u, edgekeys):
551 """
552 Returns True if `u` is a root node in G.
553
554 Node `u` will be a root node if its in-degree, restricted to the
555 specified edges, is equal to 0.
556
557 """
558 if u not in G:
559 # print(G.nodes(), u)
560 raise Exception(f"{u!r} not in G")
561 for v in G.pred[u]:
562 for edgekey in G.pred[u][v]:
563 if edgekey in edgekeys:
564 return False, edgekey
565 else:
566 return True, None
567
568 # Start with the branching edges in the last level.
569 edges = set(self.branchings[self.level].edge_index)
570 while self.level > 0:
571 self.level -= 1
572
573 # The current level is i, and we start counting from 0.
574
575 # We need the node at level i+1 that results from merging a circuit
576 # at level i. randomname_0 is the first merged node and this
577 # happens at level 1. That is, randomname_0 is a node at level 1
578 # that results from merging a circuit at level 0.
579 merged_node = self.template.format(self.level)
580
581 # The circuit at level i that was merged as a node the graph
582 # at level i+1.
583 circuit = self.circuits[self.level]
584 # print
585 # print(merged_node, self.level, circuit)
586 # print("before", edges)
587 # Note, we ask if it is a root in the full graph, not the branching.
588 # The branching alone doesn't have all the edges.
589
590 isroot, edgekey = is_root(self.graphs[self.level + 1], merged_node, edges)
591 edges.update(circuit)
592 if isroot:
593 minedge = self.minedge_circuit[self.level]
594 if minedge is None:
595 raise Exception
596
597 # Remove the edge in the cycle with minimum weight.
598 edges.remove(minedge)
599 else:
600 # We have identified an edge at next higher level that
601 # transitions into the merged node at the level. That edge
602 # transitions to some corresponding node at the current level.
603 # We want to remove an edge from the cycle that transitions
604 # into the corresponding node.
605 # print("edgekey is: ", edgekey)
606 # print("circuit is: ", circuit)
607 # The branching at level i
608 G = self.graphs[self.level]
609 # print(G.edge_index)
610 target = G.edge_index[edgekey][1]
611 for edgekey in circuit:
612 u, v, data = G.edge_index[edgekey]
613 if v == target:
614 break
615 else:
616 raise Exception("Couldn't find edge incoming to merged node.")
617 # print(f"not a root. removing {edgekey}")
618
619 edges.remove(edgekey)
620
621 self.edges = edges
622
623 H.add_nodes_from(self.G_original)
624 for edgekey in edges:
625 u, v, d = self.graphs[0].edge_index[edgekey]
626 dd = {self.attr: self.trans(d[self.attr])}
627
628 # Optionally, preserve the other edge attributes of the original
629 # graph
630 if preserve_attrs:
631 for (key, value) in d.items():
632 if key not in [self.attr, self.candidate_attr]:
633 dd[key] = value
634
635 # TODO: make this preserve the key.
636 H.add_edge(u, v, **dd)
637
638 return H
639
640
641 def maximum_branching(G, attr="weight", default=1, preserve_attrs=False):
642 ed = Edmonds(G)
643 B = ed.find_optimum(
644 attr, default, kind="max", style="branching", preserve_attrs=preserve_attrs
645 )
646 return B
647
648
649 def minimum_branching(G, attr="weight", default=1, preserve_attrs=False):
650 ed = Edmonds(G)
651 B = ed.find_optimum(
652 attr, default, kind="min", style="branching", preserve_attrs=preserve_attrs
653 )
654 return B
655
656
657 def maximum_spanning_arborescence(G, attr="weight", default=1, preserve_attrs=False):
658 ed = Edmonds(G)
659 B = ed.find_optimum(
660 attr, default, kind="max", style="arborescence", preserve_attrs=preserve_attrs
661 )
662 if not is_arborescence(B):
663 msg = "No maximum spanning arborescence in G."
664 raise nx.exception.NetworkXException(msg)
665 return B
666
667
668 def minimum_spanning_arborescence(G, attr="weight", default=1, preserve_attrs=False):
669 ed = Edmonds(G)
670 B = ed.find_optimum(
671 attr, default, kind="min", style="arborescence", preserve_attrs=preserve_attrs
672 )
673 if not is_arborescence(B):
674 msg = "No minimum spanning arborescence in G."
675 raise nx.exception.NetworkXException(msg)
676 return B
677
678
679 docstring_branching = """
680 Returns a {kind} {style} from G.
681
682 Parameters
683 ----------
684 G : (multi)digraph-like
685 The graph to be searched.
686 attr : str
687 The edge attribute used to in determining optimality.
688 default : float
689 The value of the edge attribute used if an edge does not have
690 the attribute `attr`.
691 preserve_attrs : bool
692 If True, preserve the other attributes of the original graph (that are not
693 passed to `attr`)
694
695 Returns
696 -------
697 B : (multi)digraph-like
698 A {kind} {style}.
699 """
700
701 docstring_arborescence = (
702 docstring_branching
703 + """
704 Raises
705 ------
706 NetworkXException
707 If the graph does not contain a {kind} {style}.
708
709 """
710 )
711
712 maximum_branching.__doc__ = docstring_branching.format(
713 kind="maximum", style="branching"
714 )
715
716 minimum_branching.__doc__ = docstring_branching.format(
717 kind="minimum", style="branching"
718 )
719
720 maximum_spanning_arborescence.__doc__ = docstring_arborescence.format(
721 kind="maximum", style="spanning arborescence"
722 )
723
724 minimum_spanning_arborescence.__doc__ = docstring_arborescence.format(
725 kind="minimum", style="spanning arborescence"
726 )