Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/bipartite/matching.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # This module uses material from the Wikipedia article Hopcroft--Karp algorithm | |
2 # <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on | |
3 # January 3, 2015, which is released under the Creative Commons | |
4 # Attribution-Share-Alike License 3.0 | |
5 # <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes | |
6 # pseudocode, which has been translated into the corresponding Python code. | |
7 # | |
8 # Portions of this module use code from David Eppstein's Python Algorithms and | |
9 # Data Structures (PADS) library, which is dedicated to the public domain (for | |
10 # proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>). | |
11 """Provides functions for computing maximum cardinality matchings and minimum | |
12 weight full matchings in a bipartite graph. | |
13 | |
14 If you don't care about the particular implementation of the maximum matching | |
15 algorithm, simply use the :func:`maximum_matching`. If you do care, you can | |
16 import one of the named maximum matching algorithms directly. | |
17 | |
18 For example, to find a maximum matching in the complete bipartite graph with | |
19 two vertices on the left and three vertices on the right: | |
20 | |
21 >>> G = nx.complete_bipartite_graph(2, 3) | |
22 >>> left, right = nx.bipartite.sets(G) | |
23 >>> list(left) | |
24 [0, 1] | |
25 >>> list(right) | |
26 [2, 3, 4] | |
27 >>> nx.bipartite.maximum_matching(G) | |
28 {0: 2, 1: 3, 2: 0, 3: 1} | |
29 | |
30 The dictionary returned by :func:`maximum_matching` includes a mapping for | |
31 vertices in both the left and right vertex sets. | |
32 | |
33 Similarly, :func:`minimum_weight_full_matching` produces, for a complete | |
34 weighted bipartite graph, a matching whose cardinality is the cardinality of | |
35 the smaller of the two partitions, and for which the sum of the weights of the | |
36 edges included in the matching is minimal. | |
37 | |
38 """ | |
39 import collections | |
40 import itertools | |
41 | |
42 from networkx.algorithms.bipartite.matrix import biadjacency_matrix | |
43 from networkx.algorithms.bipartite import sets as bipartite_sets | |
44 import networkx as nx | |
45 | |
46 __all__ = [ | |
47 "maximum_matching", | |
48 "hopcroft_karp_matching", | |
49 "eppstein_matching", | |
50 "to_vertex_cover", | |
51 "minimum_weight_full_matching", | |
52 ] | |
53 | |
54 INFINITY = float("inf") | |
55 | |
56 | |
57 def hopcroft_karp_matching(G, top_nodes=None): | |
58 """Returns the maximum cardinality matching of the bipartite graph `G`. | |
59 | |
60 A matching is a set of edges that do not share any nodes. A maximum | |
61 cardinality matching is a matching with the most edges possible. It | |
62 is not always unique. Finding a matching in a bipartite graph can be | |
63 treated as a networkx flow problem. | |
64 | |
65 The functions ``hopcroft_karp_matching`` and ``maximum_matching`` | |
66 are aliases of the same function. | |
67 | |
68 Parameters | |
69 ---------- | |
70 G : NetworkX graph | |
71 | |
72 Undirected bipartite graph | |
73 | |
74 top_nodes : container of nodes | |
75 | |
76 Container with all nodes in one bipartite node set. If not supplied | |
77 it will be computed. But if more than one solution exists an exception | |
78 will be raised. | |
79 | |
80 Returns | |
81 ------- | |
82 matches : dictionary | |
83 | |
84 The matching is returned as a dictionary, `matches`, such that | |
85 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched | |
86 nodes do not occur as a key in `matches`. | |
87 | |
88 Raises | |
89 ------ | |
90 AmbiguousSolution | |
91 Raised if the input bipartite graph is disconnected and no container | |
92 with all nodes in one bipartite set is provided. When determining | |
93 the nodes in each bipartite set more than one valid solution is | |
94 possible if the input graph is disconnected. | |
95 | |
96 Notes | |
97 ----- | |
98 This function is implemented with the `Hopcroft--Karp matching algorithm | |
99 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for | |
100 bipartite graphs. | |
101 | |
102 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |
103 for further details on how bipartite graphs are handled in NetworkX. | |
104 | |
105 See Also | |
106 -------- | |
107 maximum_matching | |
108 hopcroft_karp_matching | |
109 eppstein_matching | |
110 | |
111 References | |
112 ---------- | |
113 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for | |
114 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing** | |
115 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>. | |
116 | |
117 """ | |
118 # First we define some auxiliary search functions. | |
119 # | |
120 # If you are a human reading these auxiliary search functions, the "global" | |
121 # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined | |
122 # below the functions, so that they are initialized close to the initial | |
123 # invocation of the search functions. | |
124 def breadth_first_search(): | |
125 for v in left: | |
126 if leftmatches[v] is None: | |
127 distances[v] = 0 | |
128 queue.append(v) | |
129 else: | |
130 distances[v] = INFINITY | |
131 distances[None] = INFINITY | |
132 while queue: | |
133 v = queue.popleft() | |
134 if distances[v] < distances[None]: | |
135 for u in G[v]: | |
136 if distances[rightmatches[u]] is INFINITY: | |
137 distances[rightmatches[u]] = distances[v] + 1 | |
138 queue.append(rightmatches[u]) | |
139 return distances[None] is not INFINITY | |
140 | |
141 def depth_first_search(v): | |
142 if v is not None: | |
143 for u in G[v]: | |
144 if distances[rightmatches[u]] == distances[v] + 1: | |
145 if depth_first_search(rightmatches[u]): | |
146 rightmatches[u] = v | |
147 leftmatches[v] = u | |
148 return True | |
149 distances[v] = INFINITY | |
150 return False | |
151 return True | |
152 | |
153 # Initialize the "global" variables that maintain state during the search. | |
154 left, right = bipartite_sets(G, top_nodes) | |
155 leftmatches = {v: None for v in left} | |
156 rightmatches = {v: None for v in right} | |
157 distances = {} | |
158 queue = collections.deque() | |
159 | |
160 # Implementation note: this counter is incremented as pairs are matched but | |
161 # it is currently not used elsewhere in the computation. | |
162 num_matched_pairs = 0 | |
163 while breadth_first_search(): | |
164 for v in left: | |
165 if leftmatches[v] is None: | |
166 if depth_first_search(v): | |
167 num_matched_pairs += 1 | |
168 | |
169 # Strip the entries matched to `None`. | |
170 leftmatches = {k: v for k, v in leftmatches.items() if v is not None} | |
171 rightmatches = {k: v for k, v in rightmatches.items() if v is not None} | |
172 | |
173 # At this point, the left matches and the right matches are inverses of one | |
174 # another. In other words, | |
175 # | |
176 # leftmatches == {v, k for k, v in rightmatches.items()} | |
177 # | |
178 # Finally, we combine both the left matches and right matches. | |
179 return dict(itertools.chain(leftmatches.items(), rightmatches.items())) | |
180 | |
181 | |
182 def eppstein_matching(G, top_nodes=None): | |
183 """Returns the maximum cardinality matching of the bipartite graph `G`. | |
184 | |
185 Parameters | |
186 ---------- | |
187 G : NetworkX graph | |
188 | |
189 Undirected bipartite graph | |
190 | |
191 top_nodes : container | |
192 | |
193 Container with all nodes in one bipartite node set. If not supplied | |
194 it will be computed. But if more than one solution exists an exception | |
195 will be raised. | |
196 | |
197 Returns | |
198 ------- | |
199 matches : dictionary | |
200 | |
201 The matching is returned as a dictionary, `matching`, such that | |
202 ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched | |
203 nodes do not occur as a key in `matching`. | |
204 | |
205 Raises | |
206 ------ | |
207 AmbiguousSolution | |
208 Raised if the input bipartite graph is disconnected and no container | |
209 with all nodes in one bipartite set is provided. When determining | |
210 the nodes in each bipartite set more than one valid solution is | |
211 possible if the input graph is disconnected. | |
212 | |
213 Notes | |
214 ----- | |
215 This function is implemented with David Eppstein's version of the algorithm | |
216 Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which | |
217 originally appeared in the `Python Algorithms and Data Structures library | |
218 (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_. | |
219 | |
220 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |
221 for further details on how bipartite graphs are handled in NetworkX. | |
222 | |
223 See Also | |
224 -------- | |
225 | |
226 hopcroft_karp_matching | |
227 | |
228 """ | |
229 # Due to its original implementation, a directed graph is needed | |
230 # so that the two sets of bipartite nodes can be distinguished | |
231 left, right = bipartite_sets(G, top_nodes) | |
232 G = nx.DiGraph(G.edges(left)) | |
233 # initialize greedy matching (redundant, but faster than full search) | |
234 matching = {} | |
235 for u in G: | |
236 for v in G[u]: | |
237 if v not in matching: | |
238 matching[v] = u | |
239 break | |
240 while True: | |
241 # structure residual graph into layers | |
242 # pred[u] gives the neighbor in the previous layer for u in U | |
243 # preds[v] gives a list of neighbors in the previous layer for v in V | |
244 # unmatched gives a list of unmatched vertices in final layer of V, | |
245 # and is also used as a flag value for pred[u] when u is in the first | |
246 # layer | |
247 preds = {} | |
248 unmatched = [] | |
249 pred = {u: unmatched for u in G} | |
250 for v in matching: | |
251 del pred[matching[v]] | |
252 layer = list(pred) | |
253 | |
254 # repeatedly extend layering structure by another pair of layers | |
255 while layer and not unmatched: | |
256 newLayer = {} | |
257 for u in layer: | |
258 for v in G[u]: | |
259 if v not in preds: | |
260 newLayer.setdefault(v, []).append(u) | |
261 layer = [] | |
262 for v in newLayer: | |
263 preds[v] = newLayer[v] | |
264 if v in matching: | |
265 layer.append(matching[v]) | |
266 pred[matching[v]] = v | |
267 else: | |
268 unmatched.append(v) | |
269 | |
270 # did we finish layering without finding any alternating paths? | |
271 if not unmatched: | |
272 unlayered = {} | |
273 for u in G: | |
274 # TODO Why is extra inner loop necessary? | |
275 for v in G[u]: | |
276 if v not in preds: | |
277 unlayered[v] = None | |
278 # TODO Originally, this function returned a three-tuple: | |
279 # | |
280 # return (matching, list(pred), list(unlayered)) | |
281 # | |
282 # For some reason, the documentation for this function | |
283 # indicated that the second and third elements of the returned | |
284 # three-tuple would be the vertices in the left and right vertex | |
285 # sets, respectively, that are also in the maximum independent set. | |
286 # However, what I think the author meant was that the second | |
287 # element is the list of vertices that were unmatched and the third | |
288 # element was the list of vertices that were matched. Since that | |
289 # seems to be the case, they don't really need to be returned, | |
290 # since that information can be inferred from the matching | |
291 # dictionary. | |
292 | |
293 # All the matched nodes must be a key in the dictionary | |
294 for key in matching.copy(): | |
295 matching[matching[key]] = key | |
296 return matching | |
297 | |
298 # recursively search backward through layers to find alternating paths | |
299 # recursion returns true if found path, false otherwise | |
300 def recurse(v): | |
301 if v in preds: | |
302 L = preds.pop(v) | |
303 for u in L: | |
304 if u in pred: | |
305 pu = pred.pop(u) | |
306 if pu is unmatched or recurse(pu): | |
307 matching[v] = u | |
308 return True | |
309 return False | |
310 | |
311 for v in unmatched: | |
312 recurse(v) | |
313 | |
314 | |
315 def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets): | |
316 """Returns True if and only if the vertex `v` is connected to one of | |
317 the target vertices by an alternating path in `G`. | |
318 | |
319 An *alternating path* is a path in which every other edge is in the | |
320 specified maximum matching (and the remaining edges in the path are not in | |
321 the matching). An alternating path may have matched edges in the even | |
322 positions or in the odd positions, as long as the edges alternate between | |
323 'matched' and 'unmatched'. | |
324 | |
325 `G` is an undirected bipartite NetworkX graph. | |
326 | |
327 `v` is a vertex in `G`. | |
328 | |
329 `matched_edges` is a set of edges present in a maximum matching in `G`. | |
330 | |
331 `unmatched_edges` is a set of edges not present in a maximum | |
332 matching in `G`. | |
333 | |
334 `targets` is a set of vertices. | |
335 | |
336 """ | |
337 | |
338 def _alternating_dfs(u, along_matched=True): | |
339 """Returns True if and only if `u` is connected to one of the | |
340 targets by an alternating path. | |
341 | |
342 `u` is a vertex in the graph `G`. | |
343 | |
344 If `along_matched` is True, this step of the depth-first search | |
345 will continue only through edges in the given matching. Otherwise, it | |
346 will continue only through edges *not* in the given matching. | |
347 | |
348 """ | |
349 if along_matched: | |
350 edges = itertools.cycle([matched_edges, unmatched_edges]) | |
351 else: | |
352 edges = itertools.cycle([unmatched_edges, matched_edges]) | |
353 visited = set() | |
354 stack = [(u, iter(G[u]), next(edges))] | |
355 while stack: | |
356 parent, children, valid_edges = stack[-1] | |
357 try: | |
358 child = next(children) | |
359 if child not in visited: | |
360 if (parent, child) in valid_edges or (child, parent) in valid_edges: | |
361 if child in targets: | |
362 return True | |
363 visited.add(child) | |
364 stack.append((child, iter(G[child]), next(edges))) | |
365 except StopIteration: | |
366 stack.pop() | |
367 return False | |
368 | |
369 # Check for alternating paths starting with edges in the matching, then | |
370 # check for alternating paths starting with edges not in the | |
371 # matching. | |
372 return _alternating_dfs(v, along_matched=True) or _alternating_dfs( | |
373 v, along_matched=False | |
374 ) | |
375 | |
376 | |
377 def _connected_by_alternating_paths(G, matching, targets): | |
378 """Returns the set of vertices that are connected to one of the target | |
379 vertices by an alternating path in `G` or are themselves a target. | |
380 | |
381 An *alternating path* is a path in which every other edge is in the | |
382 specified maximum matching (and the remaining edges in the path are not in | |
383 the matching). An alternating path may have matched edges in the even | |
384 positions or in the odd positions, as long as the edges alternate between | |
385 'matched' and 'unmatched'. | |
386 | |
387 `G` is an undirected bipartite NetworkX graph. | |
388 | |
389 `matching` is a dictionary representing a maximum matching in `G`, as | |
390 returned by, for example, :func:`maximum_matching`. | |
391 | |
392 `targets` is a set of vertices. | |
393 | |
394 """ | |
395 # Get the set of matched edges and the set of unmatched edges. Only include | |
396 # one version of each undirected edge (for example, include edge (1, 2) but | |
397 # not edge (2, 1)). Using frozensets as an intermediary step we do not | |
398 # require nodes to be orderable. | |
399 edge_sets = {frozenset((u, v)) for u, v in matching.items()} | |
400 matched_edges = {tuple(edge) for edge in edge_sets} | |
401 unmatched_edges = { | |
402 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets | |
403 } | |
404 | |
405 return { | |
406 v | |
407 for v in G | |
408 if v in targets | |
409 or _is_connected_by_alternating_path( | |
410 G, v, matched_edges, unmatched_edges, targets | |
411 ) | |
412 } | |
413 | |
414 | |
415 def to_vertex_cover(G, matching, top_nodes=None): | |
416 """Returns the minimum vertex cover corresponding to the given maximum | |
417 matching of the bipartite graph `G`. | |
418 | |
419 Parameters | |
420 ---------- | |
421 G : NetworkX graph | |
422 | |
423 Undirected bipartite graph | |
424 | |
425 matching : dictionary | |
426 | |
427 A dictionary whose keys are vertices in `G` and whose values are the | |
428 distinct neighbors comprising the maximum matching for `G`, as returned | |
429 by, for example, :func:`maximum_matching`. The dictionary *must* | |
430 represent the maximum matching. | |
431 | |
432 top_nodes : container | |
433 | |
434 Container with all nodes in one bipartite node set. If not supplied | |
435 it will be computed. But if more than one solution exists an exception | |
436 will be raised. | |
437 | |
438 Returns | |
439 ------- | |
440 vertex_cover : :class:`set` | |
441 | |
442 The minimum vertex cover in `G`. | |
443 | |
444 Raises | |
445 ------ | |
446 AmbiguousSolution | |
447 Raised if the input bipartite graph is disconnected and no container | |
448 with all nodes in one bipartite set is provided. When determining | |
449 the nodes in each bipartite set more than one valid solution is | |
450 possible if the input graph is disconnected. | |
451 | |
452 Notes | |
453 ----- | |
454 This function is implemented using the procedure guaranteed by `Konig's | |
455 theorem | |
456 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_, | |
457 which proves an equivalence between a maximum matching and a minimum vertex | |
458 cover in bipartite graphs. | |
459 | |
460 Since a minimum vertex cover is the complement of a maximum independent set | |
461 for any graph, one can compute the maximum independent set of a bipartite | |
462 graph this way: | |
463 | |
464 >>> G = nx.complete_bipartite_graph(2, 3) | |
465 >>> matching = nx.bipartite.maximum_matching(G) | |
466 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching) | |
467 >>> independent_set = set(G) - vertex_cover | |
468 >>> print(list(independent_set)) | |
469 [2, 3, 4] | |
470 | |
471 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |
472 for further details on how bipartite graphs are handled in NetworkX. | |
473 | |
474 """ | |
475 # This is a Python implementation of the algorithm described at | |
476 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>. | |
477 L, R = bipartite_sets(G, top_nodes) | |
478 # Let U be the set of unmatched vertices in the left vertex set. | |
479 unmatched_vertices = set(G) - set(matching) | |
480 U = unmatched_vertices & L | |
481 # Let Z be the set of vertices that are either in U or are connected to U | |
482 # by alternating paths. | |
483 Z = _connected_by_alternating_paths(G, matching, U) | |
484 # At this point, every edge either has a right endpoint in Z or a left | |
485 # endpoint not in Z. This gives us the vertex cover. | |
486 return (L - Z) | (R & Z) | |
487 | |
488 | |
489 #: Returns the maximum cardinality matching in the given bipartite graph. | |
490 #: | |
491 #: This function is simply an alias for :func:`hopcroft_karp_matching`. | |
492 maximum_matching = hopcroft_karp_matching | |
493 | |
494 | |
495 def minimum_weight_full_matching(G, top_nodes=None, weight="weight"): | |
496 r"""Returns a minimum weight full matching of the bipartite graph `G`. | |
497 | |
498 Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights | |
499 :math:`w : E \to \mathbb{R}`. This function then produces a matching | |
500 :math:`M \subseteq E` with cardinality | |
501 | |
502 .. math:: | |
503 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert), | |
504 | |
505 which minimizes the sum of the weights of the edges included in the | |
506 matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such | |
507 matching exists. | |
508 | |
509 When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly | |
510 referred to as a perfect matching; here, since we allow | |
511 :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we | |
512 follow Karp [1]_ and refer to the matching as *full*. | |
513 | |
514 Parameters | |
515 ---------- | |
516 G : NetworkX graph | |
517 | |
518 Undirected bipartite graph | |
519 | |
520 top_nodes : container | |
521 | |
522 Container with all nodes in one bipartite node set. If not supplied | |
523 it will be computed. | |
524 | |
525 weight : string, optional (default='weight') | |
526 | |
527 The edge data key used to provide each value in the matrix. | |
528 | |
529 Returns | |
530 ------- | |
531 matches : dictionary | |
532 | |
533 The matching is returned as a dictionary, `matches`, such that | |
534 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched | |
535 nodes do not occur as a key in `matches`. | |
536 | |
537 Raises | |
538 ------ | |
539 ValueError | |
540 Raised if no full matching exists. | |
541 | |
542 ImportError | |
543 Raised if SciPy is not available. | |
544 | |
545 Notes | |
546 ----- | |
547 The problem of determining a minimum weight full matching is also known as | |
548 the rectangular linear assignment problem. This implementation defers the | |
549 calculation of the assignment to SciPy. | |
550 | |
551 References | |
552 ---------- | |
553 .. [1] Richard Manning Karp: | |
554 An algorithm to Solve the m x n Assignment Problem in Expected Time | |
555 O(mn log n). | |
556 Networks, 10(2):143–152, 1980. | |
557 | |
558 """ | |
559 try: | |
560 import numpy as np | |
561 import scipy.optimize | |
562 except ImportError as e: | |
563 raise ImportError( | |
564 "minimum_weight_full_matching requires SciPy: " + "https://scipy.org/" | |
565 ) from e | |
566 left, right = nx.bipartite.sets(G, top_nodes) | |
567 U = list(left) | |
568 V = list(right) | |
569 # We explicitly create the biadjancency matrix having infinities | |
570 # where edges are missing (as opposed to zeros, which is what one would | |
571 # get by using toarray on the sparse matrix). | |
572 weights_sparse = biadjacency_matrix( | |
573 G, row_order=U, column_order=V, weight=weight, format="coo" | |
574 ) | |
575 weights = np.full(weights_sparse.shape, np.inf) | |
576 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data | |
577 left_matches = scipy.optimize.linear_sum_assignment(weights) | |
578 d = {U[u]: V[v] for u, v in zip(*left_matches)} | |
579 # d will contain the matching from edges in left to right; we need to | |
580 # add the ones from right to left as well. | |
581 d.update({v: u for u, v in d.items()}) | |
582 return d |