comparison env/lib/python3.9/site-packages/networkx/algorithms/bipartite/matching.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # This module uses material from the Wikipedia article Hopcroft--Karp algorithm
2 # <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on
3 # January 3, 2015, which is released under the Creative Commons
4 # Attribution-Share-Alike License 3.0
5 # <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes
6 # pseudocode, which has been translated into the corresponding Python code.
7 #
8 # Portions of this module use code from David Eppstein's Python Algorithms and
9 # Data Structures (PADS) library, which is dedicated to the public domain (for
10 # proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>).
11 """Provides functions for computing maximum cardinality matchings and minimum
12 weight full matchings in a bipartite graph.
13
14 If you don't care about the particular implementation of the maximum matching
15 algorithm, simply use the :func:`maximum_matching`. If you do care, you can
16 import one of the named maximum matching algorithms directly.
17
18 For example, to find a maximum matching in the complete bipartite graph with
19 two vertices on the left and three vertices on the right:
20
21 >>> G = nx.complete_bipartite_graph(2, 3)
22 >>> left, right = nx.bipartite.sets(G)
23 >>> list(left)
24 [0, 1]
25 >>> list(right)
26 [2, 3, 4]
27 >>> nx.bipartite.maximum_matching(G)
28 {0: 2, 1: 3, 2: 0, 3: 1}
29
30 The dictionary returned by :func:`maximum_matching` includes a mapping for
31 vertices in both the left and right vertex sets.
32
33 Similarly, :func:`minimum_weight_full_matching` produces, for a complete
34 weighted bipartite graph, a matching whose cardinality is the cardinality of
35 the smaller of the two partitions, and for which the sum of the weights of the
36 edges included in the matching is minimal.
37
38 """
39 import collections
40 import itertools
41
42 from networkx.algorithms.bipartite.matrix import biadjacency_matrix
43 from networkx.algorithms.bipartite import sets as bipartite_sets
44 import networkx as nx
45
46 __all__ = [
47 "maximum_matching",
48 "hopcroft_karp_matching",
49 "eppstein_matching",
50 "to_vertex_cover",
51 "minimum_weight_full_matching",
52 ]
53
54 INFINITY = float("inf")
55
56
57 def hopcroft_karp_matching(G, top_nodes=None):
58 """Returns the maximum cardinality matching of the bipartite graph `G`.
59
60 A matching is a set of edges that do not share any nodes. A maximum
61 cardinality matching is a matching with the most edges possible. It
62 is not always unique. Finding a matching in a bipartite graph can be
63 treated as a networkx flow problem.
64
65 The functions ``hopcroft_karp_matching`` and ``maximum_matching``
66 are aliases of the same function.
67
68 Parameters
69 ----------
70 G : NetworkX graph
71
72 Undirected bipartite graph
73
74 top_nodes : container of nodes
75
76 Container with all nodes in one bipartite node set. If not supplied
77 it will be computed. But if more than one solution exists an exception
78 will be raised.
79
80 Returns
81 -------
82 matches : dictionary
83
84 The matching is returned as a dictionary, `matches`, such that
85 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
86 nodes do not occur as a key in `matches`.
87
88 Raises
89 ------
90 AmbiguousSolution
91 Raised if the input bipartite graph is disconnected and no container
92 with all nodes in one bipartite set is provided. When determining
93 the nodes in each bipartite set more than one valid solution is
94 possible if the input graph is disconnected.
95
96 Notes
97 -----
98 This function is implemented with the `Hopcroft--Karp matching algorithm
99 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for
100 bipartite graphs.
101
102 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
103 for further details on how bipartite graphs are handled in NetworkX.
104
105 See Also
106 --------
107 maximum_matching
108 hopcroft_karp_matching
109 eppstein_matching
110
111 References
112 ----------
113 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for
114 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**
115 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.
116
117 """
118 # First we define some auxiliary search functions.
119 #
120 # If you are a human reading these auxiliary search functions, the "global"
121 # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined
122 # below the functions, so that they are initialized close to the initial
123 # invocation of the search functions.
124 def breadth_first_search():
125 for v in left:
126 if leftmatches[v] is None:
127 distances[v] = 0
128 queue.append(v)
129 else:
130 distances[v] = INFINITY
131 distances[None] = INFINITY
132 while queue:
133 v = queue.popleft()
134 if distances[v] < distances[None]:
135 for u in G[v]:
136 if distances[rightmatches[u]] is INFINITY:
137 distances[rightmatches[u]] = distances[v] + 1
138 queue.append(rightmatches[u])
139 return distances[None] is not INFINITY
140
141 def depth_first_search(v):
142 if v is not None:
143 for u in G[v]:
144 if distances[rightmatches[u]] == distances[v] + 1:
145 if depth_first_search(rightmatches[u]):
146 rightmatches[u] = v
147 leftmatches[v] = u
148 return True
149 distances[v] = INFINITY
150 return False
151 return True
152
153 # Initialize the "global" variables that maintain state during the search.
154 left, right = bipartite_sets(G, top_nodes)
155 leftmatches = {v: None for v in left}
156 rightmatches = {v: None for v in right}
157 distances = {}
158 queue = collections.deque()
159
160 # Implementation note: this counter is incremented as pairs are matched but
161 # it is currently not used elsewhere in the computation.
162 num_matched_pairs = 0
163 while breadth_first_search():
164 for v in left:
165 if leftmatches[v] is None:
166 if depth_first_search(v):
167 num_matched_pairs += 1
168
169 # Strip the entries matched to `None`.
170 leftmatches = {k: v for k, v in leftmatches.items() if v is not None}
171 rightmatches = {k: v for k, v in rightmatches.items() if v is not None}
172
173 # At this point, the left matches and the right matches are inverses of one
174 # another. In other words,
175 #
176 # leftmatches == {v, k for k, v in rightmatches.items()}
177 #
178 # Finally, we combine both the left matches and right matches.
179 return dict(itertools.chain(leftmatches.items(), rightmatches.items()))
180
181
182 def eppstein_matching(G, top_nodes=None):
183 """Returns the maximum cardinality matching of the bipartite graph `G`.
184
185 Parameters
186 ----------
187 G : NetworkX graph
188
189 Undirected bipartite graph
190
191 top_nodes : container
192
193 Container with all nodes in one bipartite node set. If not supplied
194 it will be computed. But if more than one solution exists an exception
195 will be raised.
196
197 Returns
198 -------
199 matches : dictionary
200
201 The matching is returned as a dictionary, `matching`, such that
202 ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched
203 nodes do not occur as a key in `matching`.
204
205 Raises
206 ------
207 AmbiguousSolution
208 Raised if the input bipartite graph is disconnected and no container
209 with all nodes in one bipartite set is provided. When determining
210 the nodes in each bipartite set more than one valid solution is
211 possible if the input graph is disconnected.
212
213 Notes
214 -----
215 This function is implemented with David Eppstein's version of the algorithm
216 Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which
217 originally appeared in the `Python Algorithms and Data Structures library
218 (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_.
219
220 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
221 for further details on how bipartite graphs are handled in NetworkX.
222
223 See Also
224 --------
225
226 hopcroft_karp_matching
227
228 """
229 # Due to its original implementation, a directed graph is needed
230 # so that the two sets of bipartite nodes can be distinguished
231 left, right = bipartite_sets(G, top_nodes)
232 G = nx.DiGraph(G.edges(left))
233 # initialize greedy matching (redundant, but faster than full search)
234 matching = {}
235 for u in G:
236 for v in G[u]:
237 if v not in matching:
238 matching[v] = u
239 break
240 while True:
241 # structure residual graph into layers
242 # pred[u] gives the neighbor in the previous layer for u in U
243 # preds[v] gives a list of neighbors in the previous layer for v in V
244 # unmatched gives a list of unmatched vertices in final layer of V,
245 # and is also used as a flag value for pred[u] when u is in the first
246 # layer
247 preds = {}
248 unmatched = []
249 pred = {u: unmatched for u in G}
250 for v in matching:
251 del pred[matching[v]]
252 layer = list(pred)
253
254 # repeatedly extend layering structure by another pair of layers
255 while layer and not unmatched:
256 newLayer = {}
257 for u in layer:
258 for v in G[u]:
259 if v not in preds:
260 newLayer.setdefault(v, []).append(u)
261 layer = []
262 for v in newLayer:
263 preds[v] = newLayer[v]
264 if v in matching:
265 layer.append(matching[v])
266 pred[matching[v]] = v
267 else:
268 unmatched.append(v)
269
270 # did we finish layering without finding any alternating paths?
271 if not unmatched:
272 unlayered = {}
273 for u in G:
274 # TODO Why is extra inner loop necessary?
275 for v in G[u]:
276 if v not in preds:
277 unlayered[v] = None
278 # TODO Originally, this function returned a three-tuple:
279 #
280 # return (matching, list(pred), list(unlayered))
281 #
282 # For some reason, the documentation for this function
283 # indicated that the second and third elements of the returned
284 # three-tuple would be the vertices in the left and right vertex
285 # sets, respectively, that are also in the maximum independent set.
286 # However, what I think the author meant was that the second
287 # element is the list of vertices that were unmatched and the third
288 # element was the list of vertices that were matched. Since that
289 # seems to be the case, they don't really need to be returned,
290 # since that information can be inferred from the matching
291 # dictionary.
292
293 # All the matched nodes must be a key in the dictionary
294 for key in matching.copy():
295 matching[matching[key]] = key
296 return matching
297
298 # recursively search backward through layers to find alternating paths
299 # recursion returns true if found path, false otherwise
300 def recurse(v):
301 if v in preds:
302 L = preds.pop(v)
303 for u in L:
304 if u in pred:
305 pu = pred.pop(u)
306 if pu is unmatched or recurse(pu):
307 matching[v] = u
308 return True
309 return False
310
311 for v in unmatched:
312 recurse(v)
313
314
315 def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets):
316 """Returns True if and only if the vertex `v` is connected to one of
317 the target vertices by an alternating path in `G`.
318
319 An *alternating path* is a path in which every other edge is in the
320 specified maximum matching (and the remaining edges in the path are not in
321 the matching). An alternating path may have matched edges in the even
322 positions or in the odd positions, as long as the edges alternate between
323 'matched' and 'unmatched'.
324
325 `G` is an undirected bipartite NetworkX graph.
326
327 `v` is a vertex in `G`.
328
329 `matched_edges` is a set of edges present in a maximum matching in `G`.
330
331 `unmatched_edges` is a set of edges not present in a maximum
332 matching in `G`.
333
334 `targets` is a set of vertices.
335
336 """
337
338 def _alternating_dfs(u, along_matched=True):
339 """Returns True if and only if `u` is connected to one of the
340 targets by an alternating path.
341
342 `u` is a vertex in the graph `G`.
343
344 If `along_matched` is True, this step of the depth-first search
345 will continue only through edges in the given matching. Otherwise, it
346 will continue only through edges *not* in the given matching.
347
348 """
349 if along_matched:
350 edges = itertools.cycle([matched_edges, unmatched_edges])
351 else:
352 edges = itertools.cycle([unmatched_edges, matched_edges])
353 visited = set()
354 stack = [(u, iter(G[u]), next(edges))]
355 while stack:
356 parent, children, valid_edges = stack[-1]
357 try:
358 child = next(children)
359 if child not in visited:
360 if (parent, child) in valid_edges or (child, parent) in valid_edges:
361 if child in targets:
362 return True
363 visited.add(child)
364 stack.append((child, iter(G[child]), next(edges)))
365 except StopIteration:
366 stack.pop()
367 return False
368
369 # Check for alternating paths starting with edges in the matching, then
370 # check for alternating paths starting with edges not in the
371 # matching.
372 return _alternating_dfs(v, along_matched=True) or _alternating_dfs(
373 v, along_matched=False
374 )
375
376
377 def _connected_by_alternating_paths(G, matching, targets):
378 """Returns the set of vertices that are connected to one of the target
379 vertices by an alternating path in `G` or are themselves a target.
380
381 An *alternating path* is a path in which every other edge is in the
382 specified maximum matching (and the remaining edges in the path are not in
383 the matching). An alternating path may have matched edges in the even
384 positions or in the odd positions, as long as the edges alternate between
385 'matched' and 'unmatched'.
386
387 `G` is an undirected bipartite NetworkX graph.
388
389 `matching` is a dictionary representing a maximum matching in `G`, as
390 returned by, for example, :func:`maximum_matching`.
391
392 `targets` is a set of vertices.
393
394 """
395 # Get the set of matched edges and the set of unmatched edges. Only include
396 # one version of each undirected edge (for example, include edge (1, 2) but
397 # not edge (2, 1)). Using frozensets as an intermediary step we do not
398 # require nodes to be orderable.
399 edge_sets = {frozenset((u, v)) for u, v in matching.items()}
400 matched_edges = {tuple(edge) for edge in edge_sets}
401 unmatched_edges = {
402 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets
403 }
404
405 return {
406 v
407 for v in G
408 if v in targets
409 or _is_connected_by_alternating_path(
410 G, v, matched_edges, unmatched_edges, targets
411 )
412 }
413
414
415 def to_vertex_cover(G, matching, top_nodes=None):
416 """Returns the minimum vertex cover corresponding to the given maximum
417 matching of the bipartite graph `G`.
418
419 Parameters
420 ----------
421 G : NetworkX graph
422
423 Undirected bipartite graph
424
425 matching : dictionary
426
427 A dictionary whose keys are vertices in `G` and whose values are the
428 distinct neighbors comprising the maximum matching for `G`, as returned
429 by, for example, :func:`maximum_matching`. The dictionary *must*
430 represent the maximum matching.
431
432 top_nodes : container
433
434 Container with all nodes in one bipartite node set. If not supplied
435 it will be computed. But if more than one solution exists an exception
436 will be raised.
437
438 Returns
439 -------
440 vertex_cover : :class:`set`
441
442 The minimum vertex cover in `G`.
443
444 Raises
445 ------
446 AmbiguousSolution
447 Raised if the input bipartite graph is disconnected and no container
448 with all nodes in one bipartite set is provided. When determining
449 the nodes in each bipartite set more than one valid solution is
450 possible if the input graph is disconnected.
451
452 Notes
453 -----
454 This function is implemented using the procedure guaranteed by `Konig's
455 theorem
456 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,
457 which proves an equivalence between a maximum matching and a minimum vertex
458 cover in bipartite graphs.
459
460 Since a minimum vertex cover is the complement of a maximum independent set
461 for any graph, one can compute the maximum independent set of a bipartite
462 graph this way:
463
464 >>> G = nx.complete_bipartite_graph(2, 3)
465 >>> matching = nx.bipartite.maximum_matching(G)
466 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)
467 >>> independent_set = set(G) - vertex_cover
468 >>> print(list(independent_set))
469 [2, 3, 4]
470
471 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
472 for further details on how bipartite graphs are handled in NetworkX.
473
474 """
475 # This is a Python implementation of the algorithm described at
476 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.
477 L, R = bipartite_sets(G, top_nodes)
478 # Let U be the set of unmatched vertices in the left vertex set.
479 unmatched_vertices = set(G) - set(matching)
480 U = unmatched_vertices & L
481 # Let Z be the set of vertices that are either in U or are connected to U
482 # by alternating paths.
483 Z = _connected_by_alternating_paths(G, matching, U)
484 # At this point, every edge either has a right endpoint in Z or a left
485 # endpoint not in Z. This gives us the vertex cover.
486 return (L - Z) | (R & Z)
487
488
489 #: Returns the maximum cardinality matching in the given bipartite graph.
490 #:
491 #: This function is simply an alias for :func:`hopcroft_karp_matching`.
492 maximum_matching = hopcroft_karp_matching
493
494
495 def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):
496 r"""Returns a minimum weight full matching of the bipartite graph `G`.
497
498 Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights
499 :math:`w : E \to \mathbb{R}`. This function then produces a matching
500 :math:`M \subseteq E` with cardinality
501
502 .. math::
503 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),
504
505 which minimizes the sum of the weights of the edges included in the
506 matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such
507 matching exists.
508
509 When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly
510 referred to as a perfect matching; here, since we allow
511 :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we
512 follow Karp [1]_ and refer to the matching as *full*.
513
514 Parameters
515 ----------
516 G : NetworkX graph
517
518 Undirected bipartite graph
519
520 top_nodes : container
521
522 Container with all nodes in one bipartite node set. If not supplied
523 it will be computed.
524
525 weight : string, optional (default='weight')
526
527 The edge data key used to provide each value in the matrix.
528
529 Returns
530 -------
531 matches : dictionary
532
533 The matching is returned as a dictionary, `matches`, such that
534 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
535 nodes do not occur as a key in `matches`.
536
537 Raises
538 ------
539 ValueError
540 Raised if no full matching exists.
541
542 ImportError
543 Raised if SciPy is not available.
544
545 Notes
546 -----
547 The problem of determining a minimum weight full matching is also known as
548 the rectangular linear assignment problem. This implementation defers the
549 calculation of the assignment to SciPy.
550
551 References
552 ----------
553 .. [1] Richard Manning Karp:
554 An algorithm to Solve the m x n Assignment Problem in Expected Time
555 O(mn log n).
556 Networks, 10(2):143–152, 1980.
557
558 """
559 try:
560 import numpy as np
561 import scipy.optimize
562 except ImportError as e:
563 raise ImportError(
564 "minimum_weight_full_matching requires SciPy: " + "https://scipy.org/"
565 ) from e
566 left, right = nx.bipartite.sets(G, top_nodes)
567 U = list(left)
568 V = list(right)
569 # We explicitly create the biadjancency matrix having infinities
570 # where edges are missing (as opposed to zeros, which is what one would
571 # get by using toarray on the sparse matrix).
572 weights_sparse = biadjacency_matrix(
573 G, row_order=U, column_order=V, weight=weight, format="coo"
574 )
575 weights = np.full(weights_sparse.shape, np.inf)
576 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data
577 left_matches = scipy.optimize.linear_sum_assignment(weights)
578 d = {U[u]: V[v] for u, v in zip(*left_matches)}
579 # d will contain the matching from edges in left to right; we need to
580 # add the ones from right to left as well.
581 d.update({v: u for u, v in d.items()})
582 return d