### annotate env/lib/python3.9/site-packages/networkx/algorithms/bipartite/matching.py @ 0:4f3585e2f14bdraftdefaulttip

author shellac Mon, 22 Mar 2021 18:12:50 +0000
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
shellac
parents:
diff changeset
1 # This module uses material from the Wikipedia article Hopcroft--Karp algorithm
shellac
parents:
diff changeset
2 # <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on
shellac
parents:
diff changeset
3 # January 3, 2015, which is released under the Creative Commons
shellac
parents:
diff changeset
shellac
parents:
diff changeset
5 # <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes
shellac
parents:
diff changeset
6 # pseudocode, which has been translated into the corresponding Python code.
shellac
parents:
diff changeset
7 #
shellac
parents:
diff changeset
8 # Portions of this module use code from David Eppstein's Python Algorithms and
shellac
parents:
diff changeset
9 # Data Structures (PADS) library, which is dedicated to the public domain (for
shellac
parents:
diff changeset
shellac
parents:
diff changeset
11 """Provides functions for computing maximum cardinality matchings and minimum
shellac
parents:
diff changeset
12 weight full matchings in a bipartite graph.
shellac
parents:
diff changeset
13
shellac
parents:
diff changeset
14 If you don't care about the particular implementation of the maximum matching
shellac
parents:
diff changeset
15 algorithm, simply use the :func:maximum_matching. If you do care, you can
shellac
parents:
diff changeset
16 import one of the named maximum matching algorithms directly.
shellac
parents:
diff changeset
17
shellac
parents:
diff changeset
18 For example, to find a maximum matching in the complete bipartite graph with
shellac
parents:
diff changeset
19 two vertices on the left and three vertices on the right:
shellac
parents:
diff changeset
20
shellac
parents:
diff changeset
21 >>> G = nx.complete_bipartite_graph(2, 3)
shellac
parents:
diff changeset
22 >>> left, right = nx.bipartite.sets(G)
shellac
parents:
diff changeset
23 >>> list(left)
shellac
parents:
diff changeset
24 [0, 1]
shellac
parents:
diff changeset
25 >>> list(right)
shellac
parents:
diff changeset
26 [2, 3, 4]
shellac
parents:
diff changeset
27 >>> nx.bipartite.maximum_matching(G)
shellac
parents:
diff changeset
28 {0: 2, 1: 3, 2: 0, 3: 1}
shellac
parents:
diff changeset
29
shellac
parents:
diff changeset
30 The dictionary returned by :func:maximum_matching includes a mapping for
shellac
parents:
diff changeset
31 vertices in both the left and right vertex sets.
shellac
parents:
diff changeset
32
shellac
parents:
diff changeset
33 Similarly, :func:minimum_weight_full_matching produces, for a complete
shellac
parents:
diff changeset
34 weighted bipartite graph, a matching whose cardinality is the cardinality of
shellac
parents:
diff changeset
35 the smaller of the two partitions, and for which the sum of the weights of the
shellac
parents:
diff changeset
36 edges included in the matching is minimal.
shellac
parents:
diff changeset
37
shellac
parents:
diff changeset
38 """
shellac
parents:
diff changeset
39 import collections
shellac
parents:
diff changeset
40 import itertools
shellac
parents:
diff changeset
41
shellac
parents:
diff changeset
shellac
parents:
diff changeset
43 from networkx.algorithms.bipartite import sets as bipartite_sets
shellac
parents:
diff changeset
44 import networkx as nx
shellac
parents:
diff changeset
45
shellac
parents:
diff changeset
46 __all__ = [
shellac
parents:
diff changeset
47 "maximum_matching",
shellac
parents:
diff changeset
48 "hopcroft_karp_matching",
shellac
parents:
diff changeset
49 "eppstein_matching",
shellac
parents:
diff changeset
50 "to_vertex_cover",
shellac
parents:
diff changeset
51 "minimum_weight_full_matching",
shellac
parents:
diff changeset
52 ]
shellac
parents:
diff changeset
53
shellac
parents:
diff changeset
54 INFINITY = float("inf")
shellac
parents:
diff changeset
55
shellac
parents:
diff changeset
56
shellac
parents:
diff changeset
57 def hopcroft_karp_matching(G, top_nodes=None):
shellac
parents:
diff changeset
58 """Returns the maximum cardinality matching of the bipartite graph G.
shellac
parents:
diff changeset
59
shellac
parents:
diff changeset
60 A matching is a set of edges that do not share any nodes. A maximum
shellac
parents:
diff changeset
61 cardinality matching is a matching with the most edges possible. It
shellac
parents:
diff changeset
62 is not always unique. Finding a matching in a bipartite graph can be
shellac
parents:
diff changeset
63 treated as a networkx flow problem.
shellac
parents:
diff changeset
64
shellac
parents:
diff changeset
65 The functions hopcroft_karp_matching and maximum_matching
shellac
parents:
diff changeset
66 are aliases of the same function.
shellac
parents:
diff changeset
67
shellac
parents:
diff changeset
68 Parameters
shellac
parents:
diff changeset
69 ----------
shellac
parents:
diff changeset
70 G : NetworkX graph
shellac
parents:
diff changeset
71
shellac
parents:
diff changeset
72 Undirected bipartite graph
shellac
parents:
diff changeset
73
shellac
parents:
diff changeset
74 top_nodes : container of nodes
shellac
parents:
diff changeset
75
shellac
parents:
diff changeset
76 Container with all nodes in one bipartite node set. If not supplied
shellac
parents:
diff changeset
77 it will be computed. But if more than one solution exists an exception
shellac
parents:
diff changeset
78 will be raised.
shellac
parents:
diff changeset
79
shellac
parents:
diff changeset
80 Returns
shellac
parents:
diff changeset
81 -------
shellac
parents:
diff changeset
82 matches : dictionary
shellac
parents:
diff changeset
83
shellac
parents:
diff changeset
84 The matching is returned as a dictionary, matches, such that
shellac
parents:
diff changeset
85 matches[v] == w if node v is matched to node w. Unmatched
shellac
parents:
diff changeset
86 nodes do not occur as a key in matches.
shellac
parents:
diff changeset
87
shellac
parents:
diff changeset
88 Raises
shellac
parents:
diff changeset
89 ------
shellac
parents:
diff changeset
90 AmbiguousSolution
shellac
parents:
diff changeset
91 Raised if the input bipartite graph is disconnected and no container
shellac
parents:
diff changeset
92 with all nodes in one bipartite set is provided. When determining
shellac
parents:
diff changeset
93 the nodes in each bipartite set more than one valid solution is
shellac
parents:
diff changeset
94 possible if the input graph is disconnected.
shellac
parents:
diff changeset
95
shellac
parents:
diff changeset
96 Notes
shellac
parents:
diff changeset
97 -----
shellac
parents:
diff changeset
98 This function is implemented with the Hopcroft--Karp matching algorithm
shellac
parents:
diff changeset
99 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>_ for
shellac
parents:
diff changeset
100 bipartite graphs.
shellac
parents:
diff changeset
101
shellac
parents:
diff changeset
102 See :mod:bipartite documentation <networkx.algorithms.bipartite>
shellac
parents:
diff changeset
103 for further details on how bipartite graphs are handled in NetworkX.
shellac
parents:
diff changeset
104
shellac
parents:
diff changeset
shellac
parents:
diff changeset
106 --------
shellac
parents:
diff changeset
107 maximum_matching
shellac
parents:
diff changeset
108 hopcroft_karp_matching
shellac
parents:
diff changeset
109 eppstein_matching
shellac
parents:
diff changeset
110
shellac
parents:
diff changeset
111 References
shellac
parents:
diff changeset
112 ----------
shellac
parents:
diff changeset
113 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for
shellac
parents:
diff changeset
114 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**
shellac
parents:
diff changeset
115 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.
shellac
parents:
diff changeset
116
shellac
parents:
diff changeset
117 """
shellac
parents:
diff changeset
118 # First we define some auxiliary search functions.
shellac
parents:
diff changeset
119 #
shellac
parents:
diff changeset
120 # If you are a human reading these auxiliary search functions, the "global"
shellac
parents:
diff changeset
121 # variables leftmatches, rightmatches, distances, etc. are defined
shellac
parents:
diff changeset
122 # below the functions, so that they are initialized close to the initial
shellac
parents:
diff changeset
123 # invocation of the search functions.
shellac
parents:
diff changeset
shellac
parents:
diff changeset
125 for v in left:
shellac
parents:
diff changeset
126 if leftmatches[v] is None:
shellac
parents:
diff changeset
127 distances[v] = 0
shellac
parents:
diff changeset
128 queue.append(v)
shellac
parents:
diff changeset
129 else:
shellac
parents:
diff changeset
130 distances[v] = INFINITY
shellac
parents:
diff changeset
131 distances[None] = INFINITY
shellac
parents:
diff changeset
132 while queue:
shellac
parents:
diff changeset
133 v = queue.popleft()
shellac
parents:
diff changeset
134 if distances[v] < distances[None]:
shellac
parents:
diff changeset
135 for u in G[v]:
shellac
parents:
diff changeset
136 if distances[rightmatches[u]] is INFINITY:
shellac
parents:
diff changeset
137 distances[rightmatches[u]] = distances[v] + 1
shellac
parents:
diff changeset
138 queue.append(rightmatches[u])
shellac
parents:
diff changeset
139 return distances[None] is not INFINITY
shellac
parents:
diff changeset
140
shellac
parents:
diff changeset
141 def depth_first_search(v):
shellac
parents:
diff changeset
142 if v is not None:
shellac
parents:
diff changeset
143 for u in G[v]:
shellac
parents:
diff changeset
144 if distances[rightmatches[u]] == distances[v] + 1:
shellac
parents:
diff changeset
145 if depth_first_search(rightmatches[u]):
shellac
parents:
diff changeset
146 rightmatches[u] = v
shellac
parents:
diff changeset
147 leftmatches[v] = u
shellac
parents:
diff changeset
148 return True
shellac
parents:
diff changeset
149 distances[v] = INFINITY
shellac
parents:
diff changeset
150 return False
shellac
parents:
diff changeset
151 return True
shellac
parents:
diff changeset
152
shellac
parents:
diff changeset
153 # Initialize the "global" variables that maintain state during the search.
shellac
parents:
diff changeset
154 left, right = bipartite_sets(G, top_nodes)
shellac
parents:
diff changeset
155 leftmatches = {v: None for v in left}
shellac
parents:
diff changeset
156 rightmatches = {v: None for v in right}
shellac
parents:
diff changeset
157 distances = {}
shellac
parents:
diff changeset
158 queue = collections.deque()
shellac
parents:
diff changeset
159
shellac
parents:
diff changeset
160 # Implementation note: this counter is incremented as pairs are matched but
shellac
parents:
diff changeset
161 # it is currently not used elsewhere in the computation.
shellac
parents:
diff changeset
162 num_matched_pairs = 0
shellac
parents:
diff changeset
shellac
parents:
diff changeset
164 for v in left:
shellac
parents:
diff changeset
165 if leftmatches[v] is None:
shellac
parents:
diff changeset
166 if depth_first_search(v):
shellac
parents:
diff changeset
167 num_matched_pairs += 1
shellac
parents:
diff changeset
168
shellac
parents:
diff changeset
169 # Strip the entries matched to None.
shellac
parents:
diff changeset
170 leftmatches = {k: v for k, v in leftmatches.items() if v is not None}
shellac
parents:
diff changeset
171 rightmatches = {k: v for k, v in rightmatches.items() if v is not None}
shellac
parents:
diff changeset
172
shellac
parents:
diff changeset
173 # At this point, the left matches and the right matches are inverses of one
shellac
parents:
diff changeset
174 # another. In other words,
shellac
parents:
diff changeset
175 #
shellac
parents:
diff changeset
176 # leftmatches == {v, k for k, v in rightmatches.items()}
shellac
parents:
diff changeset
177 #
shellac
parents:
diff changeset
178 # Finally, we combine both the left matches and right matches.
shellac
parents:
diff changeset
179 return dict(itertools.chain(leftmatches.items(), rightmatches.items()))
shellac
parents:
diff changeset
180
shellac
parents:
diff changeset
181
shellac
parents:
diff changeset
182 def eppstein_matching(G, top_nodes=None):
shellac
parents:
diff changeset
183 """Returns the maximum cardinality matching of the bipartite graph G.
shellac
parents:
diff changeset
184
shellac
parents:
diff changeset
185 Parameters
shellac
parents:
diff changeset
186 ----------
shellac
parents:
diff changeset
187 G : NetworkX graph
shellac
parents:
diff changeset
188
shellac
parents:
diff changeset
189 Undirected bipartite graph
shellac
parents:
diff changeset
190
shellac
parents:
diff changeset
191 top_nodes : container
shellac
parents:
diff changeset
192
shellac
parents:
diff changeset
193 Container with all nodes in one bipartite node set. If not supplied
shellac
parents:
diff changeset
194 it will be computed. But if more than one solution exists an exception
shellac
parents:
diff changeset
195 will be raised.
shellac
parents:
diff changeset
196
shellac
parents:
diff changeset
197 Returns
shellac
parents:
diff changeset
198 -------
shellac
parents:
diff changeset
199 matches : dictionary
shellac
parents:
diff changeset
200
shellac
parents:
diff changeset
201 The matching is returned as a dictionary, matching, such that
shellac
parents:
diff changeset
202 matching[v] == w if node v is matched to node w. Unmatched
shellac
parents:
diff changeset
203 nodes do not occur as a key in matching.
shellac
parents:
diff changeset
204
shellac
parents:
diff changeset
205 Raises
shellac
parents:
diff changeset
206 ------
shellac
parents:
diff changeset
207 AmbiguousSolution
shellac
parents:
diff changeset
208 Raised if the input bipartite graph is disconnected and no container
shellac
parents:
diff changeset
209 with all nodes in one bipartite set is provided. When determining
shellac
parents:
diff changeset
210 the nodes in each bipartite set more than one valid solution is
shellac
parents:
diff changeset
211 possible if the input graph is disconnected.
shellac
parents:
diff changeset
212
shellac
parents:
diff changeset
213 Notes
shellac
parents:
diff changeset
214 -----
shellac
parents:
diff changeset
215 This function is implemented with David Eppstein's version of the algorithm
shellac
parents:
diff changeset
216 Hopcroft--Karp algorithm (see :func:hopcroft_karp_matching), which
shellac
parents:
diff changeset
217 originally appeared in the Python Algorithms and Data Structures library
shellac
parents:
diff changeset
shellac
parents:
diff changeset
219
shellac
parents:
diff changeset
220 See :mod:bipartite documentation <networkx.algorithms.bipartite>
shellac
parents:
diff changeset
221 for further details on how bipartite graphs are handled in NetworkX.
shellac
parents:
diff changeset
222
shellac
parents:
diff changeset
shellac
parents:
diff changeset
224 --------
shellac
parents:
diff changeset
225
shellac
parents:
diff changeset
226 hopcroft_karp_matching
shellac
parents:
diff changeset
227
shellac
parents:
diff changeset
228 """
shellac
parents:
diff changeset
229 # Due to its original implementation, a directed graph is needed
shellac
parents:
diff changeset
230 # so that the two sets of bipartite nodes can be distinguished
shellac
parents:
diff changeset
231 left, right = bipartite_sets(G, top_nodes)
shellac
parents:
diff changeset
232 G = nx.DiGraph(G.edges(left))
shellac
parents:
diff changeset
233 # initialize greedy matching (redundant, but faster than full search)
shellac
parents:
diff changeset
234 matching = {}
shellac
parents:
diff changeset
235 for u in G:
shellac
parents:
diff changeset
236 for v in G[u]:
shellac
parents:
diff changeset
237 if v not in matching:
shellac
parents:
diff changeset
238 matching[v] = u
shellac
parents:
diff changeset
239 break
shellac
parents:
diff changeset
240 while True:
shellac
parents:
diff changeset
241 # structure residual graph into layers
shellac
parents:
diff changeset
242 # pred[u] gives the neighbor in the previous layer for u in U
shellac
parents:
diff changeset
243 # preds[v] gives a list of neighbors in the previous layer for v in V
shellac
parents:
diff changeset
244 # unmatched gives a list of unmatched vertices in final layer of V,
shellac
parents:
diff changeset
245 # and is also used as a flag value for pred[u] when u is in the first
shellac
parents:
diff changeset
246 # layer
shellac
parents:
diff changeset
247 preds = {}
shellac
parents:
diff changeset
248 unmatched = []
shellac
parents:
diff changeset
249 pred = {u: unmatched for u in G}
shellac
parents:
diff changeset
250 for v in matching:
shellac
parents:
diff changeset
251 del pred[matching[v]]
shellac
parents:
diff changeset
252 layer = list(pred)
shellac
parents:
diff changeset
253
shellac
parents:
diff changeset
254 # repeatedly extend layering structure by another pair of layers
shellac
parents:
diff changeset
255 while layer and not unmatched:
shellac
parents:
diff changeset
256 newLayer = {}
shellac
parents:
diff changeset
257 for u in layer:
shellac
parents:
diff changeset
258 for v in G[u]:
shellac
parents:
diff changeset
259 if v not in preds:
shellac
parents:
diff changeset
260 newLayer.setdefault(v, []).append(u)
shellac
parents:
diff changeset
261 layer = []
shellac
parents:
diff changeset
262 for v in newLayer:
shellac
parents:
diff changeset
263 preds[v] = newLayer[v]
shellac
parents:
diff changeset
264 if v in matching:
shellac
parents:
diff changeset
265 layer.append(matching[v])
shellac
parents:
diff changeset
266 pred[matching[v]] = v
shellac
parents:
diff changeset
267 else:
shellac
parents:
diff changeset
268 unmatched.append(v)
shellac
parents:
diff changeset
269
shellac
parents:
diff changeset
270 # did we finish layering without finding any alternating paths?
shellac
parents:
diff changeset
271 if not unmatched:
shellac
parents:
diff changeset
272 unlayered = {}
shellac
parents:
diff changeset
273 for u in G:
shellac
parents:
diff changeset
274 # TODO Why is extra inner loop necessary?
shellac
parents:
diff changeset
275 for v in G[u]:
shellac
parents:
diff changeset
276 if v not in preds:
shellac
parents:
diff changeset
277 unlayered[v] = None
shellac
parents:
diff changeset
278 # TODO Originally, this function returned a three-tuple:
shellac
parents:
diff changeset
279 #
shellac
parents:
diff changeset
280 # return (matching, list(pred), list(unlayered))
shellac
parents:
diff changeset
281 #
shellac
parents:
diff changeset
282 # For some reason, the documentation for this function
shellac
parents:
diff changeset
283 # indicated that the second and third elements of the returned
shellac
parents:
diff changeset
284 # three-tuple would be the vertices in the left and right vertex
shellac
parents:
diff changeset
285 # sets, respectively, that are also in the maximum independent set.
shellac
parents:
diff changeset
286 # However, what I think the author meant was that the second
shellac
parents:
diff changeset
287 # element is the list of vertices that were unmatched and the third
shellac
parents:
diff changeset
288 # element was the list of vertices that were matched. Since that
shellac
parents:
diff changeset
289 # seems to be the case, they don't really need to be returned,
shellac
parents:
diff changeset
290 # since that information can be inferred from the matching
shellac
parents:
diff changeset
291 # dictionary.
shellac
parents:
diff changeset
292
shellac
parents:
diff changeset
293 # All the matched nodes must be a key in the dictionary
shellac
parents:
diff changeset
294 for key in matching.copy():
shellac
parents:
diff changeset
295 matching[matching[key]] = key
shellac
parents:
diff changeset
296 return matching
shellac
parents:
diff changeset
297
shellac
parents:
diff changeset
298 # recursively search backward through layers to find alternating paths
shellac
parents:
diff changeset
299 # recursion returns true if found path, false otherwise
shellac
parents:
diff changeset
300 def recurse(v):
shellac
parents:
diff changeset
301 if v in preds:
shellac
parents:
diff changeset
302 L = preds.pop(v)
shellac
parents:
diff changeset
303 for u in L:
shellac
parents:
diff changeset
304 if u in pred:
shellac
parents:
diff changeset
305 pu = pred.pop(u)
shellac
parents:
diff changeset
306 if pu is unmatched or recurse(pu):
shellac
parents:
diff changeset
307 matching[v] = u
shellac
parents:
diff changeset
308 return True
shellac
parents:
diff changeset
309 return False
shellac
parents:
diff changeset
310
shellac
parents:
diff changeset
311 for v in unmatched:
shellac
parents:
diff changeset
312 recurse(v)
shellac
parents:
diff changeset
313
shellac
parents:
diff changeset
314
shellac
parents:
diff changeset
315 def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets):
shellac
parents:
diff changeset
316 """Returns True if and only if the vertex v is connected to one of
shellac
parents:
diff changeset
317 the target vertices by an alternating path in G.
shellac
parents:
diff changeset
318
shellac
parents:
diff changeset
319 An *alternating path* is a path in which every other edge is in the
shellac
parents:
diff changeset
320 specified maximum matching (and the remaining edges in the path are not in
shellac
parents:
diff changeset
321 the matching). An alternating path may have matched edges in the even
shellac
parents:
diff changeset
322 positions or in the odd positions, as long as the edges alternate between
shellac
parents:
diff changeset
323 'matched' and 'unmatched'.
shellac
parents:
diff changeset
324
shellac
parents:
diff changeset
325 G is an undirected bipartite NetworkX graph.
shellac
parents:
diff changeset
326
shellac
parents:
diff changeset
327 v is a vertex in G.
shellac
parents:
diff changeset
328
shellac
parents:
diff changeset
329 matched_edges is a set of edges present in a maximum matching in G.
shellac
parents:
diff changeset
330
shellac
parents:
diff changeset
331 unmatched_edges is a set of edges not present in a maximum
shellac
parents:
diff changeset
332 matching in G.
shellac
parents:
diff changeset
333
shellac
parents:
diff changeset
334 targets is a set of vertices.
shellac
parents:
diff changeset
335
shellac
parents:
diff changeset
336 """
shellac
parents:
diff changeset
337
shellac
parents:
diff changeset
338 def _alternating_dfs(u, along_matched=True):
shellac
parents:
diff changeset
339 """Returns True if and only if u is connected to one of the
shellac
parents:
diff changeset
340 targets by an alternating path.
shellac
parents:
diff changeset
341
shellac
parents:
diff changeset
342 u is a vertex in the graph G.
shellac
parents:
diff changeset
343
shellac
parents:
diff changeset
344 If along_matched is True, this step of the depth-first search
shellac
parents:
diff changeset
345 will continue only through edges in the given matching. Otherwise, it
shellac
parents:
diff changeset
346 will continue only through edges *not* in the given matching.
shellac
parents:
diff changeset
347
shellac
parents:
diff changeset
348 """
shellac
parents:
diff changeset
349 if along_matched:
shellac
parents:
diff changeset
350 edges = itertools.cycle([matched_edges, unmatched_edges])
shellac
parents:
diff changeset
351 else:
shellac
parents:
diff changeset
352 edges = itertools.cycle([unmatched_edges, matched_edges])
shellac
parents:
diff changeset
353 visited = set()
shellac
parents:
diff changeset
354 stack = [(u, iter(G[u]), next(edges))]
shellac
parents:
diff changeset
355 while stack:
shellac
parents:
diff changeset
356 parent, children, valid_edges = stack[-1]
shellac
parents:
diff changeset
357 try:
shellac
parents:
diff changeset
358 child = next(children)
shellac
parents:
diff changeset
359 if child not in visited:
shellac
parents:
diff changeset
360 if (parent, child) in valid_edges or (child, parent) in valid_edges:
shellac
parents:
diff changeset
361 if child in targets:
shellac
parents:
diff changeset
362 return True
shellac
parents:
diff changeset
shellac
parents:
diff changeset
364 stack.append((child, iter(G[child]), next(edges)))
shellac
parents:
diff changeset
365 except StopIteration:
shellac
parents:
diff changeset
366 stack.pop()
shellac
parents:
diff changeset
367 return False
shellac
parents:
diff changeset
368
shellac
parents:
diff changeset
369 # Check for alternating paths starting with edges in the matching, then
shellac
parents:
diff changeset
370 # check for alternating paths starting with edges not in the
shellac
parents:
diff changeset
371 # matching.
shellac
parents:
diff changeset
372 return _alternating_dfs(v, along_matched=True) or _alternating_dfs(
shellac
parents:
diff changeset
373 v, along_matched=False
shellac
parents:
diff changeset
374 )
shellac
parents:
diff changeset
375
shellac
parents:
diff changeset
376
shellac
parents:
diff changeset
377 def _connected_by_alternating_paths(G, matching, targets):
shellac
parents:
diff changeset
378 """Returns the set of vertices that are connected to one of the target
shellac
parents:
diff changeset
379 vertices by an alternating path in G or are themselves a target.
shellac
parents:
diff changeset
380
shellac
parents:
diff changeset
381 An *alternating path* is a path in which every other edge is in the
shellac
parents:
diff changeset
382 specified maximum matching (and the remaining edges in the path are not in
shellac
parents:
diff changeset
383 the matching). An alternating path may have matched edges in the even
shellac
parents:
diff changeset
384 positions or in the odd positions, as long as the edges alternate between
shellac
parents:
diff changeset
385 'matched' and 'unmatched'.
shellac
parents:
diff changeset
386
shellac
parents:
diff changeset
387 G is an undirected bipartite NetworkX graph.
shellac
parents:
diff changeset
388
shellac
parents:
diff changeset
389 matching is a dictionary representing a maximum matching in G, as
shellac
parents:
diff changeset
390 returned by, for example, :func:maximum_matching.
shellac
parents:
diff changeset
391
shellac
parents:
diff changeset
392 targets is a set of vertices.
shellac
parents:
diff changeset
393
shellac
parents:
diff changeset
394 """
shellac
parents:
diff changeset
395 # Get the set of matched edges and the set of unmatched edges. Only include
shellac
parents:
diff changeset
396 # one version of each undirected edge (for example, include edge (1, 2) but
shellac
parents:
diff changeset
397 # not edge (2, 1)). Using frozensets as an intermediary step we do not
shellac
parents:
diff changeset
398 # require nodes to be orderable.
shellac
parents:
diff changeset
399 edge_sets = {frozenset((u, v)) for u, v in matching.items()}
shellac
parents:
diff changeset
400 matched_edges = {tuple(edge) for edge in edge_sets}
shellac
parents:
diff changeset
401 unmatched_edges = {
shellac
parents:
diff changeset
402 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets
shellac
parents:
diff changeset
403 }
shellac
parents:
diff changeset
404
shellac
parents:
diff changeset
405 return {
shellac
parents:
diff changeset
406 v
shellac
parents:
diff changeset
407 for v in G
shellac
parents:
diff changeset
408 if v in targets
shellac
parents:
diff changeset
409 or _is_connected_by_alternating_path(
shellac
parents:
diff changeset
410 G, v, matched_edges, unmatched_edges, targets
shellac
parents:
diff changeset
411 )
shellac
parents:
diff changeset
412 }
shellac
parents:
diff changeset
413
shellac
parents:
diff changeset
414
shellac
parents:
diff changeset
415 def to_vertex_cover(G, matching, top_nodes=None):
shellac
parents:
diff changeset
416 """Returns the minimum vertex cover corresponding to the given maximum
shellac
parents:
diff changeset
417 matching of the bipartite graph G.
shellac
parents:
diff changeset
418
shellac
parents:
diff changeset
419 Parameters
shellac
parents:
diff changeset
420 ----------
shellac
parents:
diff changeset
421 G : NetworkX graph
shellac
parents:
diff changeset
422
shellac
parents:
diff changeset
423 Undirected bipartite graph
shellac
parents:
diff changeset
424
shellac
parents:
diff changeset
425 matching : dictionary
shellac
parents:
diff changeset
426
shellac
parents:
diff changeset
427 A dictionary whose keys are vertices in G and whose values are the
shellac
parents:
diff changeset
428 distinct neighbors comprising the maximum matching for G, as returned
shellac
parents:
diff changeset
429 by, for example, :func:maximum_matching. The dictionary *must*
shellac
parents:
diff changeset
430 represent the maximum matching.
shellac
parents:
diff changeset
431
shellac
parents:
diff changeset
432 top_nodes : container
shellac
parents:
diff changeset
433
shellac
parents:
diff changeset
434 Container with all nodes in one bipartite node set. If not supplied
shellac
parents:
diff changeset
435 it will be computed. But if more than one solution exists an exception
shellac
parents:
diff changeset
436 will be raised.
shellac
parents:
diff changeset
437
shellac
parents:
diff changeset
438 Returns
shellac
parents:
diff changeset
439 -------
shellac
parents:
diff changeset
440 vertex_cover : :class:set
shellac
parents:
diff changeset
441
shellac
parents:
diff changeset
442 The minimum vertex cover in G.
shellac
parents:
diff changeset
443
shellac
parents:
diff changeset
444 Raises
shellac
parents:
diff changeset
445 ------
shellac
parents:
diff changeset
446 AmbiguousSolution
shellac
parents:
diff changeset
447 Raised if the input bipartite graph is disconnected and no container
shellac
parents:
diff changeset
448 with all nodes in one bipartite set is provided. When determining
shellac
parents:
diff changeset
449 the nodes in each bipartite set more than one valid solution is
shellac
parents:
diff changeset
450 possible if the input graph is disconnected.
shellac
parents:
diff changeset
451
shellac
parents:
diff changeset
452 Notes
shellac
parents:
diff changeset
453 -----
shellac
parents:
diff changeset
454 This function is implemented using the procedure guaranteed by Konig's
shellac
parents:
diff changeset
455 theorem
shellac
parents:
diff changeset
456 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>_,
shellac
parents:
diff changeset
457 which proves an equivalence between a maximum matching and a minimum vertex
shellac
parents:
diff changeset
458 cover in bipartite graphs.
shellac
parents:
diff changeset
459
shellac
parents:
diff changeset
460 Since a minimum vertex cover is the complement of a maximum independent set
shellac
parents:
diff changeset
461 for any graph, one can compute the maximum independent set of a bipartite
shellac
parents:
diff changeset
462 graph this way:
shellac
parents:
diff changeset
463
shellac
parents:
diff changeset
464 >>> G = nx.complete_bipartite_graph(2, 3)
shellac
parents:
diff changeset
465 >>> matching = nx.bipartite.maximum_matching(G)
shellac
parents:
diff changeset
466 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)
shellac
parents:
diff changeset
467 >>> independent_set = set(G) - vertex_cover
shellac
parents:
diff changeset
468 >>> print(list(independent_set))
shellac
parents:
diff changeset
469 [2, 3, 4]
shellac
parents:
diff changeset
470
shellac
parents:
diff changeset
471 See :mod:bipartite documentation <networkx.algorithms.bipartite>
shellac
parents:
diff changeset
472 for further details on how bipartite graphs are handled in NetworkX.
shellac
parents:
diff changeset
473
shellac
parents:
diff changeset
474 """
shellac
parents:
diff changeset
475 # This is a Python implementation of the algorithm described at
shellac
parents:
diff changeset
476 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.
shellac
parents:
diff changeset
477 L, R = bipartite_sets(G, top_nodes)
shellac
parents:
diff changeset
478 # Let U be the set of unmatched vertices in the left vertex set.
shellac
parents:
diff changeset
479 unmatched_vertices = set(G) - set(matching)
shellac
parents:
diff changeset
480 U = unmatched_vertices & L
shellac
parents:
diff changeset
481 # Let Z be the set of vertices that are either in U or are connected to U
shellac
parents:
diff changeset
482 # by alternating paths.
shellac
parents:
diff changeset
483 Z = _connected_by_alternating_paths(G, matching, U)
shellac
parents:
diff changeset
484 # At this point, every edge either has a right endpoint in Z or a left
shellac
parents:
diff changeset
485 # endpoint not in Z. This gives us the vertex cover.
shellac
parents:
diff changeset
486 return (L - Z) | (R & Z)
shellac
parents:
diff changeset
487
shellac
parents:
diff changeset
488
shellac
parents:
diff changeset
489 #: Returns the maximum cardinality matching in the given bipartite graph.
shellac
parents:
diff changeset
490 #:
shellac
parents:
diff changeset
491 #: This function is simply an alias for :func:hopcroft_karp_matching.
shellac
parents:
diff changeset
492 maximum_matching = hopcroft_karp_matching
shellac
parents:
diff changeset
493
shellac
parents:
diff changeset
494
shellac
parents:
diff changeset
495 def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):
shellac
parents:
diff changeset
496 r"""Returns a minimum weight full matching of the bipartite graph G.
shellac
parents:
diff changeset
497
shellac
parents:
diff changeset
498 Let :math:G = ((U, V), E) be a weighted bipartite graph with real weights
shellac
parents:
diff changeset
499 :math:w : E \to \mathbb{R}. This function then produces a matching
shellac
parents:
diff changeset
500 :math:M \subseteq E with cardinality
shellac
parents:
diff changeset
501
shellac
parents:
diff changeset
502 .. math::
shellac
parents:
diff changeset
503 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),
shellac
parents:
diff changeset
504
shellac
parents:
diff changeset
505 which minimizes the sum of the weights of the edges included in the
shellac
parents:
diff changeset
506 matching, :math:\sum_{e \in M} w(e), or raises an error if no such
shellac
parents:
diff changeset
507 matching exists.
shellac
parents:
diff changeset
508
shellac
parents:
diff changeset
509 When :math:\lvert U \rvert = \lvert V \rvert, this is commonly
shellac
parents:
diff changeset
510 referred to as a perfect matching; here, since we allow
shellac
parents:
diff changeset
511 :math:\lvert U \rvert and :math:\lvert V \rvert to differ, we
shellac
parents:
diff changeset
512 follow Karp [1]_ and refer to the matching as *full*.
shellac
parents:
diff changeset
513
shellac
parents:
diff changeset
514 Parameters
shellac
parents:
diff changeset
515 ----------
shellac
parents:
diff changeset
516 G : NetworkX graph
shellac
parents:
diff changeset
517
shellac
parents:
diff changeset
518 Undirected bipartite graph
shellac
parents:
diff changeset
519
shellac
parents:
diff changeset
520 top_nodes : container
shellac
parents:
diff changeset
521
shellac
parents:
diff changeset
522 Container with all nodes in one bipartite node set. If not supplied
shellac
parents:
diff changeset
523 it will be computed.
shellac
parents:
diff changeset
524
shellac
parents:
diff changeset
525 weight : string, optional (default='weight')
shellac
parents:
diff changeset
526
shellac
parents:
diff changeset
527 The edge data key used to provide each value in the matrix.
shellac
parents:
diff changeset
528
shellac
parents:
diff changeset
529 Returns
shellac
parents:
diff changeset
530 -------
shellac
parents:
diff changeset
531 matches : dictionary
shellac
parents:
diff changeset
532
shellac
parents:
diff changeset
533 The matching is returned as a dictionary, matches, such that
shellac
parents:
diff changeset
534 matches[v] == w if node v is matched to node w. Unmatched
shellac
parents:
diff changeset
535 nodes do not occur as a key in matches.
shellac
parents:
diff changeset
536
shellac
parents:
diff changeset
537 Raises
shellac
parents:
diff changeset
538 ------
shellac
parents:
diff changeset
539 ValueError
shellac
parents:
diff changeset
540 Raised if no full matching exists.
shellac
parents:
diff changeset
541
shellac
parents:
diff changeset
542 ImportError
shellac
parents:
diff changeset
543 Raised if SciPy is not available.
shellac
parents:
diff changeset
544
shellac
parents:
diff changeset
545 Notes
shellac
parents:
diff changeset
546 -----
shellac
parents:
diff changeset
547 The problem of determining a minimum weight full matching is also known as
shellac
parents:
diff changeset
548 the rectangular linear assignment problem. This implementation defers the
shellac
parents:
diff changeset
549 calculation of the assignment to SciPy.
shellac
parents:
diff changeset
550
shellac
parents:
diff changeset
551 References
shellac
parents:
diff changeset
552 ----------
shellac
parents:
diff changeset
553 .. [1] Richard Manning Karp:
shellac
parents:
diff changeset
554 An algorithm to Solve the m x n Assignment Problem in Expected Time
shellac
parents:
diff changeset
555 O(mn log n).
shellac
parents:
diff changeset
556 Networks, 10(2):143–152, 1980.
shellac
parents:
diff changeset
557
shellac
parents:
diff changeset
558 """
shellac
parents:
diff changeset
559 try:
shellac
parents:
diff changeset
560 import numpy as np
shellac
parents:
diff changeset
561 import scipy.optimize
shellac
parents:
diff changeset
562 except ImportError as e:
shellac
parents:
diff changeset
563 raise ImportError(
shellac
parents:
diff changeset
564 "minimum_weight_full_matching requires SciPy: " + "https://scipy.org/"
shellac
parents:
diff changeset
565 ) from e
shellac
parents:
diff changeset
566 left, right = nx.bipartite.sets(G, top_nodes)
shellac
parents:
diff changeset
567 U = list(left)
shellac
parents:
diff changeset
568 V = list(right)
shellac
parents:
diff changeset
569 # We explicitly create the biadjancency matrix having infinities
shellac
parents:
diff changeset
570 # where edges are missing (as opposed to zeros, which is what one would
shellac
parents:
diff changeset
571 # get by using toarray on the sparse matrix).
shellac
parents:
diff changeset
shellac
parents:
diff changeset
573 G, row_order=U, column_order=V, weight=weight, format="coo"
shellac
parents:
diff changeset
574 )
shellac
parents:
diff changeset
575 weights = np.full(weights_sparse.shape, np.inf)
shellac
parents:
diff changeset
576 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data
shellac
parents:
diff changeset
577 left_matches = scipy.optimize.linear_sum_assignment(weights)
shellac
parents:
diff changeset
578 d = {U[u]: V[v] for u, v in zip(*left_matches)}
shellac
parents:
diff changeset
579 # d will contain the matching from edges in left to right; we need to
shellac
parents:
diff changeset
580 # add the ones from right to left as well.