## Mercurial > repos > shellac > sam_consensus_v3

### comparison env/lib/python3.9/site-packages/networkx/algorithms/bipartite/matching.py @ 0:4f3585e2f14b draft default tip

Find changesets by keywords (author, files, the commit message), revision
number or hash, or revset expression.

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"

author | shellac |
---|---|

date | Mon, 22 Mar 2021 18:12:50 +0000 |

parents | |

children |

comparison

equal
deleted
inserted
replaced

-1:000000000000 | 0:4f3585e2f14b |
---|---|

1 # This module uses material from the Wikipedia article Hopcroft--Karp algorithm | |

2 # <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on | |

3 # January 3, 2015, which is released under the Creative Commons | |

4 # Attribution-Share-Alike License 3.0 | |

5 # <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes | |

6 # pseudocode, which has been translated into the corresponding Python code. | |

7 # | |

8 # Portions of this module use code from David Eppstein's Python Algorithms and | |

9 # Data Structures (PADS) library, which is dedicated to the public domain (for | |

10 # proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>). | |

11 """Provides functions for computing maximum cardinality matchings and minimum | |

12 weight full matchings in a bipartite graph. | |

13 | |

14 If you don't care about the particular implementation of the maximum matching | |

15 algorithm, simply use the :func:`maximum_matching`. If you do care, you can | |

16 import one of the named maximum matching algorithms directly. | |

17 | |

18 For example, to find a maximum matching in the complete bipartite graph with | |

19 two vertices on the left and three vertices on the right: | |

20 | |

21 >>> G = nx.complete_bipartite_graph(2, 3) | |

22 >>> left, right = nx.bipartite.sets(G) | |

23 >>> list(left) | |

24 [0, 1] | |

25 >>> list(right) | |

26 [2, 3, 4] | |

27 >>> nx.bipartite.maximum_matching(G) | |

28 {0: 2, 1: 3, 2: 0, 3: 1} | |

29 | |

30 The dictionary returned by :func:`maximum_matching` includes a mapping for | |

31 vertices in both the left and right vertex sets. | |

32 | |

33 Similarly, :func:`minimum_weight_full_matching` produces, for a complete | |

34 weighted bipartite graph, a matching whose cardinality is the cardinality of | |

35 the smaller of the two partitions, and for which the sum of the weights of the | |

36 edges included in the matching is minimal. | |

37 | |

38 """ | |

39 import collections | |

40 import itertools | |

41 | |

42 from networkx.algorithms.bipartite.matrix import biadjacency_matrix | |

43 from networkx.algorithms.bipartite import sets as bipartite_sets | |

44 import networkx as nx | |

45 | |

46 __all__ = [ | |

47 "maximum_matching", | |

48 "hopcroft_karp_matching", | |

49 "eppstein_matching", | |

50 "to_vertex_cover", | |

51 "minimum_weight_full_matching", | |

52 ] | |

53 | |

54 INFINITY = float("inf") | |

55 | |

56 | |

57 def hopcroft_karp_matching(G, top_nodes=None): | |

58 """Returns the maximum cardinality matching of the bipartite graph `G`. | |

59 | |

60 A matching is a set of edges that do not share any nodes. A maximum | |

61 cardinality matching is a matching with the most edges possible. It | |

62 is not always unique. Finding a matching in a bipartite graph can be | |

63 treated as a networkx flow problem. | |

64 | |

65 The functions ``hopcroft_karp_matching`` and ``maximum_matching`` | |

66 are aliases of the same function. | |

67 | |

68 Parameters | |

69 ---------- | |

70 G : NetworkX graph | |

71 | |

72 Undirected bipartite graph | |

73 | |

74 top_nodes : container of nodes | |

75 | |

76 Container with all nodes in one bipartite node set. If not supplied | |

77 it will be computed. But if more than one solution exists an exception | |

78 will be raised. | |

79 | |

80 Returns | |

81 ------- | |

82 matches : dictionary | |

83 | |

84 The matching is returned as a dictionary, `matches`, such that | |

85 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched | |

86 nodes do not occur as a key in `matches`. | |

87 | |

88 Raises | |

89 ------ | |

90 AmbiguousSolution | |

91 Raised if the input bipartite graph is disconnected and no container | |

92 with all nodes in one bipartite set is provided. When determining | |

93 the nodes in each bipartite set more than one valid solution is | |

94 possible if the input graph is disconnected. | |

95 | |

96 Notes | |

97 ----- | |

98 This function is implemented with the `Hopcroft--Karp matching algorithm | |

99 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for | |

100 bipartite graphs. | |

101 | |

102 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |

103 for further details on how bipartite graphs are handled in NetworkX. | |

104 | |

105 See Also | |

106 -------- | |

107 maximum_matching | |

108 hopcroft_karp_matching | |

109 eppstein_matching | |

110 | |

111 References | |

112 ---------- | |

113 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for | |

114 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing** | |

115 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>. | |

116 | |

117 """ | |

118 # First we define some auxiliary search functions. | |

119 # | |

120 # If you are a human reading these auxiliary search functions, the "global" | |

121 # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined | |

122 # below the functions, so that they are initialized close to the initial | |

123 # invocation of the search functions. | |

124 def breadth_first_search(): | |

125 for v in left: | |

126 if leftmatches[v] is None: | |

127 distances[v] = 0 | |

128 queue.append(v) | |

129 else: | |

130 distances[v] = INFINITY | |

131 distances[None] = INFINITY | |

132 while queue: | |

133 v = queue.popleft() | |

134 if distances[v] < distances[None]: | |

135 for u in G[v]: | |

136 if distances[rightmatches[u]] is INFINITY: | |

137 distances[rightmatches[u]] = distances[v] + 1 | |

138 queue.append(rightmatches[u]) | |

139 return distances[None] is not INFINITY | |

140 | |

141 def depth_first_search(v): | |

142 if v is not None: | |

143 for u in G[v]: | |

144 if distances[rightmatches[u]] == distances[v] + 1: | |

145 if depth_first_search(rightmatches[u]): | |

146 rightmatches[u] = v | |

147 leftmatches[v] = u | |

148 return True | |

149 distances[v] = INFINITY | |

150 return False | |

151 return True | |

152 | |

153 # Initialize the "global" variables that maintain state during the search. | |

154 left, right = bipartite_sets(G, top_nodes) | |

155 leftmatches = {v: None for v in left} | |

156 rightmatches = {v: None for v in right} | |

157 distances = {} | |

158 queue = collections.deque() | |

159 | |

160 # Implementation note: this counter is incremented as pairs are matched but | |

161 # it is currently not used elsewhere in the computation. | |

162 num_matched_pairs = 0 | |

163 while breadth_first_search(): | |

164 for v in left: | |

165 if leftmatches[v] is None: | |

166 if depth_first_search(v): | |

167 num_matched_pairs += 1 | |

168 | |

169 # Strip the entries matched to `None`. | |

170 leftmatches = {k: v for k, v in leftmatches.items() if v is not None} | |

171 rightmatches = {k: v for k, v in rightmatches.items() if v is not None} | |

172 | |

173 # At this point, the left matches and the right matches are inverses of one | |

174 # another. In other words, | |

175 # | |

176 # leftmatches == {v, k for k, v in rightmatches.items()} | |

177 # | |

178 # Finally, we combine both the left matches and right matches. | |

179 return dict(itertools.chain(leftmatches.items(), rightmatches.items())) | |

180 | |

181 | |

182 def eppstein_matching(G, top_nodes=None): | |

183 """Returns the maximum cardinality matching of the bipartite graph `G`. | |

184 | |

185 Parameters | |

186 ---------- | |

187 G : NetworkX graph | |

188 | |

189 Undirected bipartite graph | |

190 | |

191 top_nodes : container | |

192 | |

193 Container with all nodes in one bipartite node set. If not supplied | |

194 it will be computed. But if more than one solution exists an exception | |

195 will be raised. | |

196 | |

197 Returns | |

198 ------- | |

199 matches : dictionary | |

200 | |

201 The matching is returned as a dictionary, `matching`, such that | |

202 ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched | |

203 nodes do not occur as a key in `matching`. | |

204 | |

205 Raises | |

206 ------ | |

207 AmbiguousSolution | |

208 Raised if the input bipartite graph is disconnected and no container | |

209 with all nodes in one bipartite set is provided. When determining | |

210 the nodes in each bipartite set more than one valid solution is | |

211 possible if the input graph is disconnected. | |

212 | |

213 Notes | |

214 ----- | |

215 This function is implemented with David Eppstein's version of the algorithm | |

216 Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which | |

217 originally appeared in the `Python Algorithms and Data Structures library | |

218 (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_. | |

219 | |

220 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |

221 for further details on how bipartite graphs are handled in NetworkX. | |

222 | |

223 See Also | |

224 -------- | |

225 | |

226 hopcroft_karp_matching | |

227 | |

228 """ | |

229 # Due to its original implementation, a directed graph is needed | |

230 # so that the two sets of bipartite nodes can be distinguished | |

231 left, right = bipartite_sets(G, top_nodes) | |

232 G = nx.DiGraph(G.edges(left)) | |

233 # initialize greedy matching (redundant, but faster than full search) | |

234 matching = {} | |

235 for u in G: | |

236 for v in G[u]: | |

237 if v not in matching: | |

238 matching[v] = u | |

239 break | |

240 while True: | |

241 # structure residual graph into layers | |

242 # pred[u] gives the neighbor in the previous layer for u in U | |

243 # preds[v] gives a list of neighbors in the previous layer for v in V | |

244 # unmatched gives a list of unmatched vertices in final layer of V, | |

245 # and is also used as a flag value for pred[u] when u is in the first | |

246 # layer | |

247 preds = {} | |

248 unmatched = [] | |

249 pred = {u: unmatched for u in G} | |

250 for v in matching: | |

251 del pred[matching[v]] | |

252 layer = list(pred) | |

253 | |

254 # repeatedly extend layering structure by another pair of layers | |

255 while layer and not unmatched: | |

256 newLayer = {} | |

257 for u in layer: | |

258 for v in G[u]: | |

259 if v not in preds: | |

260 newLayer.setdefault(v, []).append(u) | |

261 layer = [] | |

262 for v in newLayer: | |

263 preds[v] = newLayer[v] | |

264 if v in matching: | |

265 layer.append(matching[v]) | |

266 pred[matching[v]] = v | |

267 else: | |

268 unmatched.append(v) | |

269 | |

270 # did we finish layering without finding any alternating paths? | |

271 if not unmatched: | |

272 unlayered = {} | |

273 for u in G: | |

274 # TODO Why is extra inner loop necessary? | |

275 for v in G[u]: | |

276 if v not in preds: | |

277 unlayered[v] = None | |

278 # TODO Originally, this function returned a three-tuple: | |

279 # | |

280 # return (matching, list(pred), list(unlayered)) | |

281 # | |

282 # For some reason, the documentation for this function | |

283 # indicated that the second and third elements of the returned | |

284 # three-tuple would be the vertices in the left and right vertex | |

285 # sets, respectively, that are also in the maximum independent set. | |

286 # However, what I think the author meant was that the second | |

287 # element is the list of vertices that were unmatched and the third | |

288 # element was the list of vertices that were matched. Since that | |

289 # seems to be the case, they don't really need to be returned, | |

290 # since that information can be inferred from the matching | |

291 # dictionary. | |

292 | |

293 # All the matched nodes must be a key in the dictionary | |

294 for key in matching.copy(): | |

295 matching[matching[key]] = key | |

296 return matching | |

297 | |

298 # recursively search backward through layers to find alternating paths | |

299 # recursion returns true if found path, false otherwise | |

300 def recurse(v): | |

301 if v in preds: | |

302 L = preds.pop(v) | |

303 for u in L: | |

304 if u in pred: | |

305 pu = pred.pop(u) | |

306 if pu is unmatched or recurse(pu): | |

307 matching[v] = u | |

308 return True | |

309 return False | |

310 | |

311 for v in unmatched: | |

312 recurse(v) | |

313 | |

314 | |

315 def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets): | |

316 """Returns True if and only if the vertex `v` is connected to one of | |

317 the target vertices by an alternating path in `G`. | |

318 | |

319 An *alternating path* is a path in which every other edge is in the | |

320 specified maximum matching (and the remaining edges in the path are not in | |

321 the matching). An alternating path may have matched edges in the even | |

322 positions or in the odd positions, as long as the edges alternate between | |

323 'matched' and 'unmatched'. | |

324 | |

325 `G` is an undirected bipartite NetworkX graph. | |

326 | |

327 `v` is a vertex in `G`. | |

328 | |

329 `matched_edges` is a set of edges present in a maximum matching in `G`. | |

330 | |

331 `unmatched_edges` is a set of edges not present in a maximum | |

332 matching in `G`. | |

333 | |

334 `targets` is a set of vertices. | |

335 | |

336 """ | |

337 | |

338 def _alternating_dfs(u, along_matched=True): | |

339 """Returns True if and only if `u` is connected to one of the | |

340 targets by an alternating path. | |

341 | |

342 `u` is a vertex in the graph `G`. | |

343 | |

344 If `along_matched` is True, this step of the depth-first search | |

345 will continue only through edges in the given matching. Otherwise, it | |

346 will continue only through edges *not* in the given matching. | |

347 | |

348 """ | |

349 if along_matched: | |

350 edges = itertools.cycle([matched_edges, unmatched_edges]) | |

351 else: | |

352 edges = itertools.cycle([unmatched_edges, matched_edges]) | |

353 visited = set() | |

354 stack = [(u, iter(G[u]), next(edges))] | |

355 while stack: | |

356 parent, children, valid_edges = stack[-1] | |

357 try: | |

358 child = next(children) | |

359 if child not in visited: | |

360 if (parent, child) in valid_edges or (child, parent) in valid_edges: | |

361 if child in targets: | |

362 return True | |

363 visited.add(child) | |

364 stack.append((child, iter(G[child]), next(edges))) | |

365 except StopIteration: | |

366 stack.pop() | |

367 return False | |

368 | |

369 # Check for alternating paths starting with edges in the matching, then | |

370 # check for alternating paths starting with edges not in the | |

371 # matching. | |

372 return _alternating_dfs(v, along_matched=True) or _alternating_dfs( | |

373 v, along_matched=False | |

374 ) | |

375 | |

376 | |

377 def _connected_by_alternating_paths(G, matching, targets): | |

378 """Returns the set of vertices that are connected to one of the target | |

379 vertices by an alternating path in `G` or are themselves a target. | |

380 | |

381 An *alternating path* is a path in which every other edge is in the | |

382 specified maximum matching (and the remaining edges in the path are not in | |

383 the matching). An alternating path may have matched edges in the even | |

384 positions or in the odd positions, as long as the edges alternate between | |

385 'matched' and 'unmatched'. | |

386 | |

387 `G` is an undirected bipartite NetworkX graph. | |

388 | |

389 `matching` is a dictionary representing a maximum matching in `G`, as | |

390 returned by, for example, :func:`maximum_matching`. | |

391 | |

392 `targets` is a set of vertices. | |

393 | |

394 """ | |

395 # Get the set of matched edges and the set of unmatched edges. Only include | |

396 # one version of each undirected edge (for example, include edge (1, 2) but | |

397 # not edge (2, 1)). Using frozensets as an intermediary step we do not | |

398 # require nodes to be orderable. | |

399 edge_sets = {frozenset((u, v)) for u, v in matching.items()} | |

400 matched_edges = {tuple(edge) for edge in edge_sets} | |

401 unmatched_edges = { | |

402 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets | |

403 } | |

404 | |

405 return { | |

406 v | |

407 for v in G | |

408 if v in targets | |

409 or _is_connected_by_alternating_path( | |

410 G, v, matched_edges, unmatched_edges, targets | |

411 ) | |

412 } | |

413 | |

414 | |

415 def to_vertex_cover(G, matching, top_nodes=None): | |

416 """Returns the minimum vertex cover corresponding to the given maximum | |

417 matching of the bipartite graph `G`. | |

418 | |

419 Parameters | |

420 ---------- | |

421 G : NetworkX graph | |

422 | |

423 Undirected bipartite graph | |

424 | |

425 matching : dictionary | |

426 | |

427 A dictionary whose keys are vertices in `G` and whose values are the | |

428 distinct neighbors comprising the maximum matching for `G`, as returned | |

429 by, for example, :func:`maximum_matching`. The dictionary *must* | |

430 represent the maximum matching. | |

431 | |

432 top_nodes : container | |

433 | |

434 Container with all nodes in one bipartite node set. If not supplied | |

435 it will be computed. But if more than one solution exists an exception | |

436 will be raised. | |

437 | |

438 Returns | |

439 ------- | |

440 vertex_cover : :class:`set` | |

441 | |

442 The minimum vertex cover in `G`. | |

443 | |

444 Raises | |

445 ------ | |

446 AmbiguousSolution | |

447 Raised if the input bipartite graph is disconnected and no container | |

448 with all nodes in one bipartite set is provided. When determining | |

449 the nodes in each bipartite set more than one valid solution is | |

450 possible if the input graph is disconnected. | |

451 | |

452 Notes | |

453 ----- | |

454 This function is implemented using the procedure guaranteed by `Konig's | |

455 theorem | |

456 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_, | |

457 which proves an equivalence between a maximum matching and a minimum vertex | |

458 cover in bipartite graphs. | |

459 | |

460 Since a minimum vertex cover is the complement of a maximum independent set | |

461 for any graph, one can compute the maximum independent set of a bipartite | |

462 graph this way: | |

463 | |

464 >>> G = nx.complete_bipartite_graph(2, 3) | |

465 >>> matching = nx.bipartite.maximum_matching(G) | |

466 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching) | |

467 >>> independent_set = set(G) - vertex_cover | |

468 >>> print(list(independent_set)) | |

469 [2, 3, 4] | |

470 | |

471 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` | |

472 for further details on how bipartite graphs are handled in NetworkX. | |

473 | |

474 """ | |

475 # This is a Python implementation of the algorithm described at | |

476 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>. | |

477 L, R = bipartite_sets(G, top_nodes) | |

478 # Let U be the set of unmatched vertices in the left vertex set. | |

479 unmatched_vertices = set(G) - set(matching) | |

480 U = unmatched_vertices & L | |

481 # Let Z be the set of vertices that are either in U or are connected to U | |

482 # by alternating paths. | |

483 Z = _connected_by_alternating_paths(G, matching, U) | |

484 # At this point, every edge either has a right endpoint in Z or a left | |

485 # endpoint not in Z. This gives us the vertex cover. | |

486 return (L - Z) | (R & Z) | |

487 | |

488 | |

489 #: Returns the maximum cardinality matching in the given bipartite graph. | |

490 #: | |

491 #: This function is simply an alias for :func:`hopcroft_karp_matching`. | |

492 maximum_matching = hopcroft_karp_matching | |

493 | |

494 | |

495 def minimum_weight_full_matching(G, top_nodes=None, weight="weight"): | |

496 r"""Returns a minimum weight full matching of the bipartite graph `G`. | |

497 | |

498 Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights | |

499 :math:`w : E \to \mathbb{R}`. This function then produces a matching | |

500 :math:`M \subseteq E` with cardinality | |

501 | |

502 .. math:: | |

503 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert), | |

504 | |

505 which minimizes the sum of the weights of the edges included in the | |

506 matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such | |

507 matching exists. | |

508 | |

509 When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly | |

510 referred to as a perfect matching; here, since we allow | |

511 :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we | |

512 follow Karp [1]_ and refer to the matching as *full*. | |

513 | |

514 Parameters | |

515 ---------- | |

516 G : NetworkX graph | |

517 | |

518 Undirected bipartite graph | |

519 | |

520 top_nodes : container | |

521 | |

522 Container with all nodes in one bipartite node set. If not supplied | |

523 it will be computed. | |

524 | |

525 weight : string, optional (default='weight') | |

526 | |

527 The edge data key used to provide each value in the matrix. | |

528 | |

529 Returns | |

530 ------- | |

531 matches : dictionary | |

532 | |

533 The matching is returned as a dictionary, `matches`, such that | |

534 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched | |

535 nodes do not occur as a key in `matches`. | |

536 | |

537 Raises | |

538 ------ | |

539 ValueError | |

540 Raised if no full matching exists. | |

541 | |

542 ImportError | |

543 Raised if SciPy is not available. | |

544 | |

545 Notes | |

546 ----- | |

547 The problem of determining a minimum weight full matching is also known as | |

548 the rectangular linear assignment problem. This implementation defers the | |

549 calculation of the assignment to SciPy. | |

550 | |

551 References | |

552 ---------- | |

553 .. [1] Richard Manning Karp: | |

554 An algorithm to Solve the m x n Assignment Problem in Expected Time | |

555 O(mn log n). | |

556 Networks, 10(2):143–152, 1980. | |

557 | |

558 """ | |

559 try: | |

560 import numpy as np | |

561 import scipy.optimize | |

562 except ImportError as e: | |

563 raise ImportError( | |

564 "minimum_weight_full_matching requires SciPy: " + "https://scipy.org/" | |

565 ) from e | |

566 left, right = nx.bipartite.sets(G, top_nodes) | |

567 U = list(left) | |

568 V = list(right) | |

569 # We explicitly create the biadjancency matrix having infinities | |

570 # where edges are missing (as opposed to zeros, which is what one would | |

571 # get by using toarray on the sparse matrix). | |

572 weights_sparse = biadjacency_matrix( | |

573 G, row_order=U, column_order=V, weight=weight, format="coo" | |

574 ) | |

575 weights = np.full(weights_sparse.shape, np.inf) | |

576 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data | |

577 left_matches = scipy.optimize.linear_sum_assignment(weights) | |

578 d = {U[u]: V[v] for u, v in zip(*left_matches)} | |

579 # d will contain the matching from edges in left to right; we need to | |

580 # add the ones from right to left as well. | |

581 d.update({v: u for u, v in d.items()}) | |

582 return d |