diff env/lib/python3.9/site-packages/networkx/algorithms/bipartite/matching.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/bipartite/matching.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,582 @@
+# This module uses material from the Wikipedia article Hopcroft--Karp algorithm
+# <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on
+# January 3, 2015, which is released under the Creative Commons
+# Attribution-Share-Alike License 3.0
+# <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes
+# pseudocode, which has been translated into the corresponding Python code.
+#
+# Portions of this module use code from David Eppstein's Python Algorithms and
+# Data Structures (PADS) library, which is dedicated to the public domain (for
+# proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>).
+"""Provides functions for computing maximum cardinality matchings and minimum
+weight full matchings in a bipartite graph.
+
+If you don't care about the particular implementation of the maximum matching
+algorithm, simply use the :func:`maximum_matching`. If you do care, you can
+import one of the named maximum matching algorithms directly.
+
+For example, to find a maximum matching in the complete bipartite graph with
+two vertices on the left and three vertices on the right:
+
+>>> G = nx.complete_bipartite_graph(2, 3)
+>>> left, right = nx.bipartite.sets(G)
+>>> list(left)
+[0, 1]
+>>> list(right)
+[2, 3, 4]
+>>> nx.bipartite.maximum_matching(G)
+{0: 2, 1: 3, 2: 0, 3: 1}
+
+The dictionary returned by :func:`maximum_matching` includes a mapping for
+vertices in both the left and right vertex sets.
+
+Similarly, :func:`minimum_weight_full_matching` produces, for a complete
+weighted bipartite graph, a matching whose cardinality is the cardinality of
+the smaller of the two partitions, and for which the sum of the weights of the
+edges included in the matching is minimal.
+
+"""
+import collections
+import itertools
+
+from networkx.algorithms.bipartite.matrix import biadjacency_matrix
+from networkx.algorithms.bipartite import sets as bipartite_sets
+import networkx as nx
+
+__all__ = [
+    "maximum_matching",
+    "hopcroft_karp_matching",
+    "eppstein_matching",
+    "to_vertex_cover",
+    "minimum_weight_full_matching",
+]
+
+INFINITY = float("inf")
+
+
+def hopcroft_karp_matching(G, top_nodes=None):
+    """Returns the maximum cardinality matching of the bipartite graph `G`.
+
+    A matching is a set of edges that do not share any nodes. A maximum 
+    cardinality matching is a matching with the most edges possible. It
+    is not always unique. Finding a matching in a bipartite graph can be
+    treated as a networkx flow problem.
+    
+    The functions ``hopcroft_karp_matching`` and ``maximum_matching``
+    are aliases of the same function. 
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+      Undirected bipartite graph
+
+    top_nodes : container of nodes
+
+      Container with all nodes in one bipartite node set. If not supplied
+      it will be computed. But if more than one solution exists an exception
+      will be raised.
+
+    Returns
+    -------
+    matches : dictionary
+
+      The matching is returned as a dictionary, `matches`, such that
+      ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
+      nodes do not occur as a key in `matches`.
+
+    Raises
+    ------
+    AmbiguousSolution
+      Raised if the input bipartite graph is disconnected and no container
+      with all nodes in one bipartite set is provided. When determining
+      the nodes in each bipartite set more than one valid solution is
+      possible if the input graph is disconnected.
+
+    Notes
+    -----
+    This function is implemented with the `Hopcroft--Karp matching algorithm
+    <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for
+    bipartite graphs.
+
+    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
+    for further details on how bipartite graphs are handled in NetworkX.
+
+    See Also
+    --------
+    maximum_matching
+    hopcroft_karp_matching
+    eppstein_matching
+
+    References
+    ----------
+    .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for
+       Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**
+       2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.
+
+    """
+    # First we define some auxiliary search functions.
+    #
+    # If you are a human reading these auxiliary search functions, the "global"
+    # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined
+    # below the functions, so that they are initialized close to the initial
+    # invocation of the search functions.
+    def breadth_first_search():
+        for v in left:
+            if leftmatches[v] is None:
+                distances[v] = 0
+                queue.append(v)
+            else:
+                distances[v] = INFINITY
+        distances[None] = INFINITY
+        while queue:
+            v = queue.popleft()
+            if distances[v] < distances[None]:
+                for u in G[v]:
+                    if distances[rightmatches[u]] is INFINITY:
+                        distances[rightmatches[u]] = distances[v] + 1
+                        queue.append(rightmatches[u])
+        return distances[None] is not INFINITY
+
+    def depth_first_search(v):
+        if v is not None:
+            for u in G[v]:
+                if distances[rightmatches[u]] == distances[v] + 1:
+                    if depth_first_search(rightmatches[u]):
+                        rightmatches[u] = v
+                        leftmatches[v] = u
+                        return True
+            distances[v] = INFINITY
+            return False
+        return True
+
+    # Initialize the "global" variables that maintain state during the search.
+    left, right = bipartite_sets(G, top_nodes)
+    leftmatches = {v: None for v in left}
+    rightmatches = {v: None for v in right}
+    distances = {}
+    queue = collections.deque()
+
+    # Implementation note: this counter is incremented as pairs are matched but
+    # it is currently not used elsewhere in the computation.
+    num_matched_pairs = 0
+    while breadth_first_search():
+        for v in left:
+            if leftmatches[v] is None:
+                if depth_first_search(v):
+                    num_matched_pairs += 1
+
+    # Strip the entries matched to `None`.
+    leftmatches = {k: v for k, v in leftmatches.items() if v is not None}
+    rightmatches = {k: v for k, v in rightmatches.items() if v is not None}
+
+    # At this point, the left matches and the right matches are inverses of one
+    # another. In other words,
+    #
+    #     leftmatches == {v, k for k, v in rightmatches.items()}
+    #
+    # Finally, we combine both the left matches and right matches.
+    return dict(itertools.chain(leftmatches.items(), rightmatches.items()))
+
+
+def eppstein_matching(G, top_nodes=None):
+    """Returns the maximum cardinality matching of the bipartite graph `G`.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+      Undirected bipartite graph
+
+    top_nodes : container
+
+      Container with all nodes in one bipartite node set. If not supplied
+      it will be computed. But if more than one solution exists an exception
+      will be raised.
+
+    Returns
+    -------
+    matches : dictionary
+
+      The matching is returned as a dictionary, `matching`, such that
+      ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched
+      nodes do not occur as a key in `matching`.
+
+    Raises
+    ------
+    AmbiguousSolution
+      Raised if the input bipartite graph is disconnected and no container
+      with all nodes in one bipartite set is provided. When determining
+      the nodes in each bipartite set more than one valid solution is
+      possible if the input graph is disconnected.
+
+    Notes
+    -----
+    This function is implemented with David Eppstein's version of the algorithm
+    Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which
+    originally appeared in the `Python Algorithms and Data Structures library
+    (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_.
+
+    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
+    for further details on how bipartite graphs are handled in NetworkX.
+
+    See Also
+    --------
+
+    hopcroft_karp_matching
+
+    """
+    # Due to its original implementation, a directed graph is needed
+    # so that the two sets of bipartite nodes can be distinguished
+    left, right = bipartite_sets(G, top_nodes)
+    G = nx.DiGraph(G.edges(left))
+    # initialize greedy matching (redundant, but faster than full search)
+    matching = {}
+    for u in G:
+        for v in G[u]:
+            if v not in matching:
+                matching[v] = u
+                break
+    while True:
+        # structure residual graph into layers
+        # pred[u] gives the neighbor in the previous layer for u in U
+        # preds[v] gives a list of neighbors in the previous layer for v in V
+        # unmatched gives a list of unmatched vertices in final layer of V,
+        # and is also used as a flag value for pred[u] when u is in the first
+        # layer
+        preds = {}
+        unmatched = []
+        pred = {u: unmatched for u in G}
+        for v in matching:
+            del pred[matching[v]]
+        layer = list(pred)
+
+        # repeatedly extend layering structure by another pair of layers
+        while layer and not unmatched:
+            newLayer = {}
+            for u in layer:
+                for v in G[u]:
+                    if v not in preds:
+                        newLayer.setdefault(v, []).append(u)
+            layer = []
+            for v in newLayer:
+                preds[v] = newLayer[v]
+                if v in matching:
+                    layer.append(matching[v])
+                    pred[matching[v]] = v
+                else:
+                    unmatched.append(v)
+
+        # did we finish layering without finding any alternating paths?
+        if not unmatched:
+            unlayered = {}
+            for u in G:
+                # TODO Why is extra inner loop necessary?
+                for v in G[u]:
+                    if v not in preds:
+                        unlayered[v] = None
+            # TODO Originally, this function returned a three-tuple:
+            #
+            #     return (matching, list(pred), list(unlayered))
+            #
+            # For some reason, the documentation for this function
+            # indicated that the second and third elements of the returned
+            # three-tuple would be the vertices in the left and right vertex
+            # sets, respectively, that are also in the maximum independent set.
+            # However, what I think the author meant was that the second
+            # element is the list of vertices that were unmatched and the third
+            # element was the list of vertices that were matched. Since that
+            # seems to be the case, they don't really need to be returned,
+            # since that information can be inferred from the matching
+            # dictionary.
+
+            # All the matched nodes must be a key in the dictionary
+            for key in matching.copy():
+                matching[matching[key]] = key
+            return matching
+
+        # recursively search backward through layers to find alternating paths
+        # recursion returns true if found path, false otherwise
+        def recurse(v):
+            if v in preds:
+                L = preds.pop(v)
+                for u in L:
+                    if u in pred:
+                        pu = pred.pop(u)
+                        if pu is unmatched or recurse(pu):
+                            matching[v] = u
+                            return True
+            return False
+
+        for v in unmatched:
+            recurse(v)
+
+
+def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets):
+    """Returns True if and only if the vertex `v` is connected to one of
+    the target vertices by an alternating path in `G`.
+
+    An *alternating path* is a path in which every other edge is in the
+    specified maximum matching (and the remaining edges in the path are not in
+    the matching). An alternating path may have matched edges in the even
+    positions or in the odd positions, as long as the edges alternate between
+    'matched' and 'unmatched'.
+
+    `G` is an undirected bipartite NetworkX graph.
+
+    `v` is a vertex in `G`.
+
+    `matched_edges` is a set of edges present in a maximum matching in `G`.
+
+    `unmatched_edges` is a set of edges not present in a maximum
+    matching in `G`.
+
+    `targets` is a set of vertices.
+
+    """
+
+    def _alternating_dfs(u, along_matched=True):
+        """Returns True if and only if `u` is connected to one of the
+        targets by an alternating path.
+
+        `u` is a vertex in the graph `G`.
+
+        If `along_matched` is True, this step of the depth-first search
+        will continue only through edges in the given matching. Otherwise, it
+        will continue only through edges *not* in the given matching.
+
+        """
+        if along_matched:
+            edges = itertools.cycle([matched_edges, unmatched_edges])
+        else:
+            edges = itertools.cycle([unmatched_edges, matched_edges])
+        visited = set()
+        stack = [(u, iter(G[u]), next(edges))]
+        while stack:
+            parent, children, valid_edges = stack[-1]
+            try:
+                child = next(children)
+                if child not in visited:
+                    if (parent, child) in valid_edges or (child, parent) in valid_edges:
+                        if child in targets:
+                            return True
+                        visited.add(child)
+                        stack.append((child, iter(G[child]), next(edges)))
+            except StopIteration:
+                stack.pop()
+        return False
+
+    # Check for alternating paths starting with edges in the matching, then
+    # check for alternating paths starting with edges not in the
+    # matching.
+    return _alternating_dfs(v, along_matched=True) or _alternating_dfs(
+        v, along_matched=False
+    )
+
+
+def _connected_by_alternating_paths(G, matching, targets):
+    """Returns the set of vertices that are connected to one of the target
+    vertices by an alternating path in `G` or are themselves a target.
+
+    An *alternating path* is a path in which every other edge is in the
+    specified maximum matching (and the remaining edges in the path are not in
+    the matching). An alternating path may have matched edges in the even
+    positions or in the odd positions, as long as the edges alternate between
+    'matched' and 'unmatched'.
+
+    `G` is an undirected bipartite NetworkX graph.
+
+    `matching` is a dictionary representing a maximum matching in `G`, as
+    returned by, for example, :func:`maximum_matching`.
+
+    `targets` is a set of vertices.
+
+    """
+    # Get the set of matched edges and the set of unmatched edges. Only include
+    # one version of each undirected edge (for example, include edge (1, 2) but
+    # not edge (2, 1)). Using frozensets as an intermediary step we do not
+    # require nodes to be orderable.
+    edge_sets = {frozenset((u, v)) for u, v in matching.items()}
+    matched_edges = {tuple(edge) for edge in edge_sets}
+    unmatched_edges = {
+        (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets
+    }
+
+    return {
+        v
+        for v in G
+        if v in targets
+        or _is_connected_by_alternating_path(
+            G, v, matched_edges, unmatched_edges, targets
+        )
+    }
+
+
+def to_vertex_cover(G, matching, top_nodes=None):
+    """Returns the minimum vertex cover corresponding to the given maximum
+    matching of the bipartite graph `G`.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+      Undirected bipartite graph
+
+    matching : dictionary
+
+      A dictionary whose keys are vertices in `G` and whose values are the
+      distinct neighbors comprising the maximum matching for `G`, as returned
+      by, for example, :func:`maximum_matching`. The dictionary *must*
+      represent the maximum matching.
+
+    top_nodes : container
+
+      Container with all nodes in one bipartite node set. If not supplied
+      it will be computed. But if more than one solution exists an exception
+      will be raised.
+
+    Returns
+    -------
+    vertex_cover : :class:`set`
+
+      The minimum vertex cover in `G`.
+
+    Raises
+    ------
+    AmbiguousSolution
+      Raised if the input bipartite graph is disconnected and no container
+      with all nodes in one bipartite set is provided. When determining
+      the nodes in each bipartite set more than one valid solution is
+      possible if the input graph is disconnected.
+
+    Notes
+    -----
+    This function is implemented using the procedure guaranteed by `Konig's
+    theorem
+    <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,
+    which proves an equivalence between a maximum matching and a minimum vertex
+    cover in bipartite graphs.
+
+    Since a minimum vertex cover is the complement of a maximum independent set
+    for any graph, one can compute the maximum independent set of a bipartite
+    graph this way:
+
+    >>> G = nx.complete_bipartite_graph(2, 3)
+    >>> matching = nx.bipartite.maximum_matching(G)
+    >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)
+    >>> independent_set = set(G) - vertex_cover
+    >>> print(list(independent_set))
+    [2, 3, 4]
+
+    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
+    for further details on how bipartite graphs are handled in NetworkX.
+
+    """
+    # This is a Python implementation of the algorithm described at
+    # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.
+    L, R = bipartite_sets(G, top_nodes)
+    # Let U be the set of unmatched vertices in the left vertex set.
+    unmatched_vertices = set(G) - set(matching)
+    U = unmatched_vertices & L
+    # Let Z be the set of vertices that are either in U or are connected to U
+    # by alternating paths.
+    Z = _connected_by_alternating_paths(G, matching, U)
+    # At this point, every edge either has a right endpoint in Z or a left
+    # endpoint not in Z. This gives us the vertex cover.
+    return (L - Z) | (R & Z)
+
+
+#: Returns the maximum cardinality matching in the given bipartite graph.
+#:
+#: This function is simply an alias for :func:`hopcroft_karp_matching`.
+maximum_matching = hopcroft_karp_matching
+
+
+def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):
+    r"""Returns a minimum weight full matching of the bipartite graph `G`.
+
+    Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights
+    :math:`w : E \to \mathbb{R}`. This function then produces a matching
+    :math:`M \subseteq E` with cardinality
+
+    .. math::
+       \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),
+
+    which minimizes the sum of the weights of the edges included in the
+    matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such
+    matching exists.
+
+    When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly
+    referred to as a perfect matching; here, since we allow
+    :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we
+    follow Karp [1]_ and refer to the matching as *full*.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+      Undirected bipartite graph
+
+    top_nodes : container
+
+      Container with all nodes in one bipartite node set. If not supplied
+      it will be computed.
+
+    weight : string, optional (default='weight')
+
+       The edge data key used to provide each value in the matrix.
+
+    Returns
+    -------
+    matches : dictionary
+
+      The matching is returned as a dictionary, `matches`, such that
+      ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
+      nodes do not occur as a key in `matches`.
+
+    Raises
+    ------
+    ValueError
+      Raised if no full matching exists.
+
+    ImportError
+      Raised if SciPy is not available.
+
+    Notes
+    -----
+    The problem of determining a minimum weight full matching is also known as
+    the rectangular linear assignment problem. This implementation defers the
+    calculation of the assignment to SciPy.
+
+    References
+    ----------
+    .. [1] Richard Manning Karp:
+       An algorithm to Solve the m x n Assignment Problem in Expected Time
+       O(mn log n).
+       Networks, 10(2):143–152, 1980.
+
+    """
+    try:
+        import numpy as np
+        import scipy.optimize
+    except ImportError as e:
+        raise ImportError(
+            "minimum_weight_full_matching requires SciPy: " + "https://scipy.org/"
+        ) from e
+    left, right = nx.bipartite.sets(G, top_nodes)
+    U = list(left)
+    V = list(right)
+    # We explicitly create the biadjancency matrix having infinities
+    # where edges are missing (as opposed to zeros, which is what one would
+    # get by using toarray on the sparse matrix).
+    weights_sparse = biadjacency_matrix(
+        G, row_order=U, column_order=V, weight=weight, format="coo"
+    )
+    weights = np.full(weights_sparse.shape, np.inf)
+    weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data
+    left_matches = scipy.optimize.linear_sum_assignment(weights)
+    d = {U[u]: V[v] for u, v in zip(*left_matches)}
+    # d will contain the matching from edges in left to right; we need to
+    # add the ones from right to left as well.
+    d.update({v: u for u, v in d.items()})
+    return d