diff env/lib/python3.9/site-packages/networkx/algorithms/graph_hashing.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/networkx/algorithms/graph_hashing.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,151 @@
+"""
+Functions for hashing graphs to strings.
+Isomorphic graphs should be assigned identical hashes.
+For now, only Weisfeiler-Lehman hashing is implemented.
+"""
+
+from collections import Counter
+from hashlib import blake2b
+
+__all__ = ["weisfeiler_lehman_graph_hash"]
+
+
def weisfeiler_lehman_graph_hash(
    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
):
    """Return Weisfeiler Lehman (WL) graph hash.

    The function iteratively aggregates and hashes neighbourhoods of each node.
    After each node's neighbors are hashed to obtain updated node labels,
    a hashed histogram of resulting labels is returned as the final hash.

    Hashes are identical for isomorphic graphs and there are strong guarantees
    that non-isomorphic graphs will get different hashes. See [1] for details.

    Note: Similarity between hashes does not imply similarity between graphs.

    If no node or edge attributes are provided, the degree of each node
    is used as its initial label.
    Otherwise, node and/or edge labels are used to compute the hash.

    Parameters
    ----------
    G: graph
        The graph to be hashed.
        Can have node and/or edge attributes. Can also have no attributes.
    edge_attr: string
        The key in edge attribute dictionary to be used for hashing.
        If None, edge labels are ignored.
    node_attr: string
        The key in node attribute dictionary to be used for hashing.
        If None, and no edge_attr given, use
        degree of node as label.
    iterations: int
        Number of neighbor aggregations to perform.
        Should be larger for larger graphs.
    digest_size: int
        Size of blake2b hash digest to use for hashing node labels.

    Returns
    -------
    h : string
        Hexadecimal string corresponding to hash of the input graph.

    Examples
    --------
    Two graphs with edge attributes that are isomorphic, except for
    differences in the edge labels.

    >>> G1 = nx.Graph()
    >>> G1.add_edges_from(
    ...     [
    ...         (1, 2, {"label": "A"}),
    ...         (2, 3, {"label": "A"}),
    ...         (3, 1, {"label": "A"}),
    ...         (1, 4, {"label": "B"}),
    ...     ]
    ... )
    >>> G2 = nx.Graph()
    >>> G2.add_edges_from(
    ...     [
    ...         (5, 6, {"label": "B"}),
    ...         (6, 7, {"label": "A"}),
    ...         (7, 5, {"label": "A"}),
    ...         (7, 8, {"label": "A"}),
    ...     ]
    ... )

    Omitting the `edge_attr` option, results in identical hashes.

    >>> weisfeiler_lehman_graph_hash(G1)
    '0db442538bb6dc81d675bd94e6ebb7ca'
    >>> weisfeiler_lehman_graph_hash(G2)
    '0db442538bb6dc81d675bd94e6ebb7ca'

    With edge labels, the graphs are no longer assigned
    the same hash digest.

    >>> weisfeiler_lehman_graph_hash(G1, edge_attr="label")
    '408c18537e67d3e56eb7dc92c72cb79e'
    >>> weisfeiler_lehman_graph_hash(G2, edge_attr="label")
    'f9e9cb01c6d2f3b17f83ffeaa24e5986'

    References
    ----------
    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
       Graph Kernels. Journal of Machine Learning Research. 2011.
       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
    """

    def neighborhood_aggregate(G, node, node_labels, edge_attr=None):
        """Build the aggregated label for ``node``: its own label plus the
        sorted labels of its neighbors, each neighbor label optionally
        prefixed with the connecting edge's ``edge_attr`` value.

        Sorting makes the result independent of neighbor iteration order.
        """
        label_list = [node_labels[node]]
        for nbr in G.neighbors(node):
            prefix = "" if not edge_attr else G[node][nbr][edge_attr]
            label_list.append(prefix + node_labels[nbr])
        return "".join(sorted(label_list))

    def weisfeiler_lehman_step(G, labels, edge_attr=None):
        """Perform one WL refinement sweep over the whole graph, returning
        a dict mapping each node to its newly aggregated label.
        """
        return {
            node: neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
            for node in G.nodes()
        }

    # Set initial node labels: explicit node attribute when given; empty
    # string when only edge attributes drive the hash; node degree when no
    # attributes were requested at all.
    node_labels = dict()
    for node in G.nodes():
        if node_attr:
            node_labels[node] = str(G.nodes[node][node_attr])
        elif edge_attr:
            node_labels[node] = ""
        else:
            node_labels[node] = str(G.degree(node))

    items = []
    for _ in range(iterations):
        node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr)
        counter = Counter()
        # Hash each node's aggregated label; the per-iteration histogram of
        # these digests is what feeds the final graph hash.
        for node, label in node_labels.items():
            h = blake2b(digest_size=digest_size)
            # utf-8 (not ascii): attribute values may contain non-ASCII
            # characters, which would raise UnicodeEncodeError under ascii.
            # For pure-ASCII labels the encoded bytes are identical, so all
            # previously valid hashes (incl. the doctests above) are unchanged.
            h.update(label.encode("utf-8"))
            counter.update([h.hexdigest()])
        # Sorted histogram entries are extended (not replaced) so the final
        # digest reflects every refinement round, in a canonical order.
        items.extend(sorted(counter.items(), key=lambda x: x[0]))

    # Hash the accumulated label histograms into the final graph digest.
    # (The repr of `items` consists of hex digests and counts, so it is
    # ASCII-only; utf-8 is used for uniformity.)
    h = blake2b(digest_size=digest_size)
    h.update(str(tuple(items)).encode("utf-8"))
    return h.hexdigest()