Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/graph_hashing.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 Functions for hashing graphs to strings. | |
3 Isomorphic graphs should be assigned identical hashes. | |
4 For now, only Weisfeiler-Lehman hashing is implemented. | |
5 """ | |
6 | |
7 from collections import Counter | |
8 from hashlib import blake2b | |
9 | |
10 __all__ = ["weisfeiler_lehman_graph_hash"] | |
11 | |
12 | |
def weisfeiler_lehman_graph_hash(
    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
):
    """Return Weisfeiler Lehman (WL) graph hash.

    The function iteratively aggregates and hashes neighbourhoods of each node.
    After each node's neighbors are hashed to obtain updated node labels,
    a hashed histogram of resulting labels is returned as the final hash.

    Hashes are identical for isomorphic graphs, and there are strong
    guarantees that non-isomorphic graphs will get different hashes.
    See [1] for details.

    Note: Similarity between hashes does not imply similarity between graphs.

    If no node or edge attributes are provided, the degree of each node
    is used as its initial label.
    Otherwise, node and/or edge labels are used to compute the hash.

    Parameters
    ----------
    G: graph
        The graph to be hashed.
        Can have node and/or edge attributes. Can also have no attributes.
    edge_attr: string
        The key in edge attribute dictionary to be used for hashing.
        If None, edge labels are ignored.
    node_attr: string
        The key in node attribute dictionary to be used for hashing.
        If None, and no edge_attr given, use
        degree of node as label.
    iterations: int
        Number of neighbor aggregations to perform.
        Should be larger for larger graphs.
    digest_size: int
        Size of blake2b hash digest to use for hashing node labels.

    Returns
    -------
    h : string
        Hexadecimal string corresponding to hash of the input graph.

    Examples
    --------
    Two graphs with edge attributes that are isomorphic, except for
    differences in the edge labels.

    >>> G1 = nx.Graph()
    >>> G1.add_edges_from(
    ...     [
    ...         (1, 2, {"label": "A"}),
    ...         (2, 3, {"label": "A"}),
    ...         (3, 1, {"label": "A"}),
    ...         (1, 4, {"label": "B"}),
    ...     ]
    ... )
    >>> G2 = nx.Graph()
    >>> G2.add_edges_from(
    ...     [
    ...         (5, 6, {"label": "B"}),
    ...         (6, 7, {"label": "A"}),
    ...         (7, 5, {"label": "A"}),
    ...         (7, 8, {"label": "A"}),
    ...     ]
    ... )

    Omitting the `edge_attr` option, results in identical hashes.

    >>> weisfeiler_lehman_graph_hash(G1)
    '0db442538bb6dc81d675bd94e6ebb7ca'
    >>> weisfeiler_lehman_graph_hash(G2)
    '0db442538bb6dc81d675bd94e6ebb7ca'

    With edge labels, the graphs are no longer assigned
    the same hash digest.

    >>> weisfeiler_lehman_graph_hash(G1, edge_attr="label")
    '408c18537e67d3e56eb7dc92c72cb79e'
    >>> weisfeiler_lehman_graph_hash(G2, edge_attr="label")
    'f9e9cb01c6d2f3b17f83ffeaa24e5986'

    References
    ----------
    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
       Graph Kernels. Journal of Machine Learning Research. 2011.
       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
    """

    def neighborhood_aggregate(G, node, node_labels, edge_attr=None):
        """
        Compute new labels for given node by aggregating
        the labels of each node's neighbors.
        """
        label_list = [node_labels[node]]
        for nei in G.neighbors(node):
            # str(...) so non-string edge attributes (e.g. numeric weights)
            # can be hashed; a no-op for string attributes.
            prefix = "" if not edge_attr else str(G[node][nei][edge_attr])
            label_list.append(prefix + node_labels[nei])
        # Sorting makes the aggregate independent of neighbor iteration order.
        return "".join(sorted(label_list))

    def weisfeiler_lehman_step(G, labels, edge_attr=None):
        """
        Apply neighborhood aggregation to each node
        in the graph.
        Computes a dictionary with labels for each node.
        """
        return {
            node: neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
            for node in G.nodes()
        }

    items = []
    node_labels = {}
    # set initial node labels
    for node in G.nodes():
        if (not node_attr) and (not edge_attr):
            node_labels[node] = str(G.degree(node))
        elif node_attr:
            node_labels[node] = str(G.nodes[node][node_attr])
        else:
            node_labels[node] = ""

    for _ in range(iterations):
        node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr)
        counter = Counter()
        # count node labels
        for node, d in node_labels.items():
            h = blake2b(digest_size=digest_size)
            # utf-8 instead of ascii: identical bytes for ASCII labels (so
            # existing hashes are unchanged) but does not crash on non-ASCII
            # node/edge attribute values.
            h.update(d.encode("utf-8"))
            counter.update([h.hexdigest()])
        # sort the counter, extend total counts
        items.extend(sorted(counter.items(), key=lambda x: x[0]))

    # hash the final counter
    h = blake2b(digest_size=digest_size)
    h.update(str(tuple(items)).encode("utf-8"))
    return h.hexdigest()