comparison: env/lib/python3.9/site-packages/rdflib/compare.py @ 0:4f3585e2f14b (draft, default, tip)
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children | |
| base revision | this revision |
|---|---|
| -1:000000000000 | 0:4f3585e2f14b |
# -*- coding: utf-8 -*-
"""
A collection of utilities for canonicalizing and inspecting graphs.

Among other things, they solve the problem of deterministic bnode
comparisons.

Warning: the time to canonicalize bnodes may increase exponentially on
degenerate larger graphs. Use with care!

Example of comparing two graphs::

    >>> g1 = Graph().parse(format='n3', data='''
    ...     @prefix : <http://example.org/ns#> .
    ...     <http://example.org> :rel
    ...         <http://example.org/same>,
    ...         [ :label "Same" ],
    ...         <http://example.org/a>,
    ...         [ :label "A" ] .
    ... ''')
    >>> g2 = Graph().parse(format='n3', data='''
    ...     @prefix : <http://example.org/ns#> .
    ...     <http://example.org> :rel
    ...         <http://example.org/same>,
    ...         [ :label "Same" ],
    ...         <http://example.org/b>,
    ...         [ :label "B" ] .
    ... ''')
    >>>
    >>> iso1 = to_isomorphic(g1)
    >>> iso2 = to_isomorphic(g2)

These are not isomorphic::

    >>> iso1 == iso2
    False

Diff the two graphs::

    >>> in_both, in_first, in_second = graph_diff(iso1, iso2)

Present in both::

    >>> def dump_nt_sorted(g):
    ...     for l in sorted(g.serialize(format='nt').splitlines()):
    ...         if l: print(l.decode('ascii'))

    >>> dump_nt_sorted(in_both) #doctest: +SKIP
    <http://example.org>
      <http://example.org/ns#rel> <http://example.org/same> .
    <http://example.org>
      <http://example.org/ns#rel> _:cbcaabaaba17fecbc304a64f8edee4335e .
    _:cbcaabaaba17fecbc304a64f8edee4335e
      <http://example.org/ns#label> "Same" .

Only in first::

    >>> dump_nt_sorted(in_first) #doctest: +SKIP
    <http://example.org>
      <http://example.org/ns#rel> <http://example.org/a> .
    <http://example.org>
      <http://example.org/ns#rel> _:cb124e4c6da0579f810c0ffe4eff485bd9 .
    _:cb124e4c6da0579f810c0ffe4eff485bd9
      <http://example.org/ns#label> "A" .

Only in second::

    >>> dump_nt_sorted(in_second) #doctest: +SKIP
    <http://example.org>
      <http://example.org/ns#rel> <http://example.org/b> .
    <http://example.org>
      <http://example.org/ns#rel> _:cb558f30e21ddfc05ca53108348338ade8 .
    _:cb558f30e21ddfc05ca53108348338ade8
      <http://example.org/ns#label> "B" .
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


# TODO:
# - Doesn't handle quads.
# - Add warning and/or safety mechanism before working on large graphs?
# - use this in existing Graph.isomorphic?

__all__ = ['IsomorphicGraph', 'to_isomorphic', 'isomorphic',
           'to_canonical_graph', 'graph_diff', 'similar']

from rdflib.graph import Graph, ConjunctiveGraph, ReadOnlyGraphAggregate
from rdflib.term import BNode, Node
from hashlib import sha256

from datetime import datetime
from collections import defaultdict

from six import text_type


def _total_seconds(td):
    result = td.days * 24 * 60 * 60
    result += td.seconds
    result += td.microseconds / 1000000.0
    return result


class _runtime(object):
    def __init__(self, label):
        self.label = label

    def __call__(self, f):
        if self.label is None:
            self.label = f.__name__ + "_runtime"

        def wrapped_f(*args, **kwargs):
            start = datetime.now()
            result = f(*args, **kwargs)
            if 'stats' in kwargs and kwargs['stats'] is not None:
                stats = kwargs['stats']
                stats[self.label] = _total_seconds(datetime.now() - start)
            return result
        return wrapped_f


class _call_count(object):
    def __init__(self, label):
        self.label = label

    def __call__(self, f):
        if self.label is None:
            self.label = f.__name__ + "_runtime"

        def wrapped_f(*args, **kwargs):
            if 'stats' in kwargs and kwargs['stats'] is not None:
                stats = kwargs['stats']
                if self.label not in stats:
                    stats[self.label] = 0
                stats[self.label] += 1
            return f(*args, **kwargs)
        return wrapped_f


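# --- Illustrative sketch (not part of rdflib): the _runtime and _call_count
# decorators only record into the dict passed through the ``stats`` keyword
# argument (they read it out of **kwargs rather than adding a parameter),
# which is how to_hash and _traces are instrumented below. The function and
# key names here are assumptions made for this example only.
@_runtime("example_runtime")
@_call_count("example_calls")
def _example_timed(x, stats=None):
    return x * 2

# Usage (not executed at import time):
#     stats = {}
#     _example_timed(21, stats=stats)
#     # stats == {'example_calls': 1, 'example_runtime': <elapsed seconds>}

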
class IsomorphicGraph(ConjunctiveGraph):
    """An implementation of the RGDA1 graph digest algorithm.

    An implementation of RGDA1 (publication below),
    a combination of Sayers & Karp's graph digest algorithm using
    sum and SHA-256 <http://www.hpl.hp.com/techreports/2003/HPL-2003-235R1.pdf>
    and traces <http://pallini.di.uniroma1.it>, an average case
    polynomial time algorithm for graph canonicalization.

    McCusker, J. P. (2015). WebSig: A Digital Signature Framework for the Web.
    Rensselaer Polytechnic Institute, Troy, NY.
    http://gradworks.umi.com/3727015.pdf
    """

    def __init__(self, **kwargs):
        super(IsomorphicGraph, self).__init__(**kwargs)

    def __eq__(self, other):
        """Graph isomorphism testing."""
        if not isinstance(other, IsomorphicGraph):
            return False
        elif len(self) != len(other):
            return False
        return self.internal_hash() == other.internal_hash()

    def __ne__(self, other):
        """Negative graph isomorphism testing."""
        return not self.__eq__(other)

    def __hash__(self):
        return super(IsomorphicGraph, self).__hash__()

    def graph_digest(self, stats=None):
        """Synonym for IsomorphicGraph.internal_hash."""
        return self.internal_hash(stats=stats)

    def internal_hash(self, stats=None):
        """
        This is defined instead of __hash__ to avoid a circular recursion
        scenario with the Memory store for rdflib which requires a hash lookup
        in order to return a generator of triples.
        """
        return _TripleCanonicalizer(self).to_hash(stats=stats)


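# --- Illustrative sketch (not part of rdflib): comparing two graphs via
# IsomorphicGraph and collecting digest statistics through the ``stats``
# keyword. The helper name and the sample data are assumptions made for this
# example only.
def _example_isomorphic_graph():
    data = '<http://example.org/a> <http://example.org/p> _:x .'
    iso1 = to_isomorphic(Graph().parse(data=data, format='nt'))
    iso2 = to_isomorphic(Graph().parse(data=data, format='nt'))
    stats = {}
    digest = iso1.graph_digest(stats=stats)  # same value as iso1.internal_hash()
    # stats now includes keys such as 'graph_digest' and 'to_hash_runtime'.
    return iso1 == iso2, digest, stats

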
class Color:
    def __init__(self, nodes, hashfunc, color=(), hash_cache=None):
        if hash_cache is None:
            hash_cache = {}
        self._hash_cache = hash_cache
        self.color = color
        self.nodes = nodes
        self.hashfunc = hashfunc
        self._hash_color = None

    def __str__(self):
        nodes, color = self.key()
        return "Color %s (%s nodes)" % (color, nodes)

    def key(self):
        return (len(self.nodes), self.hash_color())

    def hash_color(self, color=None):
        if color is None:
            color = self.color
        if color in self._hash_cache:
            return self._hash_cache[color]

        def stringify(x):
            if isinstance(x, Node):
                return x.n3()
            else:
                return text_type(x)
        if isinstance(color, Node):
            return stringify(color)
        value = 0
        for triple in color:
            value += self.hashfunc(' '.join([stringify(x) for x in triple]))
        val = u"%x" % value
        self._hash_cache[color] = val
        return val

    def distinguish(self, W, graph):
        colors = {}
        for n in self.nodes:
            new_color = list(self.color)
            for node in W.nodes:
                new_color += [
                    (1, p, W.hash_color())
                    for s, p, o in graph.triples((n, None, node))]
                new_color += [
                    (W.hash_color(), p, 3)
                    for s, p, o in graph.triples((node, None, n))]
            new_color = tuple(new_color)
            new_hash_color = self.hash_color(new_color)

            if new_hash_color not in colors:
                c = Color(
                    [], self.hashfunc, new_color,
                    hash_cache=self._hash_cache)
                colors[new_hash_color] = c
            colors[new_hash_color].nodes.append(n)
        return colors.values()

    def discrete(self):
        return len(self.nodes) == 1

    def copy(self):
        return Color(
            self.nodes[:], self.hashfunc, self.color,
            hash_cache=self._hash_cache)


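# --- Illustrative sketch (not part of rdflib): Color instances are ordered by
# key() = (number of nodes, hash_color()), which is the sort key used by
# _refine, and discrete() reports whether a color holds exactly one node. The
# integer-returning hash function below mirrors the wrapper built in
# _TripleCanonicalizer.__init__ and is an assumption for this example only.
def _example_color_keys():
    def int_sha256(s):
        h = sha256()
        h.update(text_type(s).encode("utf8"))
        return int(h.hexdigest(), 16)
    b1, b2 = BNode(), BNode()
    shared = Color([b1, b2], int_sha256)        # one color holding two bnodes
    single = Color([b1], int_sha256, color=b1)  # already discrete
    return shared.key(), single.key(), single.discrete()

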
class _TripleCanonicalizer(object):

    def __init__(self, graph, hashfunc=sha256):
        self.graph = graph

        def _hashfunc(s):
            h = hashfunc()
            h.update(text_type(s).encode("utf8"))
            return int(h.hexdigest(), 16)
        self._hash_cache = {}
        self.hashfunc = _hashfunc

    def _discrete(self, coloring):
        return len([c for c in coloring if not c.discrete()]) == 0

    def _initial_color(self):
        """Finds an initial color for the graph.

        Finds an initial color for the graph by finding all blank nodes and
        non-blank nodes that are adjacent. Nodes that are not adjacent to blank
        nodes are not included, as they are a) already colored (by URI or literal)
        and b) do not factor into the color of any blank node.
        """
        bnodes = set()
        others = set()
        self._neighbors = defaultdict(set)
        for s, p, o in self.graph:
            nodes = set([s, p, o])
            b = set([x for x in nodes if isinstance(x, BNode)])
            if len(b) > 0:
                others |= nodes - b
                bnodes |= b
                if isinstance(s, BNode):
                    self._neighbors[s].add(o)
                if isinstance(o, BNode):
                    self._neighbors[o].add(s)
                if isinstance(p, BNode):
                    self._neighbors[p].add(s)
                    self._neighbors[p].add(p)
        if len(bnodes) > 0:
            return [
                Color(list(bnodes), self.hashfunc, hash_cache=self._hash_cache)
            ] + [
                Color([x], self.hashfunc, x, hash_cache=self._hash_cache)
                for x in others
            ]
        else:
            return []

    def _individuate(self, color, individual):
        new_color = list(color.color)
        new_color.append((len(color.nodes),))

        color.nodes.remove(individual)
        c = Color([individual], self.hashfunc, tuple(new_color),
                  hash_cache=self._hash_cache)
        return c

    def _get_candidates(self, coloring):
        for c in [c for c in coloring if not c.discrete()]:
            for node in c.nodes:
                yield node, c

    def _refine(self, coloring, sequence):
        sequence = sorted(sequence, key=lambda x: x.key(), reverse=True)
        coloring = coloring[:]
        while len(sequence) > 0 and not self._discrete(coloring):
            W = sequence.pop()
            for c in coloring[:]:
                if len(c.nodes) > 1 or isinstance(c.nodes[0], BNode):
                    colors = sorted(c.distinguish(W, self.graph),
                                    key=lambda x: x.key(),
                                    reverse=True)
                    coloring.remove(c)
                    coloring.extend(colors)
                    try:
                        si = sequence.index(c)
                        sequence = sequence[:si] + colors + sequence[si+1:]
                    except ValueError:
                        sequence = colors[1:] + sequence
        combined_colors = []
        combined_color_map = dict()
        for color in coloring:
            color_hash = color.hash_color()
            # On a hash collision, combine the colors into a single color for
            # individuation.
            if color_hash in combined_color_map:
                combined_color_map[color_hash].nodes.extend(color.nodes)
            else:
                combined_colors.append(color)
                combined_color_map[color_hash] = color
        return combined_colors

    @_runtime("to_hash_runtime")
    def to_hash(self, stats=None):
        result = 0
        for triple in self.canonical_triples(stats=stats):
            result += self.hashfunc(' '.join([x.n3() for x in triple]))
        if stats is not None:
            stats['graph_digest'] = "%x" % result
        return result

    def _experimental_path(self, coloring):
        coloring = [c.copy() for c in coloring]
        while not self._discrete(coloring):
            color = [x for x in coloring if not x.discrete()][0]
            node = color.nodes[0]
            new_color = self._individuate(color, node)
            coloring.append(new_color)
            coloring = self._refine(coloring, [new_color])
        return coloring

    def _create_generator(self, colorings, groupings=None):
        if not groupings:
            groupings = defaultdict(set)
        for group in zip(*colorings):
            g = set([c.nodes[0] for c in group])
            for n in group:
                g |= groupings[n]
            for n in g:
                groupings[n] = g
        return groupings

    @_call_count("individuations")
    def _traces(self, coloring, stats=None, depth=[0]):
        if stats is not None and 'prunings' not in stats:
            stats['prunings'] = 0
        depth[0] += 1
        candidates = self._get_candidates(coloring)
        best = []
        best_score = None
        best_experimental = None
        best_experimental_score = None
        last_coloring = None
        generator = defaultdict(set)
        visited = set()
        for candidate, color in candidates:
            if candidate in generator:
                v = generator[candidate] & visited
                if len(v) > 0:
                    visited.add(candidate)
                    continue
            visited.add(candidate)
            coloring_copy = []
            color_copy = None
            for c in coloring:
                c_copy = c.copy()
                coloring_copy.append(c_copy)
                if c == color:
                    color_copy = c_copy
            new_color = self._individuate(color_copy, candidate)
            coloring_copy.append(new_color)
            refined_coloring = self._refine(coloring_copy, [new_color])
            color_score = tuple([c.key() for c in refined_coloring])
            experimental = self._experimental_path(coloring_copy)
            experimental_score = set([c.key() for c in experimental])
            if last_coloring:
                generator = self._create_generator(
                    [last_coloring, experimental],
                    generator)
            last_coloring = experimental
            if best_score is None or best_score < color_score:
                best = [refined_coloring]
                best_score = color_score
                best_experimental = experimental
                best_experimental_score = experimental_score
            elif best_score > color_score:
                # prune this branch.
                if stats is not None:
                    stats['prunings'] += 1
            elif experimental_score != best_experimental_score:
                best.append(refined_coloring)
            else:
                # prune this branch.
                if stats is not None:
                    stats['prunings'] += 1
        discrete = [x for x in best if self._discrete(x)]
        if len(discrete) == 0:
            best_score = None
            best_depth = None
            for coloring in best:
                d = [depth[0]]
                new_color = self._traces(coloring, stats=stats, depth=d)
                color_score = tuple([c.key() for c in refined_coloring])
                if best_score is None or color_score > best_score:
                    discrete = [new_color]
                    best_score = color_score
                    best_depth = d[0]
            depth[0] = best_depth
        return discrete[0]

    def canonical_triples(self, stats=None):
        if stats is not None:
            start_canonicalization = datetime.now()
        if stats is not None:
            start_coloring = datetime.now()
        coloring = self._initial_color()
        if stats is not None:
            stats['triple_count'] = len(self.graph)
            stats['adjacent_nodes'] = max(0, len(coloring) - 1)
        coloring = self._refine(coloring, coloring[:])
        if stats is not None:
            stats['initial_coloring_runtime'] = _total_seconds(datetime.now() - start_coloring)
            stats['initial_color_count'] = len(coloring)

        if not self._discrete(coloring):
            depth = [0]
            coloring = self._traces(coloring, stats=stats, depth=depth)
            if stats is not None:
                stats['tree_depth'] = depth[0]
        elif stats is not None:
            stats['individuations'] = 0
            stats['tree_depth'] = 0
        if stats is not None:
            stats['color_count'] = len(coloring)

        bnode_labels = dict([(c.nodes[0], c.hash_color()) for c in coloring])
        if stats is not None:
            stats["canonicalize_triples_runtime"] = _total_seconds(datetime.now() - start_coloring)
        for triple in self.graph:
            result = tuple(self._canonicalize_bnodes(triple, bnode_labels))
            yield result

    def _canonicalize_bnodes(self, triple, labels):
        for term in triple:
            if isinstance(term, BNode):
                yield BNode(value="cb%s" % labels[term])
            else:
                yield term


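# --- Illustrative sketch (not part of rdflib): canonical_triples rewrites
# every blank node to a deterministic label prefixed with "cb" (see
# _canonicalize_bnodes above) and fills the optional stats dict. The helper
# name and sample data are assumptions made for this example only.
def _example_canonical_triples():
    data = '_:a <http://example.org/p> "v" .'
    g = Graph().parse(data=data, format='nt')
    stats = {}
    triples = list(_TripleCanonicalizer(g).canonical_triples(stats=stats))
    # Every bnode in the result is relabelled, e.g. _:cb<hex digest>.
    return triples, stats['color_count']

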
def to_isomorphic(graph):
    if isinstance(graph, IsomorphicGraph):
        return graph
    result = IsomorphicGraph()
    if hasattr(graph, "identifier"):
        result = IsomorphicGraph(identifier=graph.identifier)
    result += graph
    return result


def isomorphic(graph1, graph2):
    """Compare graphs for equality.

    Uses an algorithm to compute unique hashes which takes bnodes into account.

    Examples::

        >>> g1 = Graph().parse(format='n3', data='''
        ...     @prefix : <http://example.org/ns#> .
        ...     <http://example.org> :rel <http://example.org/a> .
        ...     <http://example.org> :rel <http://example.org/b> .
        ...     <http://example.org> :rel [ :label "A bnode." ] .
        ...     ''')
        >>> g2 = Graph().parse(format='n3', data='''
        ...     @prefix ns: <http://example.org/ns#> .
        ...     <http://example.org> ns:rel [ ns:label "A bnode." ] .
        ...     <http://example.org> ns:rel <http://example.org/b>,
        ...         <http://example.org/a> .
        ...     ''')
        >>> isomorphic(g1, g2)
        True

        >>> g3 = Graph().parse(format='n3', data='''
        ...     @prefix : <http://example.org/ns#> .
        ...     <http://example.org> :rel <http://example.org/a> .
        ...     <http://example.org> :rel <http://example.org/b> .
        ...     <http://example.org> :rel <http://example.org/c> .
        ...     ''')
        >>> isomorphic(g1, g3)
        False
    """
    gd1 = _TripleCanonicalizer(graph1).to_hash()
    gd2 = _TripleCanonicalizer(graph2).to_hash()
    return gd1 == gd2


def to_canonical_graph(g1, stats=None):
    """Creates a canonical, read-only graph.

    Creates a canonical, read-only graph where all bnode ids are based on
    deterministic SHA-256 checksums, correlated with the graph contents.
    """
    graph = Graph()
    graph += _TripleCanonicalizer(g1).canonical_triples(stats=stats)
    return ReadOnlyGraphAggregate([graph])


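# --- Illustrative sketch (not part of rdflib): because canonical bnode ids
# depend only on graph structure, two isomorphic graphs with different local
# bnode labels serialize to identical canonical triples. The helper name and
# data are assumptions made for this example only.
def _example_canonical_graph_determinism():
    g_a = Graph().parse(data='_:x <http://example.org/p> "v" .', format='nt')
    g_b = Graph().parse(data='_:y <http://example.org/p> "v" .', format='nt')
    c_a = to_canonical_graph(g_a)
    c_b = to_canonical_graph(g_b)
    return set(c_a) == set(c_b)  # True: identical canonical triples

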
def graph_diff(g1, g2):
    """Returns three sets of triples: "in both", "in first" and "in second"."""
    # bnodes have deterministic values in canonical graphs:
    cg1 = to_canonical_graph(g1)
    cg2 = to_canonical_graph(g2)
    in_both = cg1 * cg2
    in_first = cg1 - cg2
    in_second = cg2 - cg1
    return (in_both, in_first, in_second)


_MOCK_BNODE = BNode()


def similar(g1, g2):
    """Checks if the two graphs are "similar".

    Checks if the two graphs are "similar", by comparing sorted triples where
    all bnodes have been replaced by a singular mock bnode (the
    ``_MOCK_BNODE``).

    This is a much cheaper, but less reliable, alternative to the comparison
    algorithm in ``isomorphic``.
    """
    return all(t1 == t2 for (t1, t2) in _squashed_graphs_triples(g1, g2))


def _squashed_graphs_triples(g1, g2):
    for (t1, t2) in zip(sorted(_squash_graph(g1)), sorted(_squash_graph(g2))):
        yield t1, t2


def _squash_graph(graph):
    return (_squash_bnodes(triple) for triple in graph)


def _squash_bnodes(triple):
    return tuple((isinstance(t, BNode) and _MOCK_BNODE) or t for t in triple)
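

# --- Illustrative sketch (not part of rdflib): similar() squashes every bnode
# to the single _MOCK_BNODE before comparing sorted triples, so it can report
# True for graphs that isomorphic() correctly distinguishes. The helper name
# and graph data are assumptions made for this example only.
def _example_similar_vs_isomorphic():
    # One bnode with two properties versus two bnodes with one property each.
    g1 = Graph().parse(format='nt', data='''
_:a <http://example.org/p> <http://example.org/o1> .
_:a <http://example.org/p> <http://example.org/o2> .
''')
    g2 = Graph().parse(format='nt', data='''
_:a <http://example.org/p> <http://example.org/o1> .
_:b <http://example.org/p> <http://example.org/o2> .
''')
    return similar(g1, g2), isomorphic(g1, g2)  # (True, False)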
