Mercurial > repos > shellac > sam_consensus_v3
view env/lib/python3.9/site-packages/networkx/algorithms/tests/test_similarity.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line source
import pytest import networkx as nx from networkx.algorithms.similarity import ( graph_edit_distance, optimal_edit_paths, optimize_graph_edit_distance, ) from networkx.generators.classic import ( circular_ladder_graph, cycle_graph, path_graph, wheel_graph, ) def nmatch(n1, n2): return n1 == n2 def ematch(e1, e2): return e1 == e2 def getCanonical(): G = nx.Graph() G.add_node("A", label="A") G.add_node("B", label="B") G.add_node("C", label="C") G.add_node("D", label="D") G.add_edge("A", "B", label="a-b") G.add_edge("B", "C", label="b-c") G.add_edge("B", "D", label="b-d") return G class TestSimilarity: @classmethod def setup_class(cls): global numpy global scipy numpy = pytest.importorskip("numpy") scipy = pytest.importorskip("scipy") def test_graph_edit_distance_roots_and_timeout(self): G0 = nx.star_graph(5) G1 = G0.copy() pytest.raises(ValueError, graph_edit_distance, G0, G1, roots=[2]) pytest.raises(ValueError, graph_edit_distance, G0, G1, roots=[2, 3, 4]) pytest.raises(nx.NodeNotFound, graph_edit_distance, G0, G1, roots=(9, 3)) pytest.raises(nx.NodeNotFound, graph_edit_distance, G0, G1, roots=(3, 9)) pytest.raises(nx.NodeNotFound, graph_edit_distance, G0, G1, roots=(9, 9)) assert graph_edit_distance(G0, G1, roots=(1, 2)) == 0 assert graph_edit_distance(G0, G1, roots=(0, 1)) == 8 assert graph_edit_distance(G0, G1, roots=(1, 2), timeout=5) == 0 assert graph_edit_distance(G0, G1, roots=(0, 1), timeout=5) == 8 assert graph_edit_distance(G0, G1, roots=(0, 1), timeout=0.0001) is None # test raise on 0 timeout pytest.raises(nx.NetworkXError, graph_edit_distance, G0, G1, timeout=0) def test_graph_edit_distance(self): G0 = nx.Graph() G1 = path_graph(6) G2 = cycle_graph(6) G3 = wheel_graph(7) assert graph_edit_distance(G0, G0) == 0 assert graph_edit_distance(G0, G1) == 11 assert graph_edit_distance(G1, G0) == 11 assert graph_edit_distance(G0, G2) == 12 assert graph_edit_distance(G2, G0) == 12 assert graph_edit_distance(G0, G3) == 19 assert graph_edit_distance(G3, G0) == 19 assert graph_edit_distance(G1, G1) == 0 assert graph_edit_distance(G1, G2) == 1 assert graph_edit_distance(G2, G1) == 1 assert graph_edit_distance(G1, G3) == 8 assert graph_edit_distance(G3, G1) == 8 assert graph_edit_distance(G2, G2) == 0 assert graph_edit_distance(G2, G3) == 7 assert graph_edit_distance(G3, G2) == 7 assert graph_edit_distance(G3, G3) == 0 def test_graph_edit_distance_node_match(self): G1 = cycle_graph(5) G2 = cycle_graph(5) for n, attr in G1.nodes.items(): attr["color"] = "red" if n % 2 == 0 else "blue" for n, attr in G2.nodes.items(): attr["color"] = "red" if n % 2 == 1 else "blue" assert graph_edit_distance(G1, G2) == 0 assert ( graph_edit_distance( G1, G2, node_match=lambda n1, n2: n1["color"] == n2["color"] ) == 1 ) def test_graph_edit_distance_edge_match(self): G1 = path_graph(6) G2 = path_graph(6) for e, attr in G1.edges.items(): attr["color"] = "red" if min(e) % 2 == 0 else "blue" for e, attr in G2.edges.items(): attr["color"] = "red" if min(e) // 3 == 0 else "blue" assert graph_edit_distance(G1, G2) == 0 assert ( graph_edit_distance( G1, G2, edge_match=lambda e1, e2: e1["color"] == e2["color"] ) == 2 ) def test_graph_edit_distance_node_cost(self): G1 = path_graph(6) G2 = path_graph(6) for n, attr in G1.nodes.items(): attr["color"] = "red" if n % 2 == 0 else "blue" for n, attr in G2.nodes.items(): attr["color"] = "red" if n % 2 == 1 else "blue" def node_subst_cost(uattr, vattr): if uattr["color"] == vattr["color"]: return 1 else: return 10 def node_del_cost(attr): if attr["color"] == "blue": return 20 else: return 50 def node_ins_cost(attr): if attr["color"] == "blue": return 40 else: return 100 assert ( graph_edit_distance( G1, G2, node_subst_cost=node_subst_cost, node_del_cost=node_del_cost, node_ins_cost=node_ins_cost, ) == 6 ) def test_graph_edit_distance_edge_cost(self): G1 = path_graph(6) G2 = path_graph(6) for e, attr in G1.edges.items(): attr["color"] = "red" if min(e) % 2 == 0 else "blue" for e, attr in G2.edges.items(): attr["color"] = "red" if min(e) // 3 == 0 else "blue" def edge_subst_cost(gattr, hattr): if gattr["color"] == hattr["color"]: return 0.01 else: return 0.1 def edge_del_cost(attr): if attr["color"] == "blue": return 0.2 else: return 0.5 def edge_ins_cost(attr): if attr["color"] == "blue": return 0.4 else: return 1.0 assert ( graph_edit_distance( G1, G2, edge_subst_cost=edge_subst_cost, edge_del_cost=edge_del_cost, edge_ins_cost=edge_ins_cost, ) == 0.23 ) def test_graph_edit_distance_upper_bound(self): G1 = circular_ladder_graph(2) G2 = circular_ladder_graph(6) assert graph_edit_distance(G1, G2, upper_bound=5) is None assert graph_edit_distance(G1, G2, upper_bound=24) == 22 assert graph_edit_distance(G1, G2) == 22 def test_optimal_edit_paths(self): G1 = path_graph(3) G2 = cycle_graph(3) paths, cost = optimal_edit_paths(G1, G2) assert cost == 1 assert len(paths) == 6 def canonical(vertex_path, edge_path): return ( tuple(sorted(vertex_path)), tuple(sorted(edge_path, key=lambda x: (None in x, x))), ) expected_paths = [ ( [(0, 0), (1, 1), (2, 2)], [((0, 1), (0, 1)), ((1, 2), (1, 2)), (None, (0, 2))], ), ( [(0, 0), (1, 2), (2, 1)], [((0, 1), (0, 2)), ((1, 2), (1, 2)), (None, (0, 1))], ), ( [(0, 1), (1, 0), (2, 2)], [((0, 1), (0, 1)), ((1, 2), (0, 2)), (None, (1, 2))], ), ( [(0, 1), (1, 2), (2, 0)], [((0, 1), (1, 2)), ((1, 2), (0, 2)), (None, (0, 1))], ), ( [(0, 2), (1, 0), (2, 1)], [((0, 1), (0, 2)), ((1, 2), (0, 1)), (None, (1, 2))], ), ( [(0, 2), (1, 1), (2, 0)], [((0, 1), (1, 2)), ((1, 2), (0, 1)), (None, (0, 2))], ), ] assert {canonical(*p) for p in paths} == {canonical(*p) for p in expected_paths} def test_optimize_graph_edit_distance(self): G1 = circular_ladder_graph(2) G2 = circular_ladder_graph(6) bestcost = 1000 for cost in optimize_graph_edit_distance(G1, G2): assert cost < bestcost bestcost = cost assert bestcost == 22 # def test_graph_edit_distance_bigger(self): # G1 = circular_ladder_graph(12) # G2 = circular_ladder_graph(16) # assert_equal(graph_edit_distance(G1, G2), 22) def test_selfloops(self): G0 = nx.Graph() G1 = nx.Graph() G1.add_edges_from((("A", "A"), ("A", "B"))) G2 = nx.Graph() G2.add_edges_from((("A", "B"), ("B", "B"))) G3 = nx.Graph() G3.add_edges_from((("A", "A"), ("A", "B"), ("B", "B"))) assert graph_edit_distance(G0, G0) == 0 assert graph_edit_distance(G0, G1) == 4 assert graph_edit_distance(G1, G0) == 4 assert graph_edit_distance(G0, G2) == 4 assert graph_edit_distance(G2, G0) == 4 assert graph_edit_distance(G0, G3) == 5 assert graph_edit_distance(G3, G0) == 5 assert graph_edit_distance(G1, G1) == 0 assert graph_edit_distance(G1, G2) == 0 assert graph_edit_distance(G2, G1) == 0 assert graph_edit_distance(G1, G3) == 1 assert graph_edit_distance(G3, G1) == 1 assert graph_edit_distance(G2, G2) == 0 assert graph_edit_distance(G2, G3) == 1 assert graph_edit_distance(G3, G2) == 1 assert graph_edit_distance(G3, G3) == 0 def test_digraph(self): G0 = nx.DiGraph() G1 = nx.DiGraph() G1.add_edges_from((("A", "B"), ("B", "C"), ("C", "D"), ("D", "A"))) G2 = nx.DiGraph() G2.add_edges_from((("A", "B"), ("B", "C"), ("C", "D"), ("A", "D"))) G3 = nx.DiGraph() G3.add_edges_from((("A", "B"), ("A", "C"), ("B", "D"), ("C", "D"))) assert graph_edit_distance(G0, G0) == 0 assert graph_edit_distance(G0, G1) == 8 assert graph_edit_distance(G1, G0) == 8 assert graph_edit_distance(G0, G2) == 8 assert graph_edit_distance(G2, G0) == 8 assert graph_edit_distance(G0, G3) == 8 assert graph_edit_distance(G3, G0) == 8 assert graph_edit_distance(G1, G1) == 0 assert graph_edit_distance(G1, G2) == 2 assert graph_edit_distance(G2, G1) == 2 assert graph_edit_distance(G1, G3) == 4 assert graph_edit_distance(G3, G1) == 4 assert graph_edit_distance(G2, G2) == 0 assert graph_edit_distance(G2, G3) == 2 assert graph_edit_distance(G3, G2) == 2 assert graph_edit_distance(G3, G3) == 0 def test_multigraph(self): G0 = nx.MultiGraph() G1 = nx.MultiGraph() G1.add_edges_from((("A", "B"), ("B", "C"), ("A", "C"))) G2 = nx.MultiGraph() G2.add_edges_from((("A", "B"), ("B", "C"), ("B", "C"), ("A", "C"))) G3 = nx.MultiGraph() G3.add_edges_from((("A", "B"), ("B", "C"), ("A", "C"), ("A", "C"), ("A", "C"))) assert graph_edit_distance(G0, G0) == 0 assert graph_edit_distance(G0, G1) == 6 assert graph_edit_distance(G1, G0) == 6 assert graph_edit_distance(G0, G2) == 7 assert graph_edit_distance(G2, G0) == 7 assert graph_edit_distance(G0, G3) == 8 assert graph_edit_distance(G3, G0) == 8 assert graph_edit_distance(G1, G1) == 0 assert graph_edit_distance(G1, G2) == 1 assert graph_edit_distance(G2, G1) == 1 assert graph_edit_distance(G1, G3) == 2 assert graph_edit_distance(G3, G1) == 2 assert graph_edit_distance(G2, G2) == 0 assert graph_edit_distance(G2, G3) == 1 assert graph_edit_distance(G3, G2) == 1 assert graph_edit_distance(G3, G3) == 0 def test_multidigraph(self): G1 = nx.MultiDiGraph() G1.add_edges_from( ( ("hardware", "kernel"), ("kernel", "hardware"), ("kernel", "userspace"), ("userspace", "kernel"), ) ) G2 = nx.MultiDiGraph() G2.add_edges_from( ( ("winter", "spring"), ("spring", "summer"), ("summer", "autumn"), ("autumn", "winter"), ) ) assert graph_edit_distance(G1, G2) == 5 assert graph_edit_distance(G2, G1) == 5 # by https://github.com/jfbeaumont def testCopy(self): G = nx.Graph() G.add_node("A", label="A") G.add_node("B", label="B") G.add_edge("A", "B", label="a-b") assert ( graph_edit_distance(G, G.copy(), node_match=nmatch, edge_match=ematch) == 0 ) def testSame(self): G1 = nx.Graph() G1.add_node("A", label="A") G1.add_node("B", label="B") G1.add_edge("A", "B", label="a-b") G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_edge("A", "B", label="a-b") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 0 def testOneEdgeLabelDiff(self): G1 = nx.Graph() G1.add_node("A", label="A") G1.add_node("B", label="B") G1.add_edge("A", "B", label="a-b") G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_edge("A", "B", label="bad") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1 def testOneNodeLabelDiff(self): G1 = nx.Graph() G1.add_node("A", label="A") G1.add_node("B", label="B") G1.add_edge("A", "B", label="a-b") G2 = nx.Graph() G2.add_node("A", label="Z") G2.add_node("B", label="B") G2.add_edge("A", "B", label="a-b") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1 def testOneExtraNode(self): G1 = nx.Graph() G1.add_node("A", label="A") G1.add_node("B", label="B") G1.add_edge("A", "B", label="a-b") G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_edge("A", "B", label="a-b") G2.add_node("C", label="C") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1 def testOneExtraEdge(self): G1 = nx.Graph() G1.add_node("A", label="A") G1.add_node("B", label="B") G1.add_node("C", label="C") G1.add_node("C", label="C") G1.add_edge("A", "B", label="a-b") G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_edge("A", "B", label="a-b") G2.add_edge("A", "C", label="a-c") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1 def testOneExtraNodeAndEdge(self): G1 = nx.Graph() G1.add_node("A", label="A") G1.add_node("B", label="B") G1.add_edge("A", "B", label="a-b") G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_edge("A", "B", label="a-b") G2.add_edge("A", "C", label="a-c") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2 def testGraph1(self): G1 = getCanonical() G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("D", label="D") G2.add_node("E", label="E") G2.add_edge("A", "B", label="a-b") G2.add_edge("B", "D", label="b-d") G2.add_edge("D", "E", label="d-e") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 3 def testGraph2(self): G1 = getCanonical() G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_node("D", label="D") G2.add_node("E", label="E") G2.add_edge("A", "B", label="a-b") G2.add_edge("B", "C", label="b-c") G2.add_edge("C", "D", label="c-d") G2.add_edge("C", "E", label="c-e") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 4 def testGraph3(self): G1 = getCanonical() G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_node("D", label="D") G2.add_node("E", label="E") G2.add_node("F", label="F") G2.add_node("G", label="G") G2.add_edge("A", "C", label="a-c") G2.add_edge("A", "D", label="a-d") G2.add_edge("D", "E", label="d-e") G2.add_edge("D", "F", label="d-f") G2.add_edge("D", "G", label="d-g") G2.add_edge("E", "B", label="e-b") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 12 def testGraph4(self): G1 = getCanonical() G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_node("D", label="D") G2.add_edge("A", "B", label="a-b") G2.add_edge("B", "C", label="b-c") G2.add_edge("C", "D", label="c-d") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2 def testGraph4_a(self): G1 = getCanonical() G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_node("D", label="D") G2.add_edge("A", "B", label="a-b") G2.add_edge("B", "C", label="b-c") G2.add_edge("A", "D", label="a-d") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2 def testGraph4_b(self): G1 = getCanonical() G2 = nx.Graph() G2.add_node("A", label="A") G2.add_node("B", label="B") G2.add_node("C", label="C") G2.add_node("D", label="D") G2.add_edge("A", "B", label="a-b") G2.add_edge("B", "C", label="b-c") G2.add_edge("B", "D", label="bad") assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1 def test_simrank_no_source_no_target(self): G = nx.cycle_graph(5) expected = { 0: { 0: 1, 1: 0.3951219505902448, 2: 0.5707317069281646, 3: 0.5707317069281646, 4: 0.3951219505902449, }, 1: { 0: 0.3951219505902448, 1: 1, 2: 0.3951219505902449, 3: 0.5707317069281646, 4: 0.5707317069281646, }, 2: { 0: 0.5707317069281646, 1: 0.3951219505902449, 2: 1, 3: 0.3951219505902449, 4: 0.5707317069281646, }, 3: { 0: 0.5707317069281646, 1: 0.5707317069281646, 2: 0.3951219505902449, 3: 1, 4: 0.3951219505902449, }, 4: { 0: 0.3951219505902449, 1: 0.5707317069281646, 2: 0.5707317069281646, 3: 0.3951219505902449, 4: 1, }, } actual = nx.simrank_similarity(G) assert expected == actual # For a DiGraph test, use the first graph from the paper cited in # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126 G = nx.DiGraph() G.add_node(0, label="Univ") G.add_node(1, label="ProfA") G.add_node(2, label="ProfB") G.add_node(3, label="StudentA") G.add_node(4, label="StudentB") G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)]) expected = { 0: {0: 1, 1: 0.0, 2: 0.1323363991265798, 3: 0.0, 4: 0.03387811817640443}, 1: {0: 0.0, 1: 1, 2: 0.4135512472705618, 3: 0.0, 4: 0.10586911930126384}, 2: { 0: 0.1323363991265798, 1: 0.4135512472705618, 2: 1, 3: 0.04234764772050554, 4: 0.08822426608438655, }, 3: {0: 0.0, 1: 0.0, 2: 0.04234764772050554, 3: 1, 4: 0.3308409978164495}, 4: { 0: 0.03387811817640443, 1: 0.10586911930126384, 2: 0.08822426608438655, 3: 0.3308409978164495, 4: 1, }, } # Use the importance_factor from the paper to get the same numbers. actual = nx.algorithms.similarity.simrank_similarity(G, importance_factor=0.8) assert expected == actual def test_simrank_source_no_target(self): G = nx.cycle_graph(5) expected = { 0: 1, 1: 0.3951219505902448, 2: 0.5707317069281646, 3: 0.5707317069281646, 4: 0.3951219505902449, } actual = nx.simrank_similarity(G, source=0) assert expected == actual # For a DiGraph test, use the first graph from the paper cited in # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126 G = nx.DiGraph() G.add_node(0, label="Univ") G.add_node(1, label="ProfA") G.add_node(2, label="ProfB") G.add_node(3, label="StudentA") G.add_node(4, label="StudentB") G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)]) expected = {0: 1, 1: 0.0, 2: 0.1323363991265798, 3: 0.0, 4: 0.03387811817640443} # Use the importance_factor from the paper to get the same numbers. actual = nx.algorithms.similarity.simrank_similarity( G, importance_factor=0.8, source=0 ) assert expected == actual def test_simrank_source_and_target(self): G = nx.cycle_graph(5) expected = 1 actual = nx.simrank_similarity(G, source=0, target=0) # For a DiGraph test, use the first graph from the paper cited in # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126 G = nx.DiGraph() G.add_node(0, label="Univ") G.add_node(1, label="ProfA") G.add_node(2, label="ProfB") G.add_node(3, label="StudentA") G.add_node(4, label="StudentB") G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)]) expected = 0.1323363991265798 # Use the importance_factor from the paper to get the same numbers. # Use the pair (0,2) because (0,0) and (0,1) have trivial results. actual = nx.algorithms.similarity.simrank_similarity( G, importance_factor=0.8, source=0, target=2 ) assert expected == actual def test_simrank_numpy_no_source_no_target(self): G = nx.cycle_graph(5) expected = numpy.array( [ [ 1.0, 0.3947180735764555, 0.570482097206368, 0.570482097206368, 0.3947180735764555, ], [ 0.3947180735764555, 1.0, 0.3947180735764555, 0.570482097206368, 0.570482097206368, ], [ 0.570482097206368, 0.3947180735764555, 1.0, 0.3947180735764555, 0.570482097206368, ], [ 0.570482097206368, 0.570482097206368, 0.3947180735764555, 1.0, 0.3947180735764555, ], [ 0.3947180735764555, 0.570482097206368, 0.570482097206368, 0.3947180735764555, 1.0, ], ] ) actual = nx.simrank_similarity_numpy(G) numpy.testing.assert_allclose(expected, actual, atol=1e-7) def test_simrank_numpy_source_no_target(self): G = nx.cycle_graph(5) expected = numpy.array( [ 1.0, 0.3947180735764555, 0.570482097206368, 0.570482097206368, 0.3947180735764555, ] ) actual = nx.simrank_similarity_numpy(G, source=0) numpy.testing.assert_allclose(expected, actual, atol=1e-7) def test_simrank_numpy_source_and_target(self): G = nx.cycle_graph(5) expected = 1.0 actual = nx.simrank_similarity_numpy(G, source=0, target=0) numpy.testing.assert_allclose(expected, actual, atol=1e-7)