comparison env/lib/python3.9/site-packages/networkx/algorithms/similarity.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """ Functions measuring similarity using graph edit distance.
2
3 The graph edit distance is the number of edge/node changes needed
4 to make two graphs isomorphic.
5
6 The default algorithm/implementation is sub-optimal for some graphs.
7 The problem of finding the exact Graph Edit Distance (GED) is NP-hard
8 so it is often slow. If the simple interface `graph_edit_distance`
9 takes too long for your graph, try `optimize_graph_edit_distance`
10 and/or `optimize_edit_paths`.
11
12 At the same time, I encourage capable people to investigate
13 alternative GED algorithms, in order to improve the choices available.
14 """
15 import time
16 from itertools import product
17 import networkx as nx
18
19 __all__ = [
20 "graph_edit_distance",
21 "optimal_edit_paths",
22 "optimize_graph_edit_distance",
23 "optimize_edit_paths",
24 "simrank_similarity",
25 "simrank_similarity_numpy",
26 ]
27
28
29 def debug_print(*args, **kwargs):
30 print(*args, **kwargs)
31
32
33 def graph_edit_distance(
34 G1,
35 G2,
36 node_match=None,
37 edge_match=None,
38 node_subst_cost=None,
39 node_del_cost=None,
40 node_ins_cost=None,
41 edge_subst_cost=None,
42 edge_del_cost=None,
43 edge_ins_cost=None,
44 roots=None,
45 upper_bound=None,
46 timeout=None,
47 ):
48 """Returns GED (graph edit distance) between graphs G1 and G2.
49
50 Graph edit distance is a graph similarity measure analogous to
51 Levenshtein distance for strings. It is defined as minimum cost
52 of edit path (sequence of node and edge edit operations)
53 transforming graph G1 to graph isomorphic to G2.
54
55 Parameters
56 ----------
57 G1, G2: graphs
58 The two graphs G1 and G2 must be of the same type.
59
60 node_match : callable
61 A function that returns True if node n1 in G1 and n2 in G2
62 should be considered equal during matching.
63
64 The function will be called like
65
66 node_match(G1.nodes[n1], G2.nodes[n2]).
67
68 That is, the function will receive the node attribute
69 dictionaries for n1 and n2 as inputs.
70
71 Ignored if node_subst_cost is specified. If neither
72 node_match nor node_subst_cost are specified then node
73 attributes are not considered.
74
75 edge_match : callable
76 A function that returns True if the edge attribute dictionaries
77 for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should
78 be considered equal during matching.
79
80 The function will be called like
81
82 edge_match(G1[u1][v1], G2[u2][v2]).
83
84 That is, the function will receive the edge attribute
85 dictionaries of the edges under consideration.
86
87 Ignored if edge_subst_cost is specified. If neither
88 edge_match nor edge_subst_cost are specified then edge
89 attributes are not considered.
90
91 node_subst_cost, node_del_cost, node_ins_cost : callable
92 Functions that return the costs of node substitution, node
93 deletion, and node insertion, respectively.
94
95 The functions will be called like
96
97 node_subst_cost(G1.nodes[n1], G2.nodes[n2]),
98 node_del_cost(G1.nodes[n1]),
99 node_ins_cost(G2.nodes[n2]).
100
101 That is, the functions will receive the node attribute
102 dictionaries as inputs. The functions are expected to return
103 positive numeric values.
104
105 Function node_subst_cost overrides node_match if specified.
106 If neither node_match nor node_subst_cost are specified then
107 default node substitution cost of 0 is used (node attributes
108 are not considered during matching).
109
110 If node_del_cost is not specified then default node deletion
111 cost of 1 is used. If node_ins_cost is not specified then
112 default node insertion cost of 1 is used.
113
114 edge_subst_cost, edge_del_cost, edge_ins_cost : callable
115 Functions that return the costs of edge substitution, edge
116 deletion, and edge insertion, respectively.
117
118 The functions will be called like
119
120 edge_subst_cost(G1[u1][v1], G2[u2][v2]),
121 edge_del_cost(G1[u1][v1]),
122 edge_ins_cost(G2[u2][v2]).
123
124 That is, the functions will receive the edge attribute
125 dictionaries as inputs. The functions are expected to return
126 positive numeric values.
127
128 Function edge_subst_cost overrides edge_match if specified.
129 If neither edge_match nor edge_subst_cost are specified then
130 default edge substitution cost of 0 is used (edge attributes
131 are not considered during matching).
132
133 If edge_del_cost is not specified then default edge deletion
134 cost of 1 is used. If edge_ins_cost is not specified then
135 default edge insertion cost of 1 is used.
136
137 roots : 2-tuple
138 Tuple where first element is a node in G1 and the second
139 is a node in G2.
140 These nodes are forced to be matched in the comparison to
141 allow comparison between rooted graphs.
142
143 upper_bound : numeric
144 Maximum edit distance to consider. Return None if no edit
145 distance under or equal to upper_bound exists.
146
147 timeout : numeric
148 Maximum number of seconds to execute.
149 After timeout is met, the current best GED is returned.
150
151 Examples
152 --------
153 >>> G1 = nx.cycle_graph(6)
154 >>> G2 = nx.wheel_graph(7)
155 >>> nx.graph_edit_distance(G1, G2)
156 7.0
157
158 >>> G1 = nx.star_graph(5)
159 >>> G2 = nx.star_graph(5)
160 >>> nx.graph_edit_distance(G1, G2, roots=(0, 0))
161 0.0
162 >>> nx.graph_edit_distance(G1, G2, roots=(1, 0))
163 8.0
164
165 See Also
166 --------
167 optimal_edit_paths, optimize_graph_edit_distance,
168
169 is_isomorphic (test for graph edit distance of 0)
170
171 References
172 ----------
173 .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick
174 Martineau. An Exact Graph Edit Distance Algorithm for Solving
175 Pattern Recognition Problems. 4th International Conference on
176 Pattern Recognition Applications and Methods 2015, Jan 2015,
177 Lisbon, Portugal. 2015,
178 <10.5220/0005209202710278>. <hal-01168816>
179 https://hal.archives-ouvertes.fr/hal-01168816
180
181 """
182 bestcost = None
183 for vertex_path, edge_path, cost in optimize_edit_paths(
184 G1,
185 G2,
186 node_match,
187 edge_match,
188 node_subst_cost,
189 node_del_cost,
190 node_ins_cost,
191 edge_subst_cost,
192 edge_del_cost,
193 edge_ins_cost,
194 upper_bound,
195 True,
196 roots,
197 timeout,
198 ):
199 # assert bestcost is None or cost < bestcost
200 bestcost = cost
201 return bestcost
202
203
204 def optimal_edit_paths(
205 G1,
206 G2,
207 node_match=None,
208 edge_match=None,
209 node_subst_cost=None,
210 node_del_cost=None,
211 node_ins_cost=None,
212 edge_subst_cost=None,
213 edge_del_cost=None,
214 edge_ins_cost=None,
215 upper_bound=None,
216 ):
217 """Returns all minimum-cost edit paths transforming G1 to G2.
218
219 Graph edit path is a sequence of node and edge edit operations
220 transforming graph G1 to graph isomorphic to G2. Edit operations
221 include substitutions, deletions, and insertions.
222
223 Parameters
224 ----------
225 G1, G2: graphs
226 The two graphs G1 and G2 must be of the same type.
227
228 node_match : callable
229 A function that returns True if node n1 in G1 and n2 in G2
230 should be considered equal during matching.
231
232 The function will be called like
233
234 node_match(G1.nodes[n1], G2.nodes[n2]).
235
236 That is, the function will receive the node attribute
237 dictionaries for n1 and n2 as inputs.
238
239 Ignored if node_subst_cost is specified. If neither
240 node_match nor node_subst_cost are specified then node
241 attributes are not considered.
242
243 edge_match : callable
244 A function that returns True if the edge attribute dictionaries
245 for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should
246 be considered equal during matching.
247
248 The function will be called like
249
250 edge_match(G1[u1][v1], G2[u2][v2]).
251
252 That is, the function will receive the edge attribute
253 dictionaries of the edges under consideration.
254
255 Ignored if edge_subst_cost is specified. If neither
256 edge_match nor edge_subst_cost are specified then edge
257 attributes are not considered.
258
259 node_subst_cost, node_del_cost, node_ins_cost : callable
260 Functions that return the costs of node substitution, node
261 deletion, and node insertion, respectively.
262
263 The functions will be called like
264
265 node_subst_cost(G1.nodes[n1], G2.nodes[n2]),
266 node_del_cost(G1.nodes[n1]),
267 node_ins_cost(G2.nodes[n2]).
268
269 That is, the functions will receive the node attribute
270 dictionaries as inputs. The functions are expected to return
271 positive numeric values.
272
273 Function node_subst_cost overrides node_match if specified.
274 If neither node_match nor node_subst_cost are specified then
275 default node substitution cost of 0 is used (node attributes
276 are not considered during matching).
277
278 If node_del_cost is not specified then default node deletion
279 cost of 1 is used. If node_ins_cost is not specified then
280 default node insertion cost of 1 is used.
281
282 edge_subst_cost, edge_del_cost, edge_ins_cost : callable
283 Functions that return the costs of edge substitution, edge
284 deletion, and edge insertion, respectively.
285
286 The functions will be called like
287
288 edge_subst_cost(G1[u1][v1], G2[u2][v2]),
289 edge_del_cost(G1[u1][v1]),
290 edge_ins_cost(G2[u2][v2]).
291
292 That is, the functions will receive the edge attribute
293 dictionaries as inputs. The functions are expected to return
294 positive numeric values.
295
296 Function edge_subst_cost overrides edge_match if specified.
297 If neither edge_match nor edge_subst_cost are specified then
298 default edge substitution cost of 0 is used (edge attributes
299 are not considered during matching).
300
301 If edge_del_cost is not specified then default edge deletion
302 cost of 1 is used. If edge_ins_cost is not specified then
303 default edge insertion cost of 1 is used.
304
305 upper_bound : numeric
306 Maximum edit distance to consider.
307
308 Returns
309 -------
310 edit_paths : list of tuples (node_edit_path, edge_edit_path)
311 node_edit_path : list of tuples (u, v)
312 edge_edit_path : list of tuples ((u1, v1), (u2, v2))
313
314 cost : numeric
315 Optimal edit path cost (graph edit distance).
316
317 Examples
318 --------
319 >>> G1 = nx.cycle_graph(4)
320 >>> G2 = nx.wheel_graph(5)
321 >>> paths, cost = nx.optimal_edit_paths(G1, G2)
322 >>> len(paths)
323 40
324 >>> cost
325 5.0
326
327 See Also
328 --------
329 graph_edit_distance, optimize_edit_paths
330
331 References
332 ----------
333 .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick
334 Martineau. An Exact Graph Edit Distance Algorithm for Solving
335 Pattern Recognition Problems. 4th International Conference on
336 Pattern Recognition Applications and Methods 2015, Jan 2015,
337 Lisbon, Portugal. 2015,
338 <10.5220/0005209202710278>. <hal-01168816>
339 https://hal.archives-ouvertes.fr/hal-01168816
340
341 """
342 paths = list()
343 bestcost = None
344 for vertex_path, edge_path, cost in optimize_edit_paths(
345 G1,
346 G2,
347 node_match,
348 edge_match,
349 node_subst_cost,
350 node_del_cost,
351 node_ins_cost,
352 edge_subst_cost,
353 edge_del_cost,
354 edge_ins_cost,
355 upper_bound,
356 False,
357 ):
358 # assert bestcost is None or cost <= bestcost
359 if bestcost is not None and cost < bestcost:
360 paths = list()
361 paths.append((vertex_path, edge_path))
362 bestcost = cost
363 return paths, bestcost
364
365
366 def optimize_graph_edit_distance(
367 G1,
368 G2,
369 node_match=None,
370 edge_match=None,
371 node_subst_cost=None,
372 node_del_cost=None,
373 node_ins_cost=None,
374 edge_subst_cost=None,
375 edge_del_cost=None,
376 edge_ins_cost=None,
377 upper_bound=None,
378 ):
379 """Returns consecutive approximations of GED (graph edit distance)
380 between graphs G1 and G2.
381
382 Graph edit distance is a graph similarity measure analogous to
383 Levenshtein distance for strings. It is defined as minimum cost
384 of edit path (sequence of node and edge edit operations)
385 transforming graph G1 to graph isomorphic to G2.
386
387 Parameters
388 ----------
389 G1, G2: graphs
390 The two graphs G1 and G2 must be of the same type.
391
392 node_match : callable
393 A function that returns True if node n1 in G1 and n2 in G2
394 should be considered equal during matching.
395
396 The function will be called like
397
398 node_match(G1.nodes[n1], G2.nodes[n2]).
399
400 That is, the function will receive the node attribute
401 dictionaries for n1 and n2 as inputs.
402
403 Ignored if node_subst_cost is specified. If neither
404 node_match nor node_subst_cost are specified then node
405 attributes are not considered.
406
407 edge_match : callable
408 A function that returns True if the edge attribute dictionaries
409 for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should
410 be considered equal during matching.
411
412 The function will be called like
413
414 edge_match(G1[u1][v1], G2[u2][v2]).
415
416 That is, the function will receive the edge attribute
417 dictionaries of the edges under consideration.
418
419 Ignored if edge_subst_cost is specified. If neither
420 edge_match nor edge_subst_cost are specified then edge
421 attributes are not considered.
422
423 node_subst_cost, node_del_cost, node_ins_cost : callable
424 Functions that return the costs of node substitution, node
425 deletion, and node insertion, respectively.
426
427 The functions will be called like
428
429 node_subst_cost(G1.nodes[n1], G2.nodes[n2]),
430 node_del_cost(G1.nodes[n1]),
431 node_ins_cost(G2.nodes[n2]).
432
433 That is, the functions will receive the node attribute
434 dictionaries as inputs. The functions are expected to return
435 positive numeric values.
436
437 Function node_subst_cost overrides node_match if specified.
438 If neither node_match nor node_subst_cost are specified then
439 default node substitution cost of 0 is used (node attributes
440 are not considered during matching).
441
442 If node_del_cost is not specified then default node deletion
443 cost of 1 is used. If node_ins_cost is not specified then
444 default node insertion cost of 1 is used.
445
446 edge_subst_cost, edge_del_cost, edge_ins_cost : callable
447 Functions that return the costs of edge substitution, edge
448 deletion, and edge insertion, respectively.
449
450 The functions will be called like
451
452 edge_subst_cost(G1[u1][v1], G2[u2][v2]),
453 edge_del_cost(G1[u1][v1]),
454 edge_ins_cost(G2[u2][v2]).
455
456 That is, the functions will receive the edge attribute
457 dictionaries as inputs. The functions are expected to return
458 positive numeric values.
459
460 Function edge_subst_cost overrides edge_match if specified.
461 If neither edge_match nor edge_subst_cost are specified then
462 default edge substitution cost of 0 is used (edge attributes
463 are not considered during matching).
464
465 If edge_del_cost is not specified then default edge deletion
466 cost of 1 is used. If edge_ins_cost is not specified then
467 default edge insertion cost of 1 is used.
468
469 upper_bound : numeric
470 Maximum edit distance to consider.
471
472 Returns
473 -------
474 Generator of consecutive approximations of graph edit distance.
475
476 Examples
477 --------
478 >>> G1 = nx.cycle_graph(6)
479 >>> G2 = nx.wheel_graph(7)
480 >>> for v in nx.optimize_graph_edit_distance(G1, G2):
481 ... minv = v
482 >>> minv
483 7.0
484
485 See Also
486 --------
487 graph_edit_distance, optimize_edit_paths
488
489 References
490 ----------
491 .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick
492 Martineau. An Exact Graph Edit Distance Algorithm for Solving
493 Pattern Recognition Problems. 4th International Conference on
494 Pattern Recognition Applications and Methods 2015, Jan 2015,
495 Lisbon, Portugal. 2015,
496 <10.5220/0005209202710278>. <hal-01168816>
497 https://hal.archives-ouvertes.fr/hal-01168816
498 """
499 for vertex_path, edge_path, cost in optimize_edit_paths(
500 G1,
501 G2,
502 node_match,
503 edge_match,
504 node_subst_cost,
505 node_del_cost,
506 node_ins_cost,
507 edge_subst_cost,
508 edge_del_cost,
509 edge_ins_cost,
510 upper_bound,
511 True,
512 ):
513 yield cost
514
515
516 def optimize_edit_paths(
517 G1,
518 G2,
519 node_match=None,
520 edge_match=None,
521 node_subst_cost=None,
522 node_del_cost=None,
523 node_ins_cost=None,
524 edge_subst_cost=None,
525 edge_del_cost=None,
526 edge_ins_cost=None,
527 upper_bound=None,
528 strictly_decreasing=True,
529 roots=None,
530 timeout=None,
531 ):
532 """GED (graph edit distance) calculation: advanced interface.
533
534 Graph edit path is a sequence of node and edge edit operations
535 transforming graph G1 to graph isomorphic to G2. Edit operations
536 include substitutions, deletions, and insertions.
537
538 Graph edit distance is defined as minimum cost of edit path.
539
540 Parameters
541 ----------
542 G1, G2: graphs
543 The two graphs G1 and G2 must be of the same type.
544
545 node_match : callable
546 A function that returns True if node n1 in G1 and n2 in G2
547 should be considered equal during matching.
548
549 The function will be called like
550
551 node_match(G1.nodes[n1], G2.nodes[n2]).
552
553 That is, the function will receive the node attribute
554 dictionaries for n1 and n2 as inputs.
555
556 Ignored if node_subst_cost is specified. If neither
557 node_match nor node_subst_cost are specified then node
558 attributes are not considered.
559
560 edge_match : callable
561 A function that returns True if the edge attribute dictionaries
562 for the pair of nodes (u1, v1) in G1 and (u2, v2) in G2 should
563 be considered equal during matching.
564
565 The function will be called like
566
567 edge_match(G1[u1][v1], G2[u2][v2]).
568
569 That is, the function will receive the edge attribute
570 dictionaries of the edges under consideration.
571
572 Ignored if edge_subst_cost is specified. If neither
573 edge_match nor edge_subst_cost are specified then edge
574 attributes are not considered.
575
576 node_subst_cost, node_del_cost, node_ins_cost : callable
577 Functions that return the costs of node substitution, node
578 deletion, and node insertion, respectively.
579
580 The functions will be called like
581
582 node_subst_cost(G1.nodes[n1], G2.nodes[n2]),
583 node_del_cost(G1.nodes[n1]),
584 node_ins_cost(G2.nodes[n2]).
585
586 That is, the functions will receive the node attribute
587 dictionaries as inputs. The functions are expected to return
588 positive numeric values.
589
590 Function node_subst_cost overrides node_match if specified.
591 If neither node_match nor node_subst_cost are specified then
592 default node substitution cost of 0 is used (node attributes
593 are not considered during matching).
594
595 If node_del_cost is not specified then default node deletion
596 cost of 1 is used. If node_ins_cost is not specified then
597 default node insertion cost of 1 is used.
598
599 edge_subst_cost, edge_del_cost, edge_ins_cost : callable
600 Functions that return the costs of edge substitution, edge
601 deletion, and edge insertion, respectively.
602
603 The functions will be called like
604
605 edge_subst_cost(G1[u1][v1], G2[u2][v2]),
606 edge_del_cost(G1[u1][v1]),
607 edge_ins_cost(G2[u2][v2]).
608
609 That is, the functions will receive the edge attribute
610 dictionaries as inputs. The functions are expected to return
611 positive numeric values.
612
613 Function edge_subst_cost overrides edge_match if specified.
614 If neither edge_match nor edge_subst_cost are specified then
615 default edge substitution cost of 0 is used (edge attributes
616 are not considered during matching).
617
618 If edge_del_cost is not specified then default edge deletion
619 cost of 1 is used. If edge_ins_cost is not specified then
620 default edge insertion cost of 1 is used.
621
622 upper_bound : numeric
623 Maximum edit distance to consider.
624
625 strictly_decreasing : bool
626 If True, return consecutive approximations of strictly
627 decreasing cost. Otherwise, return all edit paths of cost
628 less than or equal to the previous minimum cost.
629
630 roots : 2-tuple
631 Tuple where first element is a node in G1 and the second
632 is a node in G2.
633 These nodes are forced to be matched in the comparison to
634 allow comparison between rooted graphs.
635
636 timeout : numeric
637 Maximum number of seconds to execute.
638 After timeout is met, the current best GED is returned.
639
640 Returns
641 -------
642 Generator of tuples (node_edit_path, edge_edit_path, cost)
643 node_edit_path : list of tuples (u, v)
644 edge_edit_path : list of tuples ((u1, v1), (u2, v2))
645 cost : numeric
646
647 See Also
648 --------
649 graph_edit_distance, optimize_graph_edit_distance, optimal_edit_paths
650
651 References
652 ----------
653 .. [1] Zeina Abu-Aisheh, Romain Raveaux, Jean-Yves Ramel, Patrick
654 Martineau. An Exact Graph Edit Distance Algorithm for Solving
655 Pattern Recognition Problems. 4th International Conference on
656 Pattern Recognition Applications and Methods 2015, Jan 2015,
657 Lisbon, Portugal. 2015,
658 <10.5220/0005209202710278>. <hal-01168816>
659 https://hal.archives-ouvertes.fr/hal-01168816
660
661 """
662 # TODO: support DiGraph
663
664 import numpy as np
665 from scipy.optimize import linear_sum_assignment
666
667 class CostMatrix:
668 def __init__(self, C, lsa_row_ind, lsa_col_ind, ls):
669 # assert C.shape[0] == len(lsa_row_ind)
670 # assert C.shape[1] == len(lsa_col_ind)
671 # assert len(lsa_row_ind) == len(lsa_col_ind)
672 # assert set(lsa_row_ind) == set(range(len(lsa_row_ind)))
673 # assert set(lsa_col_ind) == set(range(len(lsa_col_ind)))
674 # assert ls == C[lsa_row_ind, lsa_col_ind].sum()
675 self.C = C
676 self.lsa_row_ind = lsa_row_ind
677 self.lsa_col_ind = lsa_col_ind
678 self.ls = ls
679
680 def make_CostMatrix(C, m, n):
681 # assert(C.shape == (m + n, m + n))
682 lsa_row_ind, lsa_col_ind = linear_sum_assignment(C)
683
684 # Fixup dummy assignments:
685 # each substitution i<->j should have dummy assignment m+j<->n+i
686 # NOTE: fast reduce of Cv relies on it
687 # assert len(lsa_row_ind) == len(lsa_col_ind)
688 indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind)
689 subst_ind = list(k for k, i, j in indexes if i < m and j < n)
690 indexes = zip(range(len(lsa_row_ind)), lsa_row_ind, lsa_col_ind)
691 dummy_ind = list(k for k, i, j in indexes if i >= m and j >= n)
692 # assert len(subst_ind) == len(dummy_ind)
693 lsa_row_ind[dummy_ind] = lsa_col_ind[subst_ind] + m
694 lsa_col_ind[dummy_ind] = lsa_row_ind[subst_ind] + n
695
696 return CostMatrix(
697 C, lsa_row_ind, lsa_col_ind, C[lsa_row_ind, lsa_col_ind].sum()
698 )
699
700 def extract_C(C, i, j, m, n):
701 # assert(C.shape == (m + n, m + n))
702 row_ind = [k in i or k - m in j for k in range(m + n)]
703 col_ind = [k in j or k - n in i for k in range(m + n)]
704 return C[row_ind, :][:, col_ind]
705
706 def reduce_C(C, i, j, m, n):
707 # assert(C.shape == (m + n, m + n))
708 row_ind = [k not in i and k - m not in j for k in range(m + n)]
709 col_ind = [k not in j and k - n not in i for k in range(m + n)]
710 return C[row_ind, :][:, col_ind]
711
712 def reduce_ind(ind, i):
713 # assert set(ind) == set(range(len(ind)))
714 rind = ind[[k not in i for k in ind]]
715 for k in set(i):
716 rind[rind >= k] -= 1
717 return rind
718
719 def match_edges(u, v, pending_g, pending_h, Ce, matched_uv=[]):
720 """
721 Parameters:
722 u, v: matched vertices, u=None or v=None for
723 deletion/insertion
724 pending_g, pending_h: lists of edges not yet mapped
725 Ce: CostMatrix of pending edge mappings
726 matched_uv: partial vertex edit path
727 list of tuples (u, v) of previously matched vertex
728 mappings u<->v, u=None or v=None for
729 deletion/insertion
730
731 Returns:
732 list of (i, j): indices of edge mappings g<->h
733 localCe: local CostMatrix of edge mappings
734 (basically submatrix of Ce at cross of rows i, cols j)
735 """
736 M = len(pending_g)
737 N = len(pending_h)
738 # assert Ce.C.shape == (M + N, M + N)
739
740 g_ind = [
741 i
742 for i in range(M)
743 if pending_g[i][:2] == (u, u)
744 or any(pending_g[i][:2] in ((p, u), (u, p)) for p, q in matched_uv)
745 ]
746 h_ind = [
747 j
748 for j in range(N)
749 if pending_h[j][:2] == (v, v)
750 or any(pending_h[j][:2] in ((q, v), (v, q)) for p, q in matched_uv)
751 ]
752 m = len(g_ind)
753 n = len(h_ind)
754
755 if m or n:
756 C = extract_C(Ce.C, g_ind, h_ind, M, N)
757 # assert C.shape == (m + n, m + n)
758
759 # Forbid structurally invalid matches
760 # NOTE: inf remembered from Ce construction
761 for k, i in zip(range(m), g_ind):
762 g = pending_g[i][:2]
763 for l, j in zip(range(n), h_ind):
764 h = pending_h[j][:2]
765 if nx.is_directed(G1) or nx.is_directed(G2):
766 if any(
767 g == (p, u) and h == (q, v) or g == (u, p) and h == (v, q)
768 for p, q in matched_uv
769 ):
770 continue
771 else:
772 if any(
773 g in ((p, u), (u, p)) and h in ((q, v), (v, q))
774 for p, q in matched_uv
775 ):
776 continue
777 if g == (u, u):
778 continue
779 if h == (v, v):
780 continue
781 C[k, l] = inf
782
783 localCe = make_CostMatrix(C, m, n)
784 ij = list(
785 (
786 g_ind[k] if k < m else M + h_ind[l],
787 h_ind[l] if l < n else N + g_ind[k],
788 )
789 for k, l in zip(localCe.lsa_row_ind, localCe.lsa_col_ind)
790 if k < m or l < n
791 )
792
793 else:
794 ij = []
795 localCe = CostMatrix(np.empty((0, 0)), [], [], 0)
796
797 return ij, localCe
798
799 def reduce_Ce(Ce, ij, m, n):
800 if len(ij):
801 i, j = zip(*ij)
802 m_i = m - sum(1 for t in i if t < m)
803 n_j = n - sum(1 for t in j if t < n)
804 return make_CostMatrix(reduce_C(Ce.C, i, j, m, n), m_i, n_j)
805 else:
806 return Ce
807
808 def get_edit_ops(
809 matched_uv, pending_u, pending_v, Cv, pending_g, pending_h, Ce, matched_cost
810 ):
811 """
812 Parameters:
813 matched_uv: partial vertex edit path
814 list of tuples (u, v) of vertex mappings u<->v,
815 u=None or v=None for deletion/insertion
816 pending_u, pending_v: lists of vertices not yet mapped
817 Cv: CostMatrix of pending vertex mappings
818 pending_g, pending_h: lists of edges not yet mapped
819 Ce: CostMatrix of pending edge mappings
820 matched_cost: cost of partial edit path
821
822 Returns:
823 sequence of
824 (i, j): indices of vertex mapping u<->v
825 Cv_ij: reduced CostMatrix of pending vertex mappings
826 (basically Cv with row i, col j removed)
827 list of (x, y): indices of edge mappings g<->h
828 Ce_xy: reduced CostMatrix of pending edge mappings
829 (basically Ce with rows x, cols y removed)
830 cost: total cost of edit operation
831 NOTE: most promising ops first
832 """
833 m = len(pending_u)
834 n = len(pending_v)
835 # assert Cv.C.shape == (m + n, m + n)
836
837 # 1) a vertex mapping from optimal linear sum assignment
838 i, j = min(
839 (k, l) for k, l in zip(Cv.lsa_row_ind, Cv.lsa_col_ind) if k < m or l < n
840 )
841 xy, localCe = match_edges(
842 pending_u[i] if i < m else None,
843 pending_v[j] if j < n else None,
844 pending_g,
845 pending_h,
846 Ce,
847 matched_uv,
848 )
849 Ce_xy = reduce_Ce(Ce, xy, len(pending_g), len(pending_h))
850 # assert Ce.ls <= localCe.ls + Ce_xy.ls
851 if prune(matched_cost + Cv.ls + localCe.ls + Ce_xy.ls):
852 pass
853 else:
854 # get reduced Cv efficiently
855 Cv_ij = CostMatrix(
856 reduce_C(Cv.C, (i,), (j,), m, n),
857 reduce_ind(Cv.lsa_row_ind, (i, m + j)),
858 reduce_ind(Cv.lsa_col_ind, (j, n + i)),
859 Cv.ls - Cv.C[i, j],
860 )
861 yield (i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls
862
863 # 2) other candidates, sorted by lower-bound cost estimate
864 other = list()
865 fixed_i, fixed_j = i, j
866 if m <= n:
867 candidates = (
868 (t, fixed_j)
869 for t in range(m + n)
870 if t != fixed_i and (t < m or t == m + fixed_j)
871 )
872 else:
873 candidates = (
874 (fixed_i, t)
875 for t in range(m + n)
876 if t != fixed_j and (t < n or t == n + fixed_i)
877 )
878 for i, j in candidates:
879 if prune(matched_cost + Cv.C[i, j] + Ce.ls):
880 continue
881 Cv_ij = make_CostMatrix(
882 reduce_C(Cv.C, (i,), (j,), m, n),
883 m - 1 if i < m else m,
884 n - 1 if j < n else n,
885 )
886 # assert Cv.ls <= Cv.C[i, j] + Cv_ij.ls
887 if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + Ce.ls):
888 continue
889 xy, localCe = match_edges(
890 pending_u[i] if i < m else None,
891 pending_v[j] if j < n else None,
892 pending_g,
893 pending_h,
894 Ce,
895 matched_uv,
896 )
897 if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + localCe.ls):
898 continue
899 Ce_xy = reduce_Ce(Ce, xy, len(pending_g), len(pending_h))
900 # assert Ce.ls <= localCe.ls + Ce_xy.ls
901 if prune(matched_cost + Cv.C[i, j] + Cv_ij.ls + localCe.ls + Ce_xy.ls):
902 continue
903 other.append(((i, j), Cv_ij, xy, Ce_xy, Cv.C[i, j] + localCe.ls))
904
905 yield from sorted(other, key=lambda t: t[4] + t[1].ls + t[3].ls)
906
907 def get_edit_paths(
908 matched_uv,
909 pending_u,
910 pending_v,
911 Cv,
912 matched_gh,
913 pending_g,
914 pending_h,
915 Ce,
916 matched_cost,
917 ):
918 """
919 Parameters:
920 matched_uv: partial vertex edit path
921 list of tuples (u, v) of vertex mappings u<->v,
922 u=None or v=None for deletion/insertion
923 pending_u, pending_v: lists of vertices not yet mapped
924 Cv: CostMatrix of pending vertex mappings
925 matched_gh: partial edge edit path
926 list of tuples (g, h) of edge mappings g<->h,
927 g=None or h=None for deletion/insertion
928 pending_g, pending_h: lists of edges not yet mapped
929 Ce: CostMatrix of pending edge mappings
930 matched_cost: cost of partial edit path
931
932 Returns:
933 sequence of (vertex_path, edge_path, cost)
934 vertex_path: complete vertex edit path
935 list of tuples (u, v) of vertex mappings u<->v,
936 u=None or v=None for deletion/insertion
937 edge_path: complete edge edit path
938 list of tuples (g, h) of edge mappings g<->h,
939 g=None or h=None for deletion/insertion
940 cost: total cost of edit path
941 NOTE: path costs are non-increasing
942 """
943 # debug_print('matched-uv:', matched_uv)
944 # debug_print('matched-gh:', matched_gh)
945 # debug_print('matched-cost:', matched_cost)
946 # debug_print('pending-u:', pending_u)
947 # debug_print('pending-v:', pending_v)
948 # debug_print(Cv.C)
949 # assert list(sorted(G1.nodes)) == list(sorted(list(u for u, v in matched_uv if u is not None) + pending_u))
950 # assert list(sorted(G2.nodes)) == list(sorted(list(v for u, v in matched_uv if v is not None) + pending_v))
951 # debug_print('pending-g:', pending_g)
952 # debug_print('pending-h:', pending_h)
953 # debug_print(Ce.C)
954 # assert list(sorted(G1.edges)) == list(sorted(list(g for g, h in matched_gh if g is not None) + pending_g))
955 # assert list(sorted(G2.edges)) == list(sorted(list(h for g, h in matched_gh if h is not None) + pending_h))
956 # debug_print()
957
958 if prune(matched_cost + Cv.ls + Ce.ls):
959 return
960
961 if not max(len(pending_u), len(pending_v)):
962 # assert not len(pending_g)
963 # assert not len(pending_h)
964 # path completed!
965 # assert matched_cost <= maxcost.value
966 maxcost.value = min(maxcost.value, matched_cost)
967 yield matched_uv, matched_gh, matched_cost
968
969 else:
970 edit_ops = get_edit_ops(
971 matched_uv,
972 pending_u,
973 pending_v,
974 Cv,
975 pending_g,
976 pending_h,
977 Ce,
978 matched_cost,
979 )
980 for ij, Cv_ij, xy, Ce_xy, edit_cost in edit_ops:
981 i, j = ij
982 # assert Cv.C[i, j] + sum(Ce.C[t] for t in xy) == edit_cost
983 if prune(matched_cost + edit_cost + Cv_ij.ls + Ce_xy.ls):
984 continue
985
986 # dive deeper
987 u = pending_u.pop(i) if i < len(pending_u) else None
988 v = pending_v.pop(j) if j < len(pending_v) else None
989 matched_uv.append((u, v))
990 for x, y in xy:
991 len_g = len(pending_g)
992 len_h = len(pending_h)
993 matched_gh.append(
994 (
995 pending_g[x] if x < len_g else None,
996 pending_h[y] if y < len_h else None,
997 )
998 )
999 sortedx = list(sorted(x for x, y in xy))
1000 sortedy = list(sorted(y for x, y in xy))
1001 G = list(
1002 (pending_g.pop(x) if x < len(pending_g) else None)
1003 for x in reversed(sortedx)
1004 )
1005 H = list(
1006 (pending_h.pop(y) if y < len(pending_h) else None)
1007 for y in reversed(sortedy)
1008 )
1009
1010 yield from get_edit_paths(
1011 matched_uv,
1012 pending_u,
1013 pending_v,
1014 Cv_ij,
1015 matched_gh,
1016 pending_g,
1017 pending_h,
1018 Ce_xy,
1019 matched_cost + edit_cost,
1020 )
1021
1022 # backtrack
1023 if u is not None:
1024 pending_u.insert(i, u)
1025 if v is not None:
1026 pending_v.insert(j, v)
1027 matched_uv.pop()
1028 for x, g in zip(sortedx, reversed(G)):
1029 if g is not None:
1030 pending_g.insert(x, g)
1031 for y, h in zip(sortedy, reversed(H)):
1032 if h is not None:
1033 pending_h.insert(y, h)
1034 for t in xy:
1035 matched_gh.pop()
1036
1037 # Initialization
1038
1039 pending_u = list(G1.nodes)
1040 pending_v = list(G2.nodes)
1041
1042 initial_cost = 0
1043 if roots:
1044 root_u, root_v = roots
1045 if root_u not in pending_u or root_v not in pending_v:
1046 raise nx.NodeNotFound("Root node not in graph.")
1047
1048 # remove roots from pending
1049 pending_u.remove(root_u)
1050 pending_v.remove(root_v)
1051
1052 # cost matrix of vertex mappings
1053 m = len(pending_u)
1054 n = len(pending_v)
1055 C = np.zeros((m + n, m + n))
1056 if node_subst_cost:
1057 C[0:m, 0:n] = np.array(
1058 [
1059 node_subst_cost(G1.nodes[u], G2.nodes[v])
1060 for u in pending_u
1061 for v in pending_v
1062 ]
1063 ).reshape(m, n)
1064 if roots:
1065 initial_cost = node_subst_cost(G1.nodes[root_u], G2.nodes[root_v])
1066 elif node_match:
1067 C[0:m, 0:n] = np.array(
1068 [
1069 1 - int(node_match(G1.nodes[u], G2.nodes[v]))
1070 for u in pending_u
1071 for v in pending_v
1072 ]
1073 ).reshape(m, n)
1074 if roots:
1075 initial_cost = 1 - node_match(G1.nodes[root_u], G2.nodes[root_v])
1076 else:
1077 # all zeroes
1078 pass
1079 # assert not min(m, n) or C[0:m, 0:n].min() >= 0
1080 if node_del_cost:
1081 del_costs = [node_del_cost(G1.nodes[u]) for u in pending_u]
1082 else:
1083 del_costs = [1] * len(pending_u)
1084 # assert not m or min(del_costs) >= 0
1085 if node_ins_cost:
1086 ins_costs = [node_ins_cost(G2.nodes[v]) for v in pending_v]
1087 else:
1088 ins_costs = [1] * len(pending_v)
1089 # assert not n or min(ins_costs) >= 0
1090 inf = C[0:m, 0:n].sum() + sum(del_costs) + sum(ins_costs) + 1
1091 C[0:m, n : n + m] = np.array(
1092 [del_costs[i] if i == j else inf for i in range(m) for j in range(m)]
1093 ).reshape(m, m)
1094 C[m : m + n, 0:n] = np.array(
1095 [ins_costs[i] if i == j else inf for i in range(n) for j in range(n)]
1096 ).reshape(n, n)
1097 Cv = make_CostMatrix(C, m, n)
1098 # debug_print(f"Cv: {m} x {n}")
1099 # debug_print(Cv.C)
1100
1101 pending_g = list(G1.edges)
1102 pending_h = list(G2.edges)
1103
1104 # cost matrix of edge mappings
1105 m = len(pending_g)
1106 n = len(pending_h)
1107 C = np.zeros((m + n, m + n))
1108 if edge_subst_cost:
1109 C[0:m, 0:n] = np.array(
1110 [
1111 edge_subst_cost(G1.edges[g], G2.edges[h])
1112 for g in pending_g
1113 for h in pending_h
1114 ]
1115 ).reshape(m, n)
1116 elif edge_match:
1117 C[0:m, 0:n] = np.array(
1118 [
1119 1 - int(edge_match(G1.edges[g], G2.edges[h]))
1120 for g in pending_g
1121 for h in pending_h
1122 ]
1123 ).reshape(m, n)
1124 else:
1125 # all zeroes
1126 pass
1127 # assert not min(m, n) or C[0:m, 0:n].min() >= 0
1128 if edge_del_cost:
1129 del_costs = [edge_del_cost(G1.edges[g]) for g in pending_g]
1130 else:
1131 del_costs = [1] * len(pending_g)
1132 # assert not m or min(del_costs) >= 0
1133 if edge_ins_cost:
1134 ins_costs = [edge_ins_cost(G2.edges[h]) for h in pending_h]
1135 else:
1136 ins_costs = [1] * len(pending_h)
1137 # assert not n or min(ins_costs) >= 0
1138 inf = C[0:m, 0:n].sum() + sum(del_costs) + sum(ins_costs) + 1
1139 C[0:m, n : n + m] = np.array(
1140 [del_costs[i] if i == j else inf for i in range(m) for j in range(m)]
1141 ).reshape(m, m)
1142 C[m : m + n, 0:n] = np.array(
1143 [ins_costs[i] if i == j else inf for i in range(n) for j in range(n)]
1144 ).reshape(n, n)
1145 Ce = make_CostMatrix(C, m, n)
1146 # debug_print(f'Ce: {m} x {n}')
1147 # debug_print(Ce.C)
1148 # debug_print()
1149
1150 class MaxCost:
1151 def __init__(self):
1152 # initial upper-bound estimate
1153 # NOTE: should work for empty graph
1154 self.value = Cv.C.sum() + Ce.C.sum() + 1
1155
1156 maxcost = MaxCost()
1157
1158 if timeout is not None:
1159 if timeout <= 0:
1160 raise nx.NetworkXError("Timeout value must be greater than 0")
1161 start = time.perf_counter()
1162
1163 def prune(cost):
1164 if timeout is not None:
1165 if time.perf_counter() - start > timeout:
1166 return True
1167 if upper_bound is not None:
1168 if cost > upper_bound:
1169 return True
1170 if cost > maxcost.value:
1171 return True
1172 elif strictly_decreasing and cost >= maxcost.value:
1173 return True
1174
1175 # Now go!
1176
1177 done_uv = [] if roots is None else [roots]
1178
1179 for vertex_path, edge_path, cost in get_edit_paths(
1180 done_uv, pending_u, pending_v, Cv, [], pending_g, pending_h, Ce, initial_cost
1181 ):
1182 # assert sorted(G1.nodes) == sorted(u for u, v in vertex_path if u is not None)
1183 # assert sorted(G2.nodes) == sorted(v for u, v in vertex_path if v is not None)
1184 # assert sorted(G1.edges) == sorted(g for g, h in edge_path if g is not None)
1185 # assert sorted(G2.edges) == sorted(h for g, h in edge_path if h is not None)
1186 # print(vertex_path, edge_path, cost, file = sys.stderr)
1187 # assert cost == maxcost.value
1188 yield list(vertex_path), list(edge_path), cost
1189
1190
1191 def _is_close(d1, d2, atolerance=0, rtolerance=0):
1192 """Determines whether two adjacency matrices are within
1193 a provided tolerance.
1194
1195 Parameters
1196 ----------
1197 d1 : dict
1198 Adjacency dictionary
1199
1200 d2 : dict
1201 Adjacency dictionary
1202
1203 atolerance : float
1204 Some scalar tolerance value to determine closeness
1205
1206 rtolerance : float
1207 A scalar tolerance value that will be some proportion
1208 of ``d2``'s value
1209
1210 Returns
1211 -------
1212 closeness : bool
1213 If all of the nodes within ``d1`` and ``d2`` are within
1214 a predefined tolerance, they are considered "close" and
1215 this method will return True. Otherwise, this method will
1216 return False.
1217
1218 """
1219 # Pre-condition: d1 and d2 have the same keys at each level if they
1220 # are dictionaries.
1221 if not isinstance(d1, dict) and not isinstance(d2, dict):
1222 return abs(d1 - d2) <= atolerance + rtolerance * abs(d2)
1223 return all(all(_is_close(d1[u][v], d2[u][v]) for v in d1[u]) for u in d1)
1224
1225
1226 def simrank_similarity(
1227 G,
1228 source=None,
1229 target=None,
1230 importance_factor=0.9,
1231 max_iterations=100,
1232 tolerance=1e-4,
1233 ):
1234 """Returns the SimRank similarity of nodes in the graph ``G``.
1235
1236 SimRank is a similarity metric that says "two objects are considered
1237 to be similar if they are referenced by similar objects." [1]_.
1238
1239 The pseudo-code definition from the paper is::
1240
1241 def simrank(G, u, v):
1242 in_neighbors_u = G.predecessors(u)
1243 in_neighbors_v = G.predecessors(v)
1244 scale = C / (len(in_neighbors_u) * len(in_neighbors_v))
1245 return scale * sum(simrank(G, w, x)
1246 for w, x in product(in_neighbors_u,
1247 in_neighbors_v))
1248
1249 where ``G`` is the graph, ``u`` is the source, ``v`` is the target,
1250 and ``C`` is a float decay or importance factor between 0 and 1.
1251
1252 The SimRank algorithm for determining node similarity is defined in
1253 [2]_.
1254
1255 Parameters
1256 ----------
1257 G : NetworkX graph
1258 A NetworkX graph
1259
1260 source : node
1261 If this is specified, the returned dictionary maps each node
1262 ``v`` in the graph to the similarity between ``source`` and
1263 ``v``.
1264
1265 target : node
1266 If both ``source`` and ``target`` are specified, the similarity
1267 value between ``source`` and ``target`` is returned. If
1268 ``target`` is specified but ``source`` is not, this argument is
1269 ignored.
1270
1271 importance_factor : float
1272 The relative importance of indirect neighbors with respect to
1273 direct neighbors.
1274
1275 max_iterations : integer
1276 Maximum number of iterations.
1277
1278 tolerance : float
1279 Error tolerance used to check convergence. When an iteration of
1280 the algorithm finds that no similarity value changes more than
1281 this amount, the algorithm halts.
1282
1283 Returns
1284 -------
1285 similarity : dictionary or float
1286 If ``source`` and ``target`` are both ``None``, this returns a
1287 dictionary of dictionaries, where keys are node pairs and value
1288 are similarity of the pair of nodes.
1289
1290 If ``source`` is not ``None`` but ``target`` is, this returns a
1291 dictionary mapping node to the similarity of ``source`` and that
1292 node.
1293
1294 If neither ``source`` nor ``target`` is ``None``, this returns
1295 the similarity value for the given pair of nodes.
1296
1297 Examples
1298 --------
1299 If the nodes of the graph are numbered from zero to *n - 1*, where *n*
1300 is the number of nodes in the graph, you can create a SimRank matrix
1301 from the return value of this function where the node numbers are
1302 the row and column indices of the matrix::
1303
1304 >>> from numpy import array
1305 >>> G = nx.cycle_graph(4)
1306 >>> sim = nx.simrank_similarity(G)
1307 >>> lol = [[sim[u][v] for v in sorted(sim[u])] for u in sorted(sim)]
1308 >>> sim_array = array(lol)
1309
1310 References
1311 ----------
1312 .. [1] https://en.wikipedia.org/wiki/SimRank
1313 .. [2] G. Jeh and J. Widom.
1314 "SimRank: a measure of structural-context similarity",
1315 In KDD'02: Proceedings of the Eighth ACM SIGKDD
1316 International Conference on Knowledge Discovery and Data Mining,
1317 pp. 538--543. ACM Press, 2002.
1318 """
1319 prevsim = None
1320
1321 # build up our similarity adjacency dictionary output
1322 newsim = {u: {v: 1 if u == v else 0 for v in G} for u in G}
1323
1324 # These functions compute the update to the similarity value of the nodes
1325 # `u` and `v` with respect to the previous similarity values.
1326 def avg_sim(s):
1327 return sum(newsim[w][x] for (w, x) in s) / len(s) if s else 0.0
1328
1329 def sim(u, v):
1330 Gadj = G.pred if G.is_directed() else G.adj
1331 return importance_factor * avg_sim(list(product(Gadj[u], Gadj[v])))
1332
1333 for _ in range(max_iterations):
1334 if prevsim and _is_close(prevsim, newsim, tolerance):
1335 break
1336 prevsim = newsim
1337 newsim = {
1338 u: {v: sim(u, v) if u is not v else 1 for v in newsim[u]} for u in newsim
1339 }
1340
1341 if source is not None and target is not None:
1342 return newsim[source][target]
1343 if source is not None:
1344 return newsim[source]
1345 return newsim
1346
1347
1348 def simrank_similarity_numpy(
1349 G,
1350 source=None,
1351 target=None,
1352 importance_factor=0.9,
1353 max_iterations=100,
1354 tolerance=1e-4,
1355 ):
1356 """Calculate SimRank of nodes in ``G`` using matrices with ``numpy``.
1357
1358 The SimRank algorithm for determining node similarity is defined in
1359 [1]_.
1360
1361 Parameters
1362 ----------
1363 G : NetworkX graph
1364 A NetworkX graph
1365
1366 source : node
1367 If this is specified, the returned dictionary maps each node
1368 ``v`` in the graph to the similarity between ``source`` and
1369 ``v``.
1370
1371 target : node
1372 If both ``source`` and ``target`` are specified, the similarity
1373 value between ``source`` and ``target`` is returned. If
1374 ``target`` is specified but ``source`` is not, this argument is
1375 ignored.
1376
1377 importance_factor : float
1378 The relative importance of indirect neighbors with respect to
1379 direct neighbors.
1380
1381 max_iterations : integer
1382 Maximum number of iterations.
1383
1384 tolerance : float
1385 Error tolerance used to check convergence. When an iteration of
1386 the algorithm finds that no similarity value changes more than
1387 this amount, the algorithm halts.
1388
1389 Returns
1390 -------
1391 similarity : numpy matrix, numpy array or float
1392 If ``source`` and ``target`` are both ``None``, this returns a
1393 Matrix containing SimRank scores of the nodes.
1394
1395 If ``source`` is not ``None`` but ``target`` is, this returns an
1396 Array containing SimRank scores of ``source`` and that
1397 node.
1398
1399 If neither ``source`` nor ``target`` is ``None``, this returns
1400 the similarity value for the given pair of nodes.
1401
1402 Examples
1403 --------
1404 >>> from numpy import array
1405 >>> G = nx.cycle_graph(4)
1406 >>> sim = nx.simrank_similarity_numpy(G)
1407
1408 References
1409 ----------
1410 .. [1] G. Jeh and J. Widom.
1411 "SimRank: a measure of structural-context similarity",
1412 In KDD'02: Proceedings of the Eighth ACM SIGKDD
1413 International Conference on Knowledge Discovery and Data Mining,
1414 pp. 538--543. ACM Press, 2002.
1415 """
1416 # This algorithm follows roughly
1417 #
1418 # S = max{C * (A.T * S * A), I}
1419 #
1420 # where C is the importance factor, A is the column normalized
1421 # adjacency matrix, and I is the identity matrix.
1422 import numpy as np
1423
1424 adjacency_matrix = nx.to_numpy_array(G)
1425
1426 # column-normalize the ``adjacency_matrix``
1427 adjacency_matrix /= adjacency_matrix.sum(axis=0)
1428
1429 newsim = np.eye(adjacency_matrix.shape[0], dtype=np.float64)
1430 for _ in range(max_iterations):
1431 prevsim = np.copy(newsim)
1432 newsim = importance_factor * np.matmul(
1433 np.matmul(adjacency_matrix.T, prevsim), adjacency_matrix
1434 )
1435 np.fill_diagonal(newsim, 1.0)
1436
1437 if np.allclose(prevsim, newsim, atol=tolerance):
1438 break
1439
1440 if source is not None and target is not None:
1441 return newsim[source, target]
1442 if source is not None:
1443 return newsim[source]
1444 return newsim