Mercurial > repos > shellac > sam_consensus_v3
annotate env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author  shellac 

date  Mon, 22 Mar 2021 18:12:50 +0000 
parents  
children 
rev  line source 

0
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

1 """PageRank analysis of graph structure. """ 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

2 import networkx as nx 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

3 from networkx.utils import not_implemented_for 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

4 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

5 __all__ = ["pagerank", "pagerank_numpy", "pagerank_scipy", "google_matrix"] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

6 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

7 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

8 @not_implemented_for("multigraph") 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

9 def pagerank( 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

10 G, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

11 alpha=0.85, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

12 personalization=None, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

13 max_iter=100, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

14 tol=1.0e-6, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

15 nstart=None, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

16 weight="weight", 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

17 dangling=None, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

18 ): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

19 """Returns the PageRank of the nodes in the graph. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

20 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

21 PageRank computes a ranking of the nodes in the graph G based on 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

22 the structure of the incoming links. It was originally designed as 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

23 an algorithm to rank web pages. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

24 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

25 Parameters 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

26 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

27 G : graph 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

28 A NetworkX graph. Undirected graphs will be converted to a directed 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

29 graph with two directed edges for each undirected edge. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

30 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

31 alpha : float, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

32 Damping parameter for PageRank, default=0.85. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

33 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

34 personalization: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

35 The "personalization vector" consisting of a dictionary with a 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

36 key for some subset of graph nodes and a personalization value for each of those. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

37 At least one personalization value must be nonzero. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

38 If not specified, a node's personalization value will be zero. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

39 By default, a uniform distribution is used. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

40 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

41 max_iter : integer, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

42 Maximum number of iterations in power method eigenvalue solver. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

43 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

44 tol : float, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

45 Error tolerance used to check convergence in power method solver. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

46 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

47 nstart : dictionary, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

48 Starting value of PageRank iteration for each node. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

49 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

50 weight : key, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

51 Edge data key to use as weight. If None weights are set to 1. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

52 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

53 dangling: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

54 The outedges to be assigned to any "dangling" nodes, i.e., nodes without 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

55 any outedges. The dict key is the node the outedge points to and the dict 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

56 value is the weight of that outedge. By default, dangling nodes are given 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

57 outedges according to the personalization vector (uniform if not 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

58 specified). This must be selected to result in an irreducible transition 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

59 matrix (see notes under google_matrix). It may be common to have the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

60 dangling dict to be the same as the personalization dict. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

61 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

62 Returns 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

63 -------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

64 pagerank : dictionary 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

65 Dictionary of nodes with PageRank as value 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

66 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

67 Examples 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

68 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

69 >>> G = nx.DiGraph(nx.path_graph(4)) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

70 >>> pr = nx.pagerank(G, alpha=0.9) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

71 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

72 Notes 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

73 -----
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

74 The eigenvector calculation is done by the power iteration method 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

75 and has no guarantee of convergence. The iteration will stop after 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

76 an error tolerance of ``len(G) * tol`` has been reached. If the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

77 number of iterations exceeds `max_iter`, a 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

78 :exc:`networkx.exception.PowerIterationFailedConvergence` exception 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

79 is raised. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

80 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

81 The PageRank algorithm was designed for directed graphs but this 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

82 algorithm does not check if the input graph is directed and will 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

83 execute on undirected graphs by converting each edge in the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

84 undirected graph to two directed edges. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

85 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

86 See Also 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

87 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

88 pagerank_numpy, pagerank_scipy, google_matrix 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

89 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

90 Raises 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

91 ------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

92 PowerIterationFailedConvergence 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

93 If the algorithm fails to converge to the specified tolerance 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

94 within the specified number of iterations of the power iteration 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

95 method. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

96 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

97 References 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

98 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

99 .. [1] A. Langville and C. Meyer, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

100 "A survey of eigenvector methods of web information retrieval." 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

101 http://citeseer.ist.psu.edu/713792.html 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

102 .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

103 The PageRank citation ranking: Bringing order to the Web. 1999 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

104 http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=199966&format=pdf 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

105 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

106 """ 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

107 if len(G) == 0: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

108 return {} 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

109 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

110 if not G.is_directed(): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

111 D = G.to_directed() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

112 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

113 D = G 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

114 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

115 # Create a copy in (right) stochastic form 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

116 W = nx.stochastic_graph(D, weight=weight) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

117 N = W.number_of_nodes() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

118 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

119 # Choose fixed starting vector if not given 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

120 if nstart is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

121 x = dict.fromkeys(W, 1.0 / N) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

122 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

123 # Normalized nstart vector 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

124 s = float(sum(nstart.values())) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

125 x = {k: v / s for k, v in nstart.items()} 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

126 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

127 if personalization is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

128 # Assign uniform personalization vector if not given 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

129 p = dict.fromkeys(W, 1.0 / N) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

130 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

131 s = float(sum(personalization.values())) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

132 p = {k: v / s for k, v in personalization.items()} 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

133 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

134 if dangling is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

135 # Use personalization vector if dangling vector not specified 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

136 dangling_weights = p 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

137 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

138 s = float(sum(dangling.values())) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

139 dangling_weights = {k: v / s for k, v in dangling.items()} 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

140 dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

141 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

142 # power iteration: make up to max_iter iterations 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

143 for _ in range(max_iter): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

144 xlast = x 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

145 x = dict.fromkeys(xlast.keys(), 0) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

146 danglesum = alpha * sum(xlast[n] for n in dangling_nodes) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

147 for n in x: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

148 # this matrix multiply looks odd because it is 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

149 # doing a left multiply x^T=xlast^T*W 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

150 for nbr in W[n]: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

151 x[nbr] += alpha * xlast[n] * W[n][nbr][weight] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

152 x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

153 # check convergence, l1 norm 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

154 err = sum([abs(x[n] - xlast[n]) for n in x]) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

155 if err < N * tol: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

156 return x 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

157 raise nx.PowerIterationFailedConvergence(max_iter) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

158 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

159 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

160 def google_matrix( 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

161 G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

162 ): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

163 """Returns the Google matrix of the graph. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

164 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

165 Parameters 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

166 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

167 G : graph 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

168 A NetworkX graph. Undirected graphs will be converted to a directed 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

169 graph with two directed edges for each undirected edge. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

170 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

171 alpha : float 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

172 The damping factor. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

173 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

174 personalization: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

175 The "personalization vector" consisting of a dictionary with a 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

176 key for some subset of graph nodes and a personalization value for each of those. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

177 At least one personalization value must be nonzero. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

178 If not specified, a node's personalization value will be zero. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

179 By default, a uniform distribution is used. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

180 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

181 nodelist : list, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

182 The rows and columns are ordered according to the nodes in nodelist. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

183 If nodelist is None, then the ordering is produced by G.nodes(). 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

184 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

185 weight : key, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

186 Edge data key to use as weight. If None weights are set to 1. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

187 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

188 dangling: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

189 The outedges to be assigned to any "dangling" nodes, i.e., nodes without 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

190 any outedges. The dict key is the node the outedge points to and the dict 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

191 value is the weight of that outedge. By default, dangling nodes are given 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

192 outedges according to the personalization vector (uniform if not 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

193 specified). This must be selected to result in an irreducible transition 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

194 matrix (see notes below). It may be common to have the dangling dict to 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

195 be the same as the personalization dict. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

196 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

197 Returns 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

198 -------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

199 A : NumPy matrix 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

200 Google matrix of the graph 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

201 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

202 Notes 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

203 -----
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

204 The matrix returned represents the transition matrix that describes the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

205 Markov chain used in PageRank. For PageRank to converge to a unique 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

206 solution (i.e., a unique stationary distribution in a Markov chain), the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

207 transition matrix must be irreducible. In other words, it must be that 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

208 there exists a path between every pair of nodes in the graph, or else there 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

209 is the potential of "rank sinks." 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

210 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

211 This implementation works with Multi(Di)Graphs. For multigraphs the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

212 weight between two nodes is set to be the sum of all edge weights 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

213 between those nodes. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

214 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

215 See Also 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

216 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

217 pagerank, pagerank_numpy, pagerank_scipy 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

218 """ 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

219 import numpy as np 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

220 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

221 if nodelist is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

222 nodelist = list(G) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

223 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

224 M = nx.to_numpy_matrix(G, nodelist=nodelist, weight=weight) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

225 N = len(G) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

226 if N == 0: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

227 return M 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

228 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

229 # Personalization vector 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

230 if personalization is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

231 p = np.repeat(1.0 / N, N) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

232 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

233 p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

234 p /= p.sum() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

235 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

236 # Dangling nodes 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

237 if dangling is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

238 dangling_weights = p 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

239 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

240 # Convert the dangling dictionary into an array in nodelist order 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

241 dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

242 dangling_weights /= dangling_weights.sum() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

243 dangling_nodes = np.where(M.sum(axis=1) == 0)[0] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

244 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

245 # Assign dangling_weights to any dangling nodes (nodes with no out links) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

246 for node in dangling_nodes: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

247 M[node] = dangling_weights 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

248 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

249 M /= M.sum(axis=1) # Normalize rows to sum to 1 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

250 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

251 return alpha * M + (1 - alpha) * p
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

252 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

253 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

254 def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

255 """Returns the PageRank of the nodes in the graph. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

256 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

257 PageRank computes a ranking of the nodes in the graph G based on 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

258 the structure of the incoming links. It was originally designed as 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

259 an algorithm to rank web pages. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

260 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

261 Parameters 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

262 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

263 G : graph 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

264 A NetworkX graph. Undirected graphs will be converted to a directed 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

265 graph with two directed edges for each undirected edge. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

266 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

267 alpha : float, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

268 Damping parameter for PageRank, default=0.85. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

269 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

270 personalization: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

271 The "personalization vector" consisting of a dictionary with a 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

272 key for some subset of graph nodes and a personalization value for each of those.
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

273 At least one personalization value must be nonzero. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

274 If not specified, a node's personalization value will be zero.
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

275 By default, a uniform distribution is used. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

276 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

277 weight : key, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

278 Edge data key to use as weight. If None weights are set to 1. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

279 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

280 dangling: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

281 The outedges to be assigned to any "dangling" nodes, i.e., nodes without 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

282 any outedges. The dict key is the node the outedge points to and the dict 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

283 value is the weight of that outedge. By default, dangling nodes are given 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

284 outedges according to the personalization vector (uniform if not 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

285 specified). This must be selected to result in an irreducible transition
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

286 matrix (see notes under google_matrix). It may be common to have the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

287 dangling dict to be the same as the personalization dict. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

288 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

289 Returns 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

290 -------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

291 pagerank : dictionary 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

292 Dictionary of nodes with PageRank as value. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

293 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

294 Examples 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

295 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

296 >>> G = nx.DiGraph(nx.path_graph(4)) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

297 >>> pr = nx.pagerank_numpy(G, alpha=0.9) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

298 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

299 Notes 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

300 -----
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

301 The eigenvector calculation uses NumPy's interface to the LAPACK 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

302 eigenvalue solvers. This will be the fastest and most accurate 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

303 for small graphs. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

304 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

305 This implementation works with Multi(Di)Graphs. For multigraphs the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

306 weight between two nodes is set to be the sum of all edge weights 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

307 between those nodes. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

308 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

309 See Also 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

310 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

311 pagerank, pagerank_scipy, google_matrix 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

312 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

313 References 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

314 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

315 .. [1] A. Langville and C. Meyer, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

316 "A survey of eigenvector methods of web information retrieval." 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

317 http://citeseer.ist.psu.edu/713792.html 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

318 .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

319 The PageRank citation ranking: Bringing order to the Web. 1999 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

320 http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

321 """ 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

322 import numpy as np 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

323 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

324 if len(G) == 0: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

325 return {} 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

326 M = google_matrix( 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

327 G, alpha, personalization=personalization, weight=weight, dangling=dangling 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

328 ) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

329 # use numpy LAPACK solver 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

330 eigenvalues, eigenvectors = np.linalg.eig(M.T) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

331 ind = np.argmax(eigenvalues) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

332 # eigenvector of largest eigenvalue is at ind, normalized 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

333 largest = np.array(eigenvectors[:, ind]).flatten().real 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

334 norm = float(largest.sum()) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

335 return dict(zip(G, map(float, largest / norm))) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

336 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

337 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

338 def pagerank_scipy( 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

339 G, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

340 alpha=0.85, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

341 personalization=None, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

342 max_iter=100, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

343 tol=1.0e-6,
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

344 nstart=None, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

345 weight="weight", 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

346 dangling=None, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

347 ): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

348 """Returns the PageRank of the nodes in the graph. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

349 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

350 PageRank computes a ranking of the nodes in the graph G based on 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

351 the structure of the incoming links. It was originally designed as 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

352 an algorithm to rank web pages. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

353 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

354 Parameters 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

355 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

356 G : graph 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

357 A NetworkX graph. Undirected graphs will be converted to a directed 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

358 graph with two directed edges for each undirected edge. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

359 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

360 alpha : float, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

361 Damping parameter for PageRank, default=0.85. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

362 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

363 personalization: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

364 The "personalization vector" consisting of a dictionary with a 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

365 key for some subset of graph nodes and a personalization value for each of those.
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

366 At least one personalization value must be nonzero. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

367 If not specified, a node's personalization value will be zero.
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

368 By default, a uniform distribution is used. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

369 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

370 max_iter : integer, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

371 Maximum number of iterations in power method eigenvalue solver. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

372 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

373 tol : float, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

374 Error tolerance used to check convergence in power method solver. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

375 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

376 nstart : dictionary, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

377 Starting value of PageRank iteration for each node. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

378 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

379 weight : key, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

380 Edge data key to use as weight. If None weights are set to 1. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

381 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

382 dangling: dict, optional 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

383 The outedges to be assigned to any "dangling" nodes, i.e., nodes without 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

384 any outedges. The dict key is the node the outedge points to and the dict 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

385 value is the weight of that outedge. By default, dangling nodes are given 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

386 outedges according to the personalization vector (uniform if not 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

387 specified). This must be selected to result in an irreducible transition
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

388 matrix (see notes under google_matrix). It may be common to have the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

389 dangling dict to be the same as the personalization dict. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

390 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

391 Returns 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

392 -------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

393 pagerank : dictionary 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

394 Dictionary of nodes with PageRank as value 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

395 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

396 Examples 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

397 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

398 >>> G = nx.DiGraph(nx.path_graph(4)) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

399 >>> pr = nx.pagerank_scipy(G, alpha=0.9) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

400 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

401 Notes 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

402 -----
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

403 The eigenvector calculation uses power iteration with a SciPy 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

404 sparse matrix representation. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

405 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

406 This implementation works with Multi(Di)Graphs. For multigraphs the 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

407 weight between two nodes is set to be the sum of all edge weights 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

408 between those nodes. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

409 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

410 See Also 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

411 --------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

412 pagerank, pagerank_numpy, google_matrix 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

413 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

414 Raises 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

415 ------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

416 PowerIterationFailedConvergence 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

417 If the algorithm fails to converge to the specified tolerance 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

418 within the specified number of iterations of the power iteration 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

419 method. 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

420 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

421 References 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

422 ----------
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

423 .. [1] A. Langville and C. Meyer, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

424 "A survey of eigenvector methods of web information retrieval." 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

425 http://citeseer.ist.psu.edu/713792.html 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

426 .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

427 The PageRank citation ranking: Bringing order to the Web. 1999 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

428 http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

429 """ 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

430 import numpy as np 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

431 import scipy.sparse 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

432 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

433 N = len(G) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

434 if N == 0: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

435 return {} 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

436 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

437 nodelist = list(G) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

438 M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight, dtype=float) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

439 S = np.array(M.sum(axis=1)).flatten() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

440 S[S != 0] = 1.0 / S[S != 0] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

441 Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format="csr") 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

442 M = Q * M 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

443 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

444 # initial vector 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

445 if nstart is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

446 x = np.repeat(1.0 / N, N) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

447 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

448 x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

449 x = x / x.sum() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

450 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

451 # Personalization vector 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

452 if personalization is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

453 p = np.repeat(1.0 / N, N) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

454 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

455 p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

456 p = p / p.sum() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

457 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

458 # Dangling nodes 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

459 if dangling is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

460 dangling_weights = p 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

461 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

462 # Convert the dangling dictionary into an array in nodelist order 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

463 dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

464 dangling_weights /= dangling_weights.sum() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

465 is_dangling = np.where(S == 0)[0] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

466 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

467 # power iteration: make up to max_iter iterations 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

468 for _ in range(max_iter): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

469 xlast = x 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

470 x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

471 # check convergence, l1 norm 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

472 err = np.absolute(x - xlast).sum() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

473 if err < N * tol: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

474 return dict(zip(nodelist, map(float, x))) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

475 raise nx.PowerIterationFailedConvergence(max_iter) 