Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """PageRank analysis of graph structure. """ | |
2 import networkx as nx | |
3 from networkx.utils import not_implemented_for | |
4 | |
5 __all__ = ["pagerank", "pagerank_numpy", "pagerank_scipy", "google_matrix"] | |
6 | |
7 | |
@not_implemented_for("multigraph")
def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop after
    an error tolerance of ``len(G) * tol`` has been reached. If the
    number of iterations exceeds `max_iter`, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` exception
    is raised.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf

    """
    if len(G) == 0:
        return {}

    # PageRank is defined on directed graphs; mirror each undirected edge.
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form: each node's out-edge
    # weights sum to 1, giving a transition matrix for the random surfer.
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalize nstart to a probability distribution.
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    # Nodes with zero (weighted) out-degree contribute their mass via
    # dangling_weights instead of out-edges.
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        # Mass sitting on dangling nodes is redistributed globally.
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm (generator avoids building a temp list)
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
158 | |
159 | |
def google_matrix(
    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
):
    """Returns the Google matrix of the graph.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float
      The damping factor.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    nodelist : list, optional
      The rows and columns are ordered according to the nodes in nodelist.
      If nodelist is None, then the ordering is produced by G.nodes().

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes below). It may be common to have the dangling dict to
      be the same as the personalization dict.

    Returns
    -------
    A : NumPy matrix
       Google matrix of the graph

    Notes
    -----
    The matrix returned represents the transition matrix that describes the
    Markov chain used in PageRank. For PageRank to converge to a unique
    solution (i.e., a unique stationary distribution in a Markov chain), the
    transition matrix must be irreducible. In other words, it must be that
    there exists a path between every pair of nodes in the graph, or else there
    is the potential of "rank sinks."

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, pagerank_scipy
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)

    M = nx.to_numpy_matrix(G, nodelist=nodelist, weight=weight)
    N = len(G)
    if N == 0:
        # Empty graph: return the (0 x 0) matrix as-is.
        return M

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        p /= p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    dangling_nodes = np.where(M.sum(axis=1) == 0)[0]

    # Assign dangling_weights to any dangling nodes (nodes with no out links),
    # so every row of M has positive mass before normalization.
    for node in dangling_nodes:
        M[node] = dangling_weights

    M /= M.sum(axis=1)  # Normalize rows to sum to 1 (row-stochastic)

    # Convex combination of the link structure and the teleport vector.
    return alpha * M + (1 - alpha) * p
252 | |
253 | |
def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_numpy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses NumPy's interface to the LAPACK
    eigenvalue solvers.  This will be the fastest and most accurate
    for small graphs.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np

    if len(G) == 0:
        return {}
    M = google_matrix(
        G, alpha, personalization=personalization, weight=weight, dangling=dangling
    )
    # use numpy LAPACK solver on the transpose: PageRank is the left
    # (stationary) eigenvector of the Google matrix
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    ind = np.argmax(eigenvalues)
    # eigenvector of largest eigenvalue is at ind; take the real part and
    # normalize it to sum to 1 so the result is a probability distribution
    largest = np.array(eigenvectors[:, ind]).flatten().real
    norm = float(largest.sum())
    return dict(zip(G, map(float, largest / norm)))
336 | |
337 | |
def pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight, dtype=float)
    S = np.array(M.sum(axis=1)).flatten()
    # Invert non-zero row sums; zero rows (dangling nodes) stay zero.
    S[S != 0] = 1.0 / S[S != 0]
    # Q is a diagonal matrix of inverse row sums; Q*M row-normalizes M.
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format="csr")
    M = Q * M

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        # x[is_dangling].sum() is the ndarray reduction (C speed) rather
        # than the Python builtin sum over array elements.
        x = alpha * (x * M + x[is_dangling].sum() * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)