## Mercurial > repos > shellac > sam_consensus_v3

### comparison env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @ 0:4f3585e2f14b draft default tip

Find changesets by keywords (author, files, the commit message), revision
number or hash, or revset expression.

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"

author | shellac |
---|---|

date | Mon, 22 Mar 2021 18:12:50 +0000 |

parents | |

children |

comparison

equal
deleted
inserted
replaced

-1:000000000000 | 0:4f3585e2f14b |
---|---|

1 """PageRank analysis of graph structure. """ | |

2 import networkx as nx | |

3 from networkx.utils import not_implemented_for | |

4 | |

5 __all__ = ["pagerank", "pagerank_numpy", "pagerank_scipy", "google_matrix"] | |

6 | |

7 | |

@not_implemented_for("multigraph")
def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop after
    an error tolerance of ``len(G) * tol`` has been reached. If the
    number of iterations exceed `max_iter`, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` exception
    is raised.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf

    """
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm (generator avoids building a temp list)
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)

158 | |

159 | |

def google_matrix(
    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
):
    """Returns the Google matrix of the graph.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float
      The damping factor.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    nodelist : list, optional
      The rows and columns are ordered according to the nodes in nodelist.
      If nodelist is None, then the ordering is produced by G.nodes().

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes below). It may be common to have the dangling dict to
      be the same as the personalization dict.

    Returns
    -------
    A : NumPy matrix
       Google matrix of the graph

    Notes
    -----
    The matrix returned represents the transition matrix that describes the
    Markov chain used in PageRank. For PageRank to converge to a unique
    solution (i.e., a unique stationary distribution in a Markov chain), the
    transition matrix must be irreducible. In other words, it must be that
    there exists a path between every pair of nodes in the graph, or else there
    is the potential of "rank sinks."

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, pagerank_scipy
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)

    # NOTE(review): np.matrix is deprecated upstream in NumPy; the matrix
    # return type is kept here because callers rely on it.
    M = nx.to_numpy_matrix(G, nodelist=nodelist, weight=weight)
    N = len(G)
    if N == 0:
        return M

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        p /= p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    dangling_nodes = np.where(M.sum(axis=1) == 0)[0]

    # Assign dangling_weights to any dangling nodes (nodes with no out links)
    for node in dangling_nodes:
        M[node] = dangling_weights

    M /= M.sum(axis=1)  # Normalize rows to sum to 1

    return alpha * M + (1 - alpha) * p

252 | |

253 | |

def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_numpy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses NumPy's interface to the LAPACK
    eigenvalue solvers.  This will be the fastest and most accurate
    for small graphs.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_scipy, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np

    if len(G) == 0:
        return {}
    M = google_matrix(
        G, alpha, personalization=personalization, weight=weight, dangling=dangling
    )
    # use numpy LAPACK solver
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    ind = np.argmax(eigenvalues)
    # eigenvector of largest eigenvalue is at ind, normalized
    largest = np.array(eigenvectors[:, ind]).flatten().real
    norm = float(largest.sum())
    return dict(zip(G, map(float, largest / norm)))

336 | |

337 | |

def pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight, dtype=float)
    S = np.array(M.sum(axis=1)).flatten()
    # Invert row sums where non-zero to build the row-normalizing diagonal;
    # zero rows (dangling nodes) are left at 0 and handled separately below.
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format="csr")
    M = Q * M

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)