sam_consensus_v3: env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/pagerank

comparison env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"

author	shellac
date	Mon, 22 Mar 2021 18:12:50 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:4f3585e2f14b
+"""PageRank analysis of graph structure. """
+import networkx as nx
+from networkx.utils import not_implemented_for
+__all__ = ["pagerank", "pagerank_numpy", "pagerank_scipy", "google_matrix"]
+@not_implemented_for("multigraph")
+def pagerank(
+G,
+alpha=0.85,
+personalization=None,
+max_iter=100,
+tol=1.0e-6,
+nstart=None,
+weight="weight",
+dangling=None,
+):
+"""Returns the PageRank of the nodes in the graph.
+PageRank computes a ranking of the nodes in the graph G based on
+the structure of the incoming links. It was originally designed as
+an algorithm to rank web pages.
+Parameters
+----------
+G : graph
+A NetworkX graph.  Undirected graphs will be converted to a directed
+graph with two directed edges for each undirected edge.
+alpha : float, optional
+Damping parameter for PageRank, default=0.85.
+personalization: dict, optional
+The "personalization vector" consisting of a dictionary with a
+key some subset of graph nodes and personalization value each of those.
+At least one personalization value must be non-zero.
+If not specfiied, a nodes personalization value will be zero.
+By default, a uniform distribution is used.
+max_iter : integer, optional
+Maximum number of iterations in power method eigenvalue solver.
+tol : float, optional
+Error tolerance used to check convergence in power method solver.
+nstart : dictionary, optional
+Starting value of PageRank iteration for each node.
+weight : key, optional
+Edge data key to use as weight.  If None weights are set to 1.
+dangling: dict, optional
+The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+any outedges. The dict key is the node the outedge points to and the dict
+value is the weight of that outedge. By default, dangling nodes are given
+outedges according to the personalization vector (uniform if not
+specified). This must be selected to result in an irreducible transition
+matrix (see notes under google_matrix). It may be common to have the
+dangling dict to be the same as the personalization dict.
+Returns
+-------
+pagerank : dictionary
+Dictionary of nodes with PageRank as value
+Examples
+--------
+>>> G = nx.DiGraph(nx.path_graph(4))
+>>> pr = nx.pagerank(G, alpha=0.9)
+Notes
+-----
+The eigenvector calculation is done by the power iteration method
+and has no guarantee of convergence.  The iteration will stop after
+an error tolerance of ``len(G) * tol`` has been reached. If the
+number of iterations exceed `max_iter`, a
+:exc:`networkx.exception.PowerIterationFailedConvergence` exception
+is raised.
+The PageRank algorithm was designed for directed graphs but this
+algorithm does not check if the input graph is directed and will
+execute on undirected graphs by converting each edge in the
+directed graph to two edges.
+See Also
+--------
+pagerank_numpy, pagerank_scipy, google_matrix
+Raises
+------
+PowerIterationFailedConvergence
+If the algorithm fails to converge to the specified tolerance
+within the specified number of iterations of the power iteration
+method.
+References
+----------
+.. [1] A. Langville and C. Meyer,
+"A survey of eigenvector methods of web information retrieval."
+http://citeseer.ist.psu.edu/713792.html
+.. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+The PageRank citation ranking: Bringing order to the Web. 1999
+http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+"""
+if len(G) == 0:
+return {}
+if not G.is_directed():
+D = G.to_directed()
+else:
+D = G
+# Create a copy in (right) stochastic form
+W = nx.stochastic_graph(D, weight=weight)
+N = W.number_of_nodes()
+# Choose fixed starting vector if not given
+if nstart is None:
+x = dict.fromkeys(W, 1.0 / N)
+else:
+# Normalized nstart vector
+s = float(sum(nstart.values()))
+x = {k: v / s for k, v in nstart.items()}
+if personalization is None:
+# Assign uniform personalization vector if not given
+p = dict.fromkeys(W, 1.0 / N)
+else:
+s = float(sum(personalization.values()))
+p = {k: v / s for k, v in personalization.items()}
+if dangling is None:
+# Use personalization vector if dangling vector not specified
+dangling_weights = p
+else:
+s = float(sum(dangling.values()))
+dangling_weights = {k: v / s for k, v in dangling.items()}
+dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
+# power iteration: make up to max_iter iterations
+for _ in range(max_iter):
+xlast = x
+x = dict.fromkeys(xlast.keys(), 0)
+danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
+for n in x:
+# this matrix multiply looks odd because it is
+# doing a left multiply x^T=xlast^T*W
+for nbr in W[n]:
+x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
+x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
+# check convergence, l1 norm
+err = sum([abs(x[n] - xlast[n]) for n in x])
+if err < N * tol:
+return x
+raise nx.PowerIterationFailedConvergence(max_iter)
+def google_matrix(
+G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
+):
+"""Returns the Google matrix of the graph.
+Parameters
+----------
+G : graph
+A NetworkX graph.  Undirected graphs will be converted to a directed
+graph with two directed edges for each undirected edge.
+alpha : float
+The damping factor.
+personalization: dict, optional
+The "personalization vector" consisting of a dictionary with a
+key some subset of graph nodes and personalization value each of those.
+At least one personalization value must be non-zero.
+If not specfiied, a nodes personalization value will be zero.
+By default, a uniform distribution is used.
+nodelist : list, optional
+The rows and columns are ordered according to the nodes in nodelist.
+If nodelist is None, then the ordering is produced by G.nodes().
+weight : key, optional
+Edge data key to use as weight.  If None weights are set to 1.
+dangling: dict, optional
+The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+any outedges. The dict key is the node the outedge points to and the dict
+value is the weight of that outedge. By default, dangling nodes are given
+outedges according to the personalization vector (uniform if not
+specified) This must be selected to result in an irreducible transition
+matrix (see notes below). It may be common to have the dangling dict to
+be the same as the personalization dict.
+Returns
+-------
+A : NumPy matrix
+Google matrix of the graph
+Notes
+-----
+The matrix returned represents the transition matrix that describes the
+Markov chain used in PageRank. For PageRank to converge to a unique
+solution (i.e., a unique stationary distribution in a Markov chain), the
+transition matrix must be irreducible. In other words, it must be that
+there exists a path between every pair of nodes in the graph, or else there
+is the potential of "rank sinks."
+This implementation works with Multi(Di)Graphs. For multigraphs the
+weight between two nodes is set to be the sum of all edge weights
+between those nodes.
+See Also
+--------
+pagerank, pagerank_numpy, pagerank_scipy
+"""
+import numpy as np
+if nodelist is None:
+nodelist = list(G)
+M = nx.to_numpy_matrix(G, nodelist=nodelist, weight=weight)
+N = len(G)
+if N == 0:
+return M
+# Personalization vector
+if personalization is None:
+p = np.repeat(1.0 / N, N)
+else:
+p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
+p /= p.sum()
+# Dangling nodes
+if dangling is None:
+dangling_weights = p
+else:
+# Convert the dangling dictionary into an array in nodelist order
+dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
+dangling_weights /= dangling_weights.sum()
+dangling_nodes = np.where(M.sum(axis=1) == 0)[0]
+# Assign dangling_weights to any dangling nodes (nodes with no out links)
+for node in dangling_nodes:
+M[node] = dangling_weights
+M /= M.sum(axis=1)  # Normalize rows to sum to 1
+return alpha * M + (1 - alpha) * p
+def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
+"""Returns the PageRank of the nodes in the graph.
+PageRank computes a ranking of the nodes in the graph G based on
+the structure of the incoming links. It was originally designed as
+an algorithm to rank web pages.
+Parameters
+----------
+G : graph
+A NetworkX graph.  Undirected graphs will be converted to a directed
+graph with two directed edges for each undirected edge.
+alpha : float, optional
+Damping parameter for PageRank, default=0.85.
+personalization: dict, optional
+The "personalization vector" consisting of a dictionary with a
+key some subset of graph nodes and personalization value each of those.
+At least one personalization value must be non-zero.
+If not specfiied, a nodes personalization value will be zero.
+By default, a uniform distribution is used.
+weight : key, optional
+Edge data key to use as weight.  If None weights are set to 1.
+dangling: dict, optional
+The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+any outedges. The dict key is the node the outedge points to and the dict
+value is the weight of that outedge. By default, dangling nodes are given
+outedges according to the personalization vector (uniform if not
+specified) This must be selected to result in an irreducible transition
+matrix (see notes under google_matrix). It may be common to have the
+dangling dict to be the same as the personalization dict.
+Returns
+-------
+pagerank : dictionary
+Dictionary of nodes with PageRank as value.
+Examples
+--------
+>>> G = nx.DiGraph(nx.path_graph(4))
+>>> pr = nx.pagerank_numpy(G, alpha=0.9)
+Notes
+-----
+The eigenvector calculation uses NumPy's interface to the LAPACK
+eigenvalue solvers.  This will be the fastest and most accurate
+for small graphs.
+This implementation works with Multi(Di)Graphs. For multigraphs the
+weight between two nodes is set to be the sum of all edge weights
+between those nodes.
+See Also
+--------
+pagerank, pagerank_scipy, google_matrix
+References
+----------
+.. [1] A. Langville and C. Meyer,
+"A survey of eigenvector methods of web information retrieval."
+http://citeseer.ist.psu.edu/713792.html
+.. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+The PageRank citation ranking: Bringing order to the Web. 1999
+http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+"""
+import numpy as np
+if len(G) == 0:
+return {}
+M = google_matrix(
+G, alpha, personalization=personalization, weight=weight, dangling=dangling
+)
+# use numpy LAPACK solver
+eigenvalues, eigenvectors = np.linalg.eig(M.T)
+ind = np.argmax(eigenvalues)
+# eigenvector of largest eigenvalue is at ind, normalized
+largest = np.array(eigenvectors[:, ind]).flatten().real
+norm = float(largest.sum())
+return dict(zip(G, map(float, largest / norm)))
+def pagerank_scipy(
+G,
+alpha=0.85,
+personalization=None,
+max_iter=100,
+tol=1.0e-6,
+nstart=None,
+weight="weight",
+dangling=None,
+):
+"""Returns the PageRank of the nodes in the graph.
+PageRank computes a ranking of the nodes in the graph G based on
+the structure of the incoming links. It was originally designed as
+an algorithm to rank web pages.
+Parameters
+----------
+G : graph
+A NetworkX graph.  Undirected graphs will be converted to a directed
+graph with two directed edges for each undirected edge.
+alpha : float, optional
+Damping parameter for PageRank, default=0.85.
+personalization: dict, optional
+The "personalization vector" consisting of a dictionary with a
+key some subset of graph nodes and personalization value each of those.
+At least one personalization value must be non-zero.
+If not specfiied, a nodes personalization value will be zero.
+By default, a uniform distribution is used.
+max_iter : integer, optional
+Maximum number of iterations in power method eigenvalue solver.
+tol : float, optional
+Error tolerance used to check convergence in power method solver.
+nstart : dictionary, optional
+Starting value of PageRank iteration for each node.
+weight : key, optional
+Edge data key to use as weight.  If None weights are set to 1.
+dangling: dict, optional
+The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+any outedges. The dict key is the node the outedge points to and the dict
+value is the weight of that outedge. By default, dangling nodes are given
+outedges according to the personalization vector (uniform if not
+specified) This must be selected to result in an irreducible transition
+matrix (see notes under google_matrix). It may be common to have the
+dangling dict to be the same as the personalization dict.
+Returns
+-------
+pagerank : dictionary
+Dictionary of nodes with PageRank as value
+Examples
+--------
+>>> G = nx.DiGraph(nx.path_graph(4))
+>>> pr = nx.pagerank_scipy(G, alpha=0.9)
+Notes
+-----
+The eigenvector calculation uses power iteration with a SciPy
+sparse matrix representation.
+This implementation works with Multi(Di)Graphs. For multigraphs the
+weight between two nodes is set to be the sum of all edge weights
+between those nodes.
+See Also
+--------
+pagerank, pagerank_numpy, google_matrix
+Raises
+------
+PowerIterationFailedConvergence
+If the algorithm fails to converge to the specified tolerance
+within the specified number of iterations of the power iteration
+method.
+References
+----------
+.. [1] A. Langville and C. Meyer,
+"A survey of eigenvector methods of web information retrieval."
+http://citeseer.ist.psu.edu/713792.html
+.. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+The PageRank citation ranking: Bringing order to the Web. 1999
+http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+"""
+import numpy as np
+import scipy.sparse
+N = len(G)
+if N == 0:
+return {}
+nodelist = list(G)
+M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight, dtype=float)
+S = np.array(M.sum(axis=1)).flatten()
+S[S != 0] = 1.0 / S[S != 0]
+Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format="csr")
+M = Q * M
+# initial vector
+if nstart is None:
+x = np.repeat(1.0 / N, N)
+else:
+x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
+x = x / x.sum()
+# Personalization vector
+if personalization is None:
+p = np.repeat(1.0 / N, N)
+else:
+p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
+p = p / p.sum()
+# Dangling nodes
+if dangling is None:
+dangling_weights = p
+else:
+# Convert the dangling dictionary into an array in nodelist order
+dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
+dangling_weights /= dangling_weights.sum()
+is_dangling = np.where(S == 0)[0]
+# power iteration: make up to max_iter iterations
+for _ in range(max_iter):
+xlast = x
+x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
+# check convergence, l1 norm
+err = np.absolute(x - xlast).sum()
+if err < N * tol:
+return dict(zip(nodelist, map(float, x)))
+raise nx.PowerIterationFailedConvergence(max_iter)

Mercurial > repos > shellac > sam_consensus_v3

comparison env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @ 0:4f3585e2f14b draft default tip