Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/hits_alg.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/hits_alg.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,305 @@ +"""Hubs and authorities analysis of graph structure. +""" +import networkx as nx + +__all__ = ["hits", "hits_numpy", "hits_scipy", "authority_matrix", "hub_matrix"] + + +def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): + """Returns HITS hubs and authorities values for nodes. + + The HITS algorithm computes two numbers for a node. + Authorities estimates the node value based on the incoming links. + Hubs estimates the node value based on outgoing links. + + Parameters + ---------- + G : graph + A NetworkX graph + + max_iter : integer, optional + Maximum number of iterations in power method. + + tol : float, optional + Error tolerance used to check convergence in power method iteration. + + nstart : dictionary, optional + Starting value of each node for power method iteration. + + normalized : bool (default=True) + Normalize results by the sum of all of the values. + + Returns + ------- + (hubs,authorities) : two-tuple of dictionaries + Two dictionaries keyed by node containing the hub and authority + values. + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + Examples + -------- + >>> G = nx.path_graph(4) + >>> h, a = nx.hits(G) + + Notes + ----- + The eigenvector calculation is done by the power iteration method + and has no guarantee of convergence. The iteration will stop + after max_iter iterations or an error tolerance of + number_of_nodes(G)*tol has been reached. + + The HITS algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Jon Kleinberg, + Authoritative sources in a hyperlinked environment + Journal of the ACM 46 (5): 604-32, 1999. + doi:10.1145/324133.324140. + http://www.cs.cornell.edu/home/kleinber/auth.pdf. + """ + if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph: + raise Exception("hits() not defined for graphs with multiedges.") + if len(G) == 0: + return {}, {} + # choose fixed starting vector if not given + if nstart is None: + h = dict.fromkeys(G, 1.0 / G.number_of_nodes()) + else: + h = nstart + # normalize starting vector + s = 1.0 / sum(h.values()) + for k in h: + h[k] *= s + for _ in range(max_iter): # power iteration: make up to max_iter iterations + hlast = h + h = dict.fromkeys(hlast.keys(), 0) + a = dict.fromkeys(hlast.keys(), 0) + # this "matrix multiply" looks odd because it is + # doing a left multiply a^T=hlast^T*G + for n in h: + for nbr in G[n]: + a[nbr] += hlast[n] * G[n][nbr].get("weight", 1) + # now multiply h=Ga + for n in h: + for nbr in G[n]: + h[n] += a[nbr] * G[n][nbr].get("weight", 1) + # normalize vector + s = 1.0 / max(h.values()) + for n in h: + h[n] *= s + # normalize vector + s = 1.0 / max(a.values()) + for n in a: + a[n] *= s + # check convergence, l1 norm + err = sum([abs(h[n] - hlast[n]) for n in h]) + if err < tol: + break + else: + raise nx.PowerIterationFailedConvergence(max_iter) + if normalized: + s = 1.0 / sum(a.values()) + for n in a: + a[n] *= s + s = 1.0 / sum(h.values()) + for n in h: + h[n] *= s + return h, a + + +def authority_matrix(G, nodelist=None): + """Returns the HITS authority matrix.""" + M = nx.to_numpy_array(G, nodelist=nodelist) + return M.T @ M + + +def hub_matrix(G, nodelist=None): + """Returns the HITS hub matrix.""" + M = nx.to_numpy_array(G, nodelist=nodelist) + return M @ M.T + + +def hits_numpy(G, normalized=True): + """Returns HITS hubs and authorities values for nodes. + + The HITS algorithm computes two numbers for a node. + Authorities estimates the node value based on the incoming links. + Hubs estimates the node value based on outgoing links. + + Parameters + ---------- + G : graph + A NetworkX graph + + normalized : bool (default=True) + Normalize results by the sum of all of the values. + + Returns + ------- + (hubs,authorities) : two-tuple of dictionaries + Two dictionaries keyed by node containing the hub and authority + values. + + Examples + -------- + >>> G = nx.path_graph(4) + >>> h, a = nx.hits(G) + + Notes + ----- + The eigenvector calculation uses NumPy's interface to LAPACK. + + The HITS algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Jon Kleinberg, + Authoritative sources in a hyperlinked environment + Journal of the ACM 46 (5): 604-32, 1999. + doi:10.1145/324133.324140. + http://www.cs.cornell.edu/home/kleinber/auth.pdf. + """ + try: + import numpy as np + except ImportError as e: + raise ImportError("hits_numpy() requires NumPy: " "http://numpy.org/") from e + if len(G) == 0: + return {}, {} + H = nx.hub_matrix(G, list(G)) + e, ev = np.linalg.eig(H) + m = e.argsort()[-1] # index of maximum eigenvalue + h = np.array(ev[:, m]).flatten() + A = nx.authority_matrix(G, list(G)) + e, ev = np.linalg.eig(A) + m = e.argsort()[-1] # index of maximum eigenvalue + a = np.array(ev[:, m]).flatten() + if normalized: + h = h / h.sum() + a = a / a.sum() + else: + h = h / h.max() + a = a / a.max() + hubs = dict(zip(G, map(float, h))) + authorities = dict(zip(G, map(float, a))) + return hubs, authorities + + +def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True): + """Returns HITS hubs and authorities values for nodes. + + The HITS algorithm computes two numbers for a node. + Authorities estimates the node value based on the incoming links. + Hubs estimates the node value based on outgoing links. + + Parameters + ---------- + G : graph + A NetworkX graph + + max_iter : integer, optional + Maximum number of iterations in power method. + + tol : float, optional + Error tolerance used to check convergence in power method iteration. + + nstart : dictionary, optional + Starting value of each node for power method iteration. + + normalized : bool (default=True) + Normalize results by the sum of all of the values. + + Returns + ------- + (hubs,authorities) : two-tuple of dictionaries + Two dictionaries keyed by node containing the hub and authority + values. + + Examples + -------- + >>> G = nx.path_graph(4) + >>> h, a = nx.hits(G) + + Notes + ----- + This implementation uses SciPy sparse matrices. + + The eigenvector calculation is done by the power iteration method + and has no guarantee of convergence. The iteration will stop + after max_iter iterations or an error tolerance of + number_of_nodes(G)*tol has been reached. + + The HITS algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs. + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Jon Kleinberg, + Authoritative sources in a hyperlinked environment + Journal of the ACM 46 (5): 604-632, 1999. + doi:10.1145/324133.324140. + http://www.cs.cornell.edu/home/kleinber/auth.pdf. + """ + try: + import numpy as np + except ImportError as e: + raise ImportError( + "hits_scipy() requires SciPy and NumPy:" + "http://scipy.org/ http://numpy.org/" + ) from e + if len(G) == 0: + return {}, {} + M = nx.to_scipy_sparse_matrix(G, nodelist=list(G)) + (n, m) = M.shape # should be square + A = M.T * M # authority matrix + x = np.ones((n, 1)) / n # initial guess + # power iteration on authority matrix + i = 0 + while True: + xlast = x + x = A * x + x = x / x.max() + # check convergence, l1 norm + err = np.absolute(x - xlast).sum() + if err < tol: + break + if i > max_iter: + raise nx.PowerIterationFailedConvergence(max_iter) + i += 1 + + a = np.asarray(x).flatten() + # h=M*a + h = np.asarray(M * a).flatten() + if normalized: + h = h / h.sum() + a = a / a.sum() + hubs = dict(zip(G, map(float, h))) + authorities = dict(zip(G, map(float, a))) + return hubs, authorities