comparison env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/hits_alg.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Hubs and authorities analysis of graph structure.
2 """
3 import networkx as nx
4
5 __all__ = ["hits", "hits_numpy", "hits_scipy", "authority_matrix", "hub_matrix"]
6
7
8 def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
9 """Returns HITS hubs and authorities values for nodes.
10
11 The HITS algorithm computes two numbers for a node.
12 Authorities estimates the node value based on the incoming links.
13 Hubs estimates the node value based on outgoing links.
14
15 Parameters
16 ----------
17 G : graph
18 A NetworkX graph
19
20 max_iter : integer, optional
21 Maximum number of iterations in power method.
22
23 tol : float, optional
24 Error tolerance used to check convergence in power method iteration.
25
26 nstart : dictionary, optional
27 Starting value of each node for power method iteration.
28
29 normalized : bool (default=True)
30 Normalize results by the sum of all of the values.
31
32 Returns
33 -------
34 (hubs,authorities) : two-tuple of dictionaries
35 Two dictionaries keyed by node containing the hub and authority
36 values.
37
38 Raises
39 ------
40 PowerIterationFailedConvergence
41 If the algorithm fails to converge to the specified tolerance
42 within the specified number of iterations of the power iteration
43 method.
44
45 Examples
46 --------
47 >>> G = nx.path_graph(4)
48 >>> h, a = nx.hits(G)
49
50 Notes
51 -----
52 The eigenvector calculation is done by the power iteration method
53 and has no guarantee of convergence. The iteration will stop
54 after max_iter iterations or an error tolerance of
55 number_of_nodes(G)*tol has been reached.
56
57 The HITS algorithm was designed for directed graphs but this
58 algorithm does not check if the input graph is directed and will
59 execute on undirected graphs.
60
61 References
62 ----------
63 .. [1] A. Langville and C. Meyer,
64 "A survey of eigenvector methods of web information retrieval."
65 http://citeseer.ist.psu.edu/713792.html
66 .. [2] Jon Kleinberg,
67 Authoritative sources in a hyperlinked environment
68 Journal of the ACM 46 (5): 604-32, 1999.
69 doi:10.1145/324133.324140.
70 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
71 """
72 if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
73 raise Exception("hits() not defined for graphs with multiedges.")
74 if len(G) == 0:
75 return {}, {}
76 # choose fixed starting vector if not given
77 if nstart is None:
78 h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
79 else:
80 h = nstart
81 # normalize starting vector
82 s = 1.0 / sum(h.values())
83 for k in h:
84 h[k] *= s
85 for _ in range(max_iter): # power iteration: make up to max_iter iterations
86 hlast = h
87 h = dict.fromkeys(hlast.keys(), 0)
88 a = dict.fromkeys(hlast.keys(), 0)
89 # this "matrix multiply" looks odd because it is
90 # doing a left multiply a^T=hlast^T*G
91 for n in h:
92 for nbr in G[n]:
93 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
94 # now multiply h=Ga
95 for n in h:
96 for nbr in G[n]:
97 h[n] += a[nbr] * G[n][nbr].get("weight", 1)
98 # normalize vector
99 s = 1.0 / max(h.values())
100 for n in h:
101 h[n] *= s
102 # normalize vector
103 s = 1.0 / max(a.values())
104 for n in a:
105 a[n] *= s
106 # check convergence, l1 norm
107 err = sum([abs(h[n] - hlast[n]) for n in h])
108 if err < tol:
109 break
110 else:
111 raise nx.PowerIterationFailedConvergence(max_iter)
112 if normalized:
113 s = 1.0 / sum(a.values())
114 for n in a:
115 a[n] *= s
116 s = 1.0 / sum(h.values())
117 for n in h:
118 h[n] *= s
119 return h, a
120
121
122 def authority_matrix(G, nodelist=None):
123 """Returns the HITS authority matrix."""
124 M = nx.to_numpy_array(G, nodelist=nodelist)
125 return M.T @ M
126
127
128 def hub_matrix(G, nodelist=None):
129 """Returns the HITS hub matrix."""
130 M = nx.to_numpy_array(G, nodelist=nodelist)
131 return M @ M.T
132
133
134 def hits_numpy(G, normalized=True):
135 """Returns HITS hubs and authorities values for nodes.
136
137 The HITS algorithm computes two numbers for a node.
138 Authorities estimates the node value based on the incoming links.
139 Hubs estimates the node value based on outgoing links.
140
141 Parameters
142 ----------
143 G : graph
144 A NetworkX graph
145
146 normalized : bool (default=True)
147 Normalize results by the sum of all of the values.
148
149 Returns
150 -------
151 (hubs,authorities) : two-tuple of dictionaries
152 Two dictionaries keyed by node containing the hub and authority
153 values.
154
155 Examples
156 --------
157 >>> G = nx.path_graph(4)
158 >>> h, a = nx.hits(G)
159
160 Notes
161 -----
162 The eigenvector calculation uses NumPy's interface to LAPACK.
163
164 The HITS algorithm was designed for directed graphs but this
165 algorithm does not check if the input graph is directed and will
166 execute on undirected graphs.
167
168 References
169 ----------
170 .. [1] A. Langville and C. Meyer,
171 "A survey of eigenvector methods of web information retrieval."
172 http://citeseer.ist.psu.edu/713792.html
173 .. [2] Jon Kleinberg,
174 Authoritative sources in a hyperlinked environment
175 Journal of the ACM 46 (5): 604-32, 1999.
176 doi:10.1145/324133.324140.
177 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
178 """
179 try:
180 import numpy as np
181 except ImportError as e:
182 raise ImportError("hits_numpy() requires NumPy: " "http://numpy.org/") from e
183 if len(G) == 0:
184 return {}, {}
185 H = nx.hub_matrix(G, list(G))
186 e, ev = np.linalg.eig(H)
187 m = e.argsort()[-1] # index of maximum eigenvalue
188 h = np.array(ev[:, m]).flatten()
189 A = nx.authority_matrix(G, list(G))
190 e, ev = np.linalg.eig(A)
191 m = e.argsort()[-1] # index of maximum eigenvalue
192 a = np.array(ev[:, m]).flatten()
193 if normalized:
194 h = h / h.sum()
195 a = a / a.sum()
196 else:
197 h = h / h.max()
198 a = a / a.max()
199 hubs = dict(zip(G, map(float, h)))
200 authorities = dict(zip(G, map(float, a)))
201 return hubs, authorities
202
203
204 def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True):
205 """Returns HITS hubs and authorities values for nodes.
206
207 The HITS algorithm computes two numbers for a node.
208 Authorities estimates the node value based on the incoming links.
209 Hubs estimates the node value based on outgoing links.
210
211 Parameters
212 ----------
213 G : graph
214 A NetworkX graph
215
216 max_iter : integer, optional
217 Maximum number of iterations in power method.
218
219 tol : float, optional
220 Error tolerance used to check convergence in power method iteration.
221
222 nstart : dictionary, optional
223 Starting value of each node for power method iteration.
224
225 normalized : bool (default=True)
226 Normalize results by the sum of all of the values.
227
228 Returns
229 -------
230 (hubs,authorities) : two-tuple of dictionaries
231 Two dictionaries keyed by node containing the hub and authority
232 values.
233
234 Examples
235 --------
236 >>> G = nx.path_graph(4)
237 >>> h, a = nx.hits(G)
238
239 Notes
240 -----
241 This implementation uses SciPy sparse matrices.
242
243 The eigenvector calculation is done by the power iteration method
244 and has no guarantee of convergence. The iteration will stop
245 after max_iter iterations or an error tolerance of
246 number_of_nodes(G)*tol has been reached.
247
248 The HITS algorithm was designed for directed graphs but this
249 algorithm does not check if the input graph is directed and will
250 execute on undirected graphs.
251
252 Raises
253 ------
254 PowerIterationFailedConvergence
255 If the algorithm fails to converge to the specified tolerance
256 within the specified number of iterations of the power iteration
257 method.
258
259 References
260 ----------
261 .. [1] A. Langville and C. Meyer,
262 "A survey of eigenvector methods of web information retrieval."
263 http://citeseer.ist.psu.edu/713792.html
264 .. [2] Jon Kleinberg,
265 Authoritative sources in a hyperlinked environment
266 Journal of the ACM 46 (5): 604-632, 1999.
267 doi:10.1145/324133.324140.
268 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
269 """
270 try:
271 import numpy as np
272 except ImportError as e:
273 raise ImportError(
274 "hits_scipy() requires SciPy and NumPy:"
275 "http://scipy.org/ http://numpy.org/"
276 ) from e
277 if len(G) == 0:
278 return {}, {}
279 M = nx.to_scipy_sparse_matrix(G, nodelist=list(G))
280 (n, m) = M.shape # should be square
281 A = M.T * M # authority matrix
282 x = np.ones((n, 1)) / n # initial guess
283 # power iteration on authority matrix
284 i = 0
285 while True:
286 xlast = x
287 x = A * x
288 x = x / x.max()
289 # check convergence, l1 norm
290 err = np.absolute(x - xlast).sum()
291 if err < tol:
292 break
293 if i > max_iter:
294 raise nx.PowerIterationFailedConvergence(max_iter)
295 i += 1
296
297 a = np.asarray(x).flatten()
298 # h=M*a
299 h = np.asarray(M * a).flatten()
300 if normalized:
301 h = h / h.sum()
302 a = a / a.sum()
303 hubs = dict(zip(G, map(float, h)))
304 authorities = dict(zip(G, map(float, a)))
305 return hubs, authorities