Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/algorithms/link_analysis/hits_alg.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Hubs and authorities analysis of graph structure. | |
2 """ | |
3 import networkx as nx | |
4 | |
5 __all__ = ["hits", "hits_numpy", "hits_scipy", "authority_matrix", "hub_matrix"] | |
6 | |
7 | |
8 def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): | |
9 """Returns HITS hubs and authorities values for nodes. | |
10 | |
11 The HITS algorithm computes two numbers for a node. | |
12 Authorities estimates the node value based on the incoming links. | |
13 Hubs estimates the node value based on outgoing links. | |
14 | |
15 Parameters | |
16 ---------- | |
17 G : graph | |
18 A NetworkX graph | |
19 | |
20 max_iter : integer, optional | |
21 Maximum number of iterations in power method. | |
22 | |
23 tol : float, optional | |
24 Error tolerance used to check convergence in power method iteration. | |
25 | |
26 nstart : dictionary, optional | |
27 Starting value of each node for power method iteration. | |
28 | |
29 normalized : bool (default=True) | |
30 Normalize results by the sum of all of the values. | |
31 | |
32 Returns | |
33 ------- | |
34 (hubs,authorities) : two-tuple of dictionaries | |
35 Two dictionaries keyed by node containing the hub and authority | |
36 values. | |
37 | |
38 Raises | |
39 ------ | |
40 PowerIterationFailedConvergence | |
41 If the algorithm fails to converge to the specified tolerance | |
42 within the specified number of iterations of the power iteration | |
43 method. | |
44 | |
45 Examples | |
46 -------- | |
47 >>> G = nx.path_graph(4) | |
48 >>> h, a = nx.hits(G) | |
49 | |
50 Notes | |
51 ----- | |
52 The eigenvector calculation is done by the power iteration method | |
53 and has no guarantee of convergence. The iteration will stop | |
54 after max_iter iterations or an error tolerance of | |
55 number_of_nodes(G)*tol has been reached. | |
56 | |
57 The HITS algorithm was designed for directed graphs but this | |
58 algorithm does not check if the input graph is directed and will | |
59 execute on undirected graphs. | |
60 | |
61 References | |
62 ---------- | |
63 .. [1] A. Langville and C. Meyer, | |
64 "A survey of eigenvector methods of web information retrieval." | |
65 http://citeseer.ist.psu.edu/713792.html | |
66 .. [2] Jon Kleinberg, | |
67 Authoritative sources in a hyperlinked environment | |
68 Journal of the ACM 46 (5): 604-32, 1999. | |
69 doi:10.1145/324133.324140. | |
70 http://www.cs.cornell.edu/home/kleinber/auth.pdf. | |
71 """ | |
72 if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph: | |
73 raise Exception("hits() not defined for graphs with multiedges.") | |
74 if len(G) == 0: | |
75 return {}, {} | |
76 # choose fixed starting vector if not given | |
77 if nstart is None: | |
78 h = dict.fromkeys(G, 1.0 / G.number_of_nodes()) | |
79 else: | |
80 h = nstart | |
81 # normalize starting vector | |
82 s = 1.0 / sum(h.values()) | |
83 for k in h: | |
84 h[k] *= s | |
85 for _ in range(max_iter): # power iteration: make up to max_iter iterations | |
86 hlast = h | |
87 h = dict.fromkeys(hlast.keys(), 0) | |
88 a = dict.fromkeys(hlast.keys(), 0) | |
89 # this "matrix multiply" looks odd because it is | |
90 # doing a left multiply a^T=hlast^T*G | |
91 for n in h: | |
92 for nbr in G[n]: | |
93 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1) | |
94 # now multiply h=Ga | |
95 for n in h: | |
96 for nbr in G[n]: | |
97 h[n] += a[nbr] * G[n][nbr].get("weight", 1) | |
98 # normalize vector | |
99 s = 1.0 / max(h.values()) | |
100 for n in h: | |
101 h[n] *= s | |
102 # normalize vector | |
103 s = 1.0 / max(a.values()) | |
104 for n in a: | |
105 a[n] *= s | |
106 # check convergence, l1 norm | |
107 err = sum([abs(h[n] - hlast[n]) for n in h]) | |
108 if err < tol: | |
109 break | |
110 else: | |
111 raise nx.PowerIterationFailedConvergence(max_iter) | |
112 if normalized: | |
113 s = 1.0 / sum(a.values()) | |
114 for n in a: | |
115 a[n] *= s | |
116 s = 1.0 / sum(h.values()) | |
117 for n in h: | |
118 h[n] *= s | |
119 return h, a | |
120 | |
121 | |
122 def authority_matrix(G, nodelist=None): | |
123 """Returns the HITS authority matrix.""" | |
124 M = nx.to_numpy_array(G, nodelist=nodelist) | |
125 return M.T @ M | |
126 | |
127 | |
128 def hub_matrix(G, nodelist=None): | |
129 """Returns the HITS hub matrix.""" | |
130 M = nx.to_numpy_array(G, nodelist=nodelist) | |
131 return M @ M.T | |
132 | |
133 | |
134 def hits_numpy(G, normalized=True): | |
135 """Returns HITS hubs and authorities values for nodes. | |
136 | |
137 The HITS algorithm computes two numbers for a node. | |
138 Authorities estimates the node value based on the incoming links. | |
139 Hubs estimates the node value based on outgoing links. | |
140 | |
141 Parameters | |
142 ---------- | |
143 G : graph | |
144 A NetworkX graph | |
145 | |
146 normalized : bool (default=True) | |
147 Normalize results by the sum of all of the values. | |
148 | |
149 Returns | |
150 ------- | |
151 (hubs,authorities) : two-tuple of dictionaries | |
152 Two dictionaries keyed by node containing the hub and authority | |
153 values. | |
154 | |
155 Examples | |
156 -------- | |
157 >>> G = nx.path_graph(4) | |
158 >>> h, a = nx.hits(G) | |
159 | |
160 Notes | |
161 ----- | |
162 The eigenvector calculation uses NumPy's interface to LAPACK. | |
163 | |
164 The HITS algorithm was designed for directed graphs but this | |
165 algorithm does not check if the input graph is directed and will | |
166 execute on undirected graphs. | |
167 | |
168 References | |
169 ---------- | |
170 .. [1] A. Langville and C. Meyer, | |
171 "A survey of eigenvector methods of web information retrieval." | |
172 http://citeseer.ist.psu.edu/713792.html | |
173 .. [2] Jon Kleinberg, | |
174 Authoritative sources in a hyperlinked environment | |
175 Journal of the ACM 46 (5): 604-32, 1999. | |
176 doi:10.1145/324133.324140. | |
177 http://www.cs.cornell.edu/home/kleinber/auth.pdf. | |
178 """ | |
179 try: | |
180 import numpy as np | |
181 except ImportError as e: | |
182 raise ImportError("hits_numpy() requires NumPy: " "http://numpy.org/") from e | |
183 if len(G) == 0: | |
184 return {}, {} | |
185 H = nx.hub_matrix(G, list(G)) | |
186 e, ev = np.linalg.eig(H) | |
187 m = e.argsort()[-1] # index of maximum eigenvalue | |
188 h = np.array(ev[:, m]).flatten() | |
189 A = nx.authority_matrix(G, list(G)) | |
190 e, ev = np.linalg.eig(A) | |
191 m = e.argsort()[-1] # index of maximum eigenvalue | |
192 a = np.array(ev[:, m]).flatten() | |
193 if normalized: | |
194 h = h / h.sum() | |
195 a = a / a.sum() | |
196 else: | |
197 h = h / h.max() | |
198 a = a / a.max() | |
199 hubs = dict(zip(G, map(float, h))) | |
200 authorities = dict(zip(G, map(float, a))) | |
201 return hubs, authorities | |
202 | |
203 | |
204 def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True): | |
205 """Returns HITS hubs and authorities values for nodes. | |
206 | |
207 The HITS algorithm computes two numbers for a node. | |
208 Authorities estimates the node value based on the incoming links. | |
209 Hubs estimates the node value based on outgoing links. | |
210 | |
211 Parameters | |
212 ---------- | |
213 G : graph | |
214 A NetworkX graph | |
215 | |
216 max_iter : integer, optional | |
217 Maximum number of iterations in power method. | |
218 | |
219 tol : float, optional | |
220 Error tolerance used to check convergence in power method iteration. | |
221 | |
222 nstart : dictionary, optional | |
223 Starting value of each node for power method iteration. | |
224 | |
225 normalized : bool (default=True) | |
226 Normalize results by the sum of all of the values. | |
227 | |
228 Returns | |
229 ------- | |
230 (hubs,authorities) : two-tuple of dictionaries | |
231 Two dictionaries keyed by node containing the hub and authority | |
232 values. | |
233 | |
234 Examples | |
235 -------- | |
236 >>> G = nx.path_graph(4) | |
237 >>> h, a = nx.hits(G) | |
238 | |
239 Notes | |
240 ----- | |
241 This implementation uses SciPy sparse matrices. | |
242 | |
243 The eigenvector calculation is done by the power iteration method | |
244 and has no guarantee of convergence. The iteration will stop | |
245 after max_iter iterations or an error tolerance of | |
246 number_of_nodes(G)*tol has been reached. | |
247 | |
248 The HITS algorithm was designed for directed graphs but this | |
249 algorithm does not check if the input graph is directed and will | |
250 execute on undirected graphs. | |
251 | |
252 Raises | |
253 ------ | |
254 PowerIterationFailedConvergence | |
255 If the algorithm fails to converge to the specified tolerance | |
256 within the specified number of iterations of the power iteration | |
257 method. | |
258 | |
259 References | |
260 ---------- | |
261 .. [1] A. Langville and C. Meyer, | |
262 "A survey of eigenvector methods of web information retrieval." | |
263 http://citeseer.ist.psu.edu/713792.html | |
264 .. [2] Jon Kleinberg, | |
265 Authoritative sources in a hyperlinked environment | |
266 Journal of the ACM 46 (5): 604-632, 1999. | |
267 doi:10.1145/324133.324140. | |
268 http://www.cs.cornell.edu/home/kleinber/auth.pdf. | |
269 """ | |
270 try: | |
271 import numpy as np | |
272 except ImportError as e: | |
273 raise ImportError( | |
274 "hits_scipy() requires SciPy and NumPy:" | |
275 "http://scipy.org/ http://numpy.org/" | |
276 ) from e | |
277 if len(G) == 0: | |
278 return {}, {} | |
279 M = nx.to_scipy_sparse_matrix(G, nodelist=list(G)) | |
280 (n, m) = M.shape # should be square | |
281 A = M.T * M # authority matrix | |
282 x = np.ones((n, 1)) / n # initial guess | |
283 # power iteration on authority matrix | |
284 i = 0 | |
285 while True: | |
286 xlast = x | |
287 x = A * x | |
288 x = x / x.max() | |
289 # check convergence, l1 norm | |
290 err = np.absolute(x - xlast).sum() | |
291 if err < tol: | |
292 break | |
293 if i > max_iter: | |
294 raise nx.PowerIterationFailedConvergence(max_iter) | |
295 i += 1 | |
296 | |
297 a = np.asarray(x).flatten() | |
298 # h=M*a | |
299 h = np.asarray(M * a).flatten() | |
300 if normalized: | |
301 h = h / h.sum() | |
302 a = a / a.sum() | |
303 hubs = dict(zip(G, map(float, h))) | |
304 authorities = dict(zip(G, map(float, a))) | |
305 return hubs, authorities |