comparison env/lib/python3.9/site-packages/networkx/linalg/attrmatrix.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Functions for constructing matrix-like objects from graph attributes.
3 """
4
5 __all__ = ["attr_matrix", "attr_sparse_matrix"]
6
7
8 def _node_value(G, node_attr):
9 """Returns a function that returns a value from G.nodes[u].
10
11 We return a function expecting a node as its sole argument. Then, in the
12 simplest scenario, the returned function will return G.nodes[u][node_attr].
13 However, we also handle the case when `node_attr` is None or when it is a
14 function itself.
15
16 Parameters
17 ----------
18 G : graph
19 A NetworkX graph
20
21 node_attr : {None, str, callable}
22 Specification of how the value of the node attribute should be obtained
23 from the node attribute dictionary.
24
25 Returns
26 -------
27 value : function
28 A function expecting a node as its sole argument. The function will
29 returns a value from G.nodes[u] that depends on `edge_attr`.
30
31 """
32 if node_attr is None:
33
34 def value(u):
35 return u
36
37 elif not hasattr(node_attr, "__call__"):
38 # assume it is a key for the node attribute dictionary
39 def value(u):
40 return G.nodes[u][node_attr]
41
42 else:
43 # Advanced: Allow users to specify something else.
44 #
45 # For example,
46 # node_attr = lambda u: G.nodes[u].get('size', .5) * 3
47 #
48 value = node_attr
49
50 return value
51
52
53 def _edge_value(G, edge_attr):
54 """Returns a function that returns a value from G[u][v].
55
56 Suppose there exists an edge between u and v. Then we return a function
57 expecting u and v as arguments. For Graph and DiGraph, G[u][v] is
58 the edge attribute dictionary, and the function (essentially) returns
59 G[u][v][edge_attr]. However, we also handle cases when `edge_attr` is None
60 and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v]
61 is a dictionary of all edges between u and v. In this case, the returned
62 function sums the value of `edge_attr` for every edge between u and v.
63
64 Parameters
65 ----------
66 G : graph
67 A NetworkX graph
68
69 edge_attr : {None, str, callable}
70 Specification of how the value of the edge attribute should be obtained
71 from the edge attribute dictionary, G[u][v]. For multigraphs, G[u][v]
72 is a dictionary of all the edges between u and v. This allows for
73 special treatment of multiedges.
74
75 Returns
76 -------
77 value : function
78 A function expecting two nodes as parameters. The nodes should
79 represent the from- and to- node of an edge. The function will
80 return a value from G[u][v] that depends on `edge_attr`.
81
82 """
83
84 if edge_attr is None:
85 # topological count of edges
86
87 if G.is_multigraph():
88
89 def value(u, v):
90 return len(G[u][v])
91
92 else:
93
94 def value(u, v):
95 return 1
96
97 elif not hasattr(edge_attr, "__call__"):
98 # assume it is a key for the edge attribute dictionary
99
100 if edge_attr == "weight":
101 # provide a default value
102 if G.is_multigraph():
103
104 def value(u, v):
105 return sum([d.get(edge_attr, 1) for d in G[u][v].values()])
106
107 else:
108
109 def value(u, v):
110 return G[u][v].get(edge_attr, 1)
111
112 else:
113 # otherwise, the edge attribute MUST exist for each edge
114 if G.is_multigraph():
115
116 def value(u, v):
117 return sum([d[edge_attr] for d in G[u][v].values()])
118
119 else:
120
121 def value(u, v):
122 return G[u][v][edge_attr]
123
124 else:
125 # Advanced: Allow users to specify something else.
126 #
127 # Alternative default value:
128 # edge_attr = lambda u,v: G[u][v].get('thickness', .5)
129 #
130 # Function on an attribute:
131 # edge_attr = lambda u,v: abs(G[u][v]['weight'])
132 #
133 # Handle Multi(Di)Graphs differently:
134 # edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()])
135 #
136 # Ignore multiple edges
137 # edge_attr = lambda u,v: 1 if len(G[u][v]) else 0
138 #
139 value = edge_attr
140
141 return value
142
143
144 def attr_matrix(
145 G,
146 edge_attr=None,
147 node_attr=None,
148 normalized=False,
149 rc_order=None,
150 dtype=None,
151 order=None,
152 ):
153 """Returns a NumPy matrix using attributes from G.
154
155 If only `G` is passed in, then the adjacency matrix is constructed.
156
157 Let A be a discrete set of values for the node attribute `node_attr`. Then
158 the elements of A represent the rows and columns of the constructed matrix.
159 Now, iterate through every edge e=(u,v) in `G` and consider the value
160 of the edge attribute `edge_attr`. If ua and va are the values of the
161 node attribute `node_attr` for u and v, respectively, then the value of
162 the edge attribute is added to the matrix element at (ua, va).
163
164 Parameters
165 ----------
166 G : graph
167 The NetworkX graph used to construct the NumPy matrix.
168
169 edge_attr : str, optional
170 Each element of the matrix represents a running total of the
171 specified edge attribute for edges whose node attributes correspond
172 to the rows/cols of the matirx. The attribute must be present for
173 all edges in the graph. If no attribute is specified, then we
174 just count the number of edges whose node attributes correspond
175 to the matrix element.
176
177 node_attr : str, optional
178 Each row and column in the matrix represents a particular value
179 of the node attribute. The attribute must be present for all nodes
180 in the graph. Note, the values of this attribute should be reliably
181 hashable. So, float values are not recommended. If no attribute is
182 specified, then the rows and columns will be the nodes of the graph.
183
184 normalized : bool, optional
185 If True, then each row is normalized by the summation of its values.
186
187 rc_order : list, optional
188 A list of the node attribute values. This list specifies the ordering
189 of rows and columns of the array. If no ordering is provided, then
190 the ordering will be random (and also, a return value).
191
192 Other Parameters
193 ----------------
194 dtype : NumPy data-type, optional
195 A valid NumPy dtype used to initialize the array. Keep in mind certain
196 dtypes can yield unexpected results if the array is to be normalized.
197 The parameter is passed to numpy.zeros(). If unspecified, the NumPy
198 default is used.
199
200 order : {'C', 'F'}, optional
201 Whether to store multidimensional data in C- or Fortran-contiguous
202 (row- or column-wise) order in memory. This parameter is passed to
203 numpy.zeros(). If unspecified, the NumPy default is used.
204
205 Returns
206 -------
207 M : NumPy matrix
208 The attribute matrix.
209
210 ordering : list
211 If `rc_order` was specified, then only the matrix is returned.
212 However, if `rc_order` was None, then the ordering used to construct
213 the matrix is returned as well.
214
215 Examples
216 --------
217 Construct an adjacency matrix:
218
219 >>> G = nx.Graph()
220 >>> G.add_edge(0, 1, thickness=1, weight=3)
221 >>> G.add_edge(0, 2, thickness=2)
222 >>> G.add_edge(1, 2, thickness=3)
223 >>> nx.attr_matrix(G, rc_order=[0, 1, 2])
224 matrix([[0., 1., 1.],
225 [1., 0., 1.],
226 [1., 1., 0.]])
227
228 Alternatively, we can obtain the matrix describing edge thickness.
229
230 >>> nx.attr_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
231 matrix([[0., 1., 2.],
232 [1., 0., 3.],
233 [2., 3., 0.]])
234
235 We can also color the nodes and ask for the probability distribution over
236 all edges (u,v) describing:
237
238 Pr(v has color Y | u has color X)
239
240 >>> G.nodes[0]["color"] = "red"
241 >>> G.nodes[1]["color"] = "red"
242 >>> G.nodes[2]["color"] = "blue"
243 >>> rc = ["red", "blue"]
244 >>> nx.attr_matrix(G, node_attr="color", normalized=True, rc_order=rc)
245 matrix([[0.33333333, 0.66666667],
246 [1. , 0. ]])
247
248 For example, the above tells us that for all edges (u,v):
249
250 Pr( v is red | u is red) = 1/3
251 Pr( v is blue | u is red) = 2/3
252
253 Pr( v is red | u is blue) = 1
254 Pr( v is blue | u is blue) = 0
255
256 Finally, we can obtain the total weights listed by the node colors.
257
258 >>> nx.attr_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
259 matrix([[3., 2.],
260 [2., 0.]])
261
262 Thus, the total weight over all edges (u,v) with u and v having colors:
263
264 (red, red) is 3 # the sole contribution is from edge (0,1)
265 (red, blue) is 2 # contributions from edges (0,2) and (1,2)
266 (blue, red) is 2 # same as (red, blue) since graph is undirected
267 (blue, blue) is 0 # there are no edges with blue endpoints
268
269 """
270 try:
271 import numpy as np
272 except ImportError as e:
273 raise ImportError("attr_matrix() requires numpy: http://scipy.org/ ") from e
274
275 edge_value = _edge_value(G, edge_attr)
276 node_value = _node_value(G, node_attr)
277
278 if rc_order is None:
279 ordering = list({node_value(n) for n in G})
280 else:
281 ordering = rc_order
282
283 N = len(ordering)
284 undirected = not G.is_directed()
285 index = dict(zip(ordering, range(N)))
286 M = np.zeros((N, N), dtype=dtype, order=order)
287
288 seen = set()
289 for u, nbrdict in G.adjacency():
290 for v in nbrdict:
291 # Obtain the node attribute values.
292 i, j = index[node_value(u)], index[node_value(v)]
293 if v not in seen:
294 M[i, j] += edge_value(u, v)
295 if undirected:
296 M[j, i] = M[i, j]
297
298 if undirected:
299 seen.add(u)
300
301 if normalized:
302 M /= M.sum(axis=1).reshape((N, 1))
303
304 M = np.asmatrix(M)
305
306 if rc_order is None:
307 return M, ordering
308 else:
309 return M
310
311
312 def attr_sparse_matrix(
313 G, edge_attr=None, node_attr=None, normalized=False, rc_order=None, dtype=None
314 ):
315 """Returns a SciPy sparse matrix using attributes from G.
316
317 If only `G` is passed in, then the adjacency matrix is constructed.
318
319 Let A be a discrete set of values for the node attribute `node_attr`. Then
320 the elements of A represent the rows and columns of the constructed matrix.
321 Now, iterate through every edge e=(u,v) in `G` and consider the value
322 of the edge attribute `edge_attr`. If ua and va are the values of the
323 node attribute `node_attr` for u and v, respectively, then the value of
324 the edge attribute is added to the matrix element at (ua, va).
325
326 Parameters
327 ----------
328 G : graph
329 The NetworkX graph used to construct the NumPy matrix.
330
331 edge_attr : str, optional
332 Each element of the matrix represents a running total of the
333 specified edge attribute for edges whose node attributes correspond
334 to the rows/cols of the matirx. The attribute must be present for
335 all edges in the graph. If no attribute is specified, then we
336 just count the number of edges whose node attributes correspond
337 to the matrix element.
338
339 node_attr : str, optional
340 Each row and column in the matrix represents a particular value
341 of the node attribute. The attribute must be present for all nodes
342 in the graph. Note, the values of this attribute should be reliably
343 hashable. So, float values are not recommended. If no attribute is
344 specified, then the rows and columns will be the nodes of the graph.
345
346 normalized : bool, optional
347 If True, then each row is normalized by the summation of its values.
348
349 rc_order : list, optional
350 A list of the node attribute values. This list specifies the ordering
351 of rows and columns of the array. If no ordering is provided, then
352 the ordering will be random (and also, a return value).
353
354 Other Parameters
355 ----------------
356 dtype : NumPy data-type, optional
357 A valid NumPy dtype used to initialize the array. Keep in mind certain
358 dtypes can yield unexpected results if the array is to be normalized.
359 The parameter is passed to numpy.zeros(). If unspecified, the NumPy
360 default is used.
361
362 Returns
363 -------
364 M : SciPy sparse matrix
365 The attribute matrix.
366
367 ordering : list
368 If `rc_order` was specified, then only the matrix is returned.
369 However, if `rc_order` was None, then the ordering used to construct
370 the matrix is returned as well.
371
372 Examples
373 --------
374 Construct an adjacency matrix:
375
376 >>> G = nx.Graph()
377 >>> G.add_edge(0, 1, thickness=1, weight=3)
378 >>> G.add_edge(0, 2, thickness=2)
379 >>> G.add_edge(1, 2, thickness=3)
380 >>> M = nx.attr_sparse_matrix(G, rc_order=[0, 1, 2])
381 >>> M.todense()
382 matrix([[0., 1., 1.],
383 [1., 0., 1.],
384 [1., 1., 0.]])
385
386 Alternatively, we can obtain the matrix describing edge thickness.
387
388 >>> M = nx.attr_sparse_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
389 >>> M.todense()
390 matrix([[0., 1., 2.],
391 [1., 0., 3.],
392 [2., 3., 0.]])
393
394 We can also color the nodes and ask for the probability distribution over
395 all edges (u,v) describing:
396
397 Pr(v has color Y | u has color X)
398
399 >>> G.nodes[0]["color"] = "red"
400 >>> G.nodes[1]["color"] = "red"
401 >>> G.nodes[2]["color"] = "blue"
402 >>> rc = ["red", "blue"]
403 >>> M = nx.attr_sparse_matrix(G, node_attr="color", normalized=True, rc_order=rc)
404 >>> M.todense()
405 matrix([[0.33333333, 0.66666667],
406 [1. , 0. ]])
407
408 For example, the above tells us that for all edges (u,v):
409
410 Pr( v is red | u is red) = 1/3
411 Pr( v is blue | u is red) = 2/3
412
413 Pr( v is red | u is blue) = 1
414 Pr( v is blue | u is blue) = 0
415
416 Finally, we can obtain the total weights listed by the node colors.
417
418 >>> M = nx.attr_sparse_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
419 >>> M.todense()
420 matrix([[3., 2.],
421 [2., 0.]])
422
423 Thus, the total weight over all edges (u,v) with u and v having colors:
424
425 (red, red) is 3 # the sole contribution is from edge (0,1)
426 (red, blue) is 2 # contributions from edges (0,2) and (1,2)
427 (blue, red) is 2 # same as (red, blue) since graph is undirected
428 (blue, blue) is 0 # there are no edges with blue endpoints
429
430 """
431 try:
432 import numpy as np
433 from scipy import sparse
434 except ImportError as e:
435 raise ImportError(
436 "attr_sparse_matrix() requires scipy: " "http://scipy.org/ "
437 ) from e
438
439 edge_value = _edge_value(G, edge_attr)
440 node_value = _node_value(G, node_attr)
441
442 if rc_order is None:
443 ordering = list({node_value(n) for n in G})
444 else:
445 ordering = rc_order
446
447 N = len(ordering)
448 undirected = not G.is_directed()
449 index = dict(zip(ordering, range(N)))
450 M = sparse.lil_matrix((N, N), dtype=dtype)
451
452 seen = set()
453 for u, nbrdict in G.adjacency():
454 for v in nbrdict:
455 # Obtain the node attribute values.
456 i, j = index[node_value(u)], index[node_value(v)]
457 if v not in seen:
458 M[i, j] += edge_value(u, v)
459 if undirected:
460 M[j, i] = M[i, j]
461
462 if undirected:
463 seen.add(u)
464
465 if normalized:
466 norms = np.asarray(M.sum(axis=1)).ravel()
467 for i, norm in enumerate(norms):
468 M[i, :] /= norm
469
470 if rc_order is None:
471 return M, ordering
472 else:
473 return M