Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/networkx/linalg/attrmatrix.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 """ | |
| 2 Functions for constructing matrix-like objects from graph attributes. | |
| 3 """ | |
| 4 | |
| 5 __all__ = ['attr_matrix', 'attr_sparse_matrix'] | |
| 6 | |
| 7 import networkx as nx | |
| 8 | |
| 9 | |
| 10 def _node_value(G, node_attr): | |
| 11 """Returns a function that returns a value from G.nodes[u]. | |
| 12 | |
| 13 We return a function expecting a node as its sole argument. Then, in the | |
| 14 simplest scenario, the returned function will return G.nodes[u][node_attr]. | |
| 15 However, we also handle the case when `node_attr` is None or when it is a | |
| 16 function itself. | |
| 17 | |
| 18 Parameters | |
| 19 ---------- | |
| 20 G : graph | |
| 21 A NetworkX graph | |
| 22 | |
| 23 node_attr : {None, str, callable} | |
| 24 Specification of how the value of the node attribute should be obtained | |
| 25 from the node attribute dictionary. | |
| 26 | |
| 27 Returns | |
| 28 ------- | |
| 29 value : function | |
| 30 A function expecting a node as its sole argument. The function will | |
| 31 returns a value from G.nodes[u] that depends on `edge_attr`. | |
| 32 | |
| 33 """ | |
| 34 if node_attr is None: | |
| 35 def value(u): return u | |
| 36 elif not hasattr(node_attr, '__call__'): | |
| 37 # assume it is a key for the node attribute dictionary | |
| 38 def value(u): return G.nodes[u][node_attr] | |
| 39 else: | |
| 40 # Advanced: Allow users to specify something else. | |
| 41 # | |
| 42 # For example, | |
| 43 # node_attr = lambda u: G.nodes[u].get('size', .5) * 3 | |
| 44 # | |
| 45 value = node_attr | |
| 46 | |
| 47 return value | |
| 48 | |
| 49 | |
| 50 def _edge_value(G, edge_attr): | |
| 51 """Returns a function that returns a value from G[u][v]. | |
| 52 | |
| 53 Suppose there exists an edge between u and v. Then we return a function | |
| 54 expecting u and v as arguments. For Graph and DiGraph, G[u][v] is | |
| 55 the edge attribute dictionary, and the function (essentially) returns | |
| 56 G[u][v][edge_attr]. However, we also handle cases when `edge_attr` is None | |
| 57 and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v] | |
| 58 is a dictionary of all edges between u and v. In this case, the returned | |
| 59 function sums the value of `edge_attr` for every edge between u and v. | |
| 60 | |
| 61 Parameters | |
| 62 ---------- | |
| 63 G : graph | |
| 64 A NetworkX graph | |
| 65 | |
| 66 edge_attr : {None, str, callable} | |
| 67 Specification of how the value of the edge attribute should be obtained | |
| 68 from the edge attribute dictionary, G[u][v]. For multigraphs, G[u][v] | |
| 69 is a dictionary of all the edges between u and v. This allows for | |
| 70 special treatment of multiedges. | |
| 71 | |
| 72 Returns | |
| 73 ------- | |
| 74 value : function | |
| 75 A function expecting two nodes as parameters. The nodes should | |
| 76 represent the from- and to- node of an edge. The function will | |
| 77 return a value from G[u][v] that depends on `edge_attr`. | |
| 78 | |
| 79 """ | |
| 80 | |
| 81 if edge_attr is None: | |
| 82 # topological count of edges | |
| 83 | |
| 84 if G.is_multigraph(): | |
| 85 def value(u, v): return len(G[u][v]) | |
| 86 else: | |
| 87 def value(u, v): return 1 | |
| 88 | |
| 89 elif not hasattr(edge_attr, '__call__'): | |
| 90 # assume it is a key for the edge attribute dictionary | |
| 91 | |
| 92 if edge_attr == 'weight': | |
| 93 # provide a default value | |
| 94 if G.is_multigraph(): | |
| 95 def value(u, v): return sum([d.get(edge_attr, 1) for d in G[u][v].values()]) | |
| 96 else: | |
| 97 def value(u, v): return G[u][v].get(edge_attr, 1) | |
| 98 else: | |
| 99 # otherwise, the edge attribute MUST exist for each edge | |
| 100 if G.is_multigraph(): | |
| 101 def value(u, v): return sum([d[edge_attr] for d in G[u][v].values()]) | |
| 102 else: | |
| 103 def value(u, v): return G[u][v][edge_attr] | |
| 104 | |
| 105 else: | |
| 106 # Advanced: Allow users to specify something else. | |
| 107 # | |
| 108 # Alternative default value: | |
| 109 # edge_attr = lambda u,v: G[u][v].get('thickness', .5) | |
| 110 # | |
| 111 # Function on an attribute: | |
| 112 # edge_attr = lambda u,v: abs(G[u][v]['weight']) | |
| 113 # | |
| 114 # Handle Multi(Di)Graphs differently: | |
| 115 # edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()]) | |
| 116 # | |
| 117 # Ignore multiple edges | |
| 118 # edge_attr = lambda u,v: 1 if len(G[u][v]) else 0 | |
| 119 # | |
| 120 value = edge_attr | |
| 121 | |
| 122 return value | |
| 123 | |
| 124 | |
| 125 def attr_matrix(G, edge_attr=None, node_attr=None, normalized=False, | |
| 126 rc_order=None, dtype=None, order=None): | |
| 127 """Returns a NumPy matrix using attributes from G. | |
| 128 | |
| 129 If only `G` is passed in, then the adjacency matrix is constructed. | |
| 130 | |
| 131 Let A be a discrete set of values for the node attribute `node_attr`. Then | |
| 132 the elements of A represent the rows and columns of the constructed matrix. | |
| 133 Now, iterate through every edge e=(u,v) in `G` and consider the value | |
| 134 of the edge attribute `edge_attr`. If ua and va are the values of the | |
| 135 node attribute `node_attr` for u and v, respectively, then the value of | |
| 136 the edge attribute is added to the matrix element at (ua, va). | |
| 137 | |
| 138 Parameters | |
| 139 ---------- | |
| 140 G : graph | |
| 141 The NetworkX graph used to construct the NumPy matrix. | |
| 142 | |
| 143 edge_attr : str, optional | |
| 144 Each element of the matrix represents a running total of the | |
| 145 specified edge attribute for edges whose node attributes correspond | |
| 146 to the rows/cols of the matirx. The attribute must be present for | |
| 147 all edges in the graph. If no attribute is specified, then we | |
| 148 just count the number of edges whose node attributes correspond | |
| 149 to the matrix element. | |
| 150 | |
| 151 node_attr : str, optional | |
| 152 Each row and column in the matrix represents a particular value | |
| 153 of the node attribute. The attribute must be present for all nodes | |
| 154 in the graph. Note, the values of this attribute should be reliably | |
| 155 hashable. So, float values are not recommended. If no attribute is | |
| 156 specified, then the rows and columns will be the nodes of the graph. | |
| 157 | |
| 158 normalized : bool, optional | |
| 159 If True, then each row is normalized by the summation of its values. | |
| 160 | |
| 161 rc_order : list, optional | |
| 162 A list of the node attribute values. This list specifies the ordering | |
| 163 of rows and columns of the array. If no ordering is provided, then | |
| 164 the ordering will be random (and also, a return value). | |
| 165 | |
| 166 Other Parameters | |
| 167 ---------------- | |
| 168 dtype : NumPy data-type, optional | |
| 169 A valid NumPy dtype used to initialize the array. Keep in mind certain | |
| 170 dtypes can yield unexpected results if the array is to be normalized. | |
| 171 The parameter is passed to numpy.zeros(). If unspecified, the NumPy | |
| 172 default is used. | |
| 173 | |
| 174 order : {'C', 'F'}, optional | |
| 175 Whether to store multidimensional data in C- or Fortran-contiguous | |
| 176 (row- or column-wise) order in memory. This parameter is passed to | |
| 177 numpy.zeros(). If unspecified, the NumPy default is used. | |
| 178 | |
| 179 Returns | |
| 180 ------- | |
| 181 M : NumPy matrix | |
| 182 The attribute matrix. | |
| 183 | |
| 184 ordering : list | |
| 185 If `rc_order` was specified, then only the matrix is returned. | |
| 186 However, if `rc_order` was None, then the ordering used to construct | |
| 187 the matrix is returned as well. | |
| 188 | |
| 189 Examples | |
| 190 -------- | |
| 191 Construct an adjacency matrix: | |
| 192 | |
| 193 >>> G = nx.Graph() | |
| 194 >>> G.add_edge(0, 1, thickness=1, weight=3) | |
| 195 >>> G.add_edge(0, 2, thickness=2) | |
| 196 >>> G.add_edge(1, 2, thickness=3) | |
| 197 >>> nx.attr_matrix(G, rc_order=[0, 1, 2]) | |
| 198 matrix([[0., 1., 1.], | |
| 199 [1., 0., 1.], | |
| 200 [1., 1., 0.]]) | |
| 201 | |
| 202 Alternatively, we can obtain the matrix describing edge thickness. | |
| 203 | |
| 204 >>> nx.attr_matrix(G, edge_attr='thickness', rc_order=[0, 1, 2]) | |
| 205 matrix([[0., 1., 2.], | |
| 206 [1., 0., 3.], | |
| 207 [2., 3., 0.]]) | |
| 208 | |
| 209 We can also color the nodes and ask for the probability distribution over | |
| 210 all edges (u,v) describing: | |
| 211 | |
| 212 Pr(v has color Y | u has color X) | |
| 213 | |
| 214 >>> G.nodes[0]['color'] = 'red' | |
| 215 >>> G.nodes[1]['color'] = 'red' | |
| 216 >>> G.nodes[2]['color'] = 'blue' | |
| 217 >>> rc = ['red', 'blue'] | |
| 218 >>> nx.attr_matrix(G, node_attr='color', normalized=True, rc_order=rc) | |
| 219 matrix([[0.33333333, 0.66666667], | |
| 220 [1. , 0. ]]) | |
| 221 | |
| 222 For example, the above tells us that for all edges (u,v): | |
| 223 | |
| 224 Pr( v is red | u is red) = 1/3 | |
| 225 Pr( v is blue | u is red) = 2/3 | |
| 226 | |
| 227 Pr( v is red | u is blue) = 1 | |
| 228 Pr( v is blue | u is blue) = 0 | |
| 229 | |
| 230 Finally, we can obtain the total weights listed by the node colors. | |
| 231 | |
| 232 >>> nx.attr_matrix(G, edge_attr='weight', node_attr='color', rc_order=rc) | |
| 233 matrix([[3., 2.], | |
| 234 [2., 0.]]) | |
| 235 | |
| 236 Thus, the total weight over all edges (u,v) with u and v having colors: | |
| 237 | |
| 238 (red, red) is 3 # the sole contribution is from edge (0,1) | |
| 239 (red, blue) is 2 # contributions from edges (0,2) and (1,2) | |
| 240 (blue, red) is 2 # same as (red, blue) since graph is undirected | |
| 241 (blue, blue) is 0 # there are no edges with blue endpoints | |
| 242 | |
| 243 """ | |
| 244 try: | |
| 245 import numpy as np | |
| 246 except ImportError: | |
| 247 raise ImportError( | |
| 248 "attr_matrix() requires numpy: http://scipy.org/ ") | |
| 249 | |
| 250 edge_value = _edge_value(G, edge_attr) | |
| 251 node_value = _node_value(G, node_attr) | |
| 252 | |
| 253 if rc_order is None: | |
| 254 ordering = list(set([node_value(n) for n in G])) | |
| 255 else: | |
| 256 ordering = rc_order | |
| 257 | |
| 258 N = len(ordering) | |
| 259 undirected = not G.is_directed() | |
| 260 index = dict(zip(ordering, range(N))) | |
| 261 M = np.zeros((N, N), dtype=dtype, order=order) | |
| 262 | |
| 263 seen = set([]) | |
| 264 for u, nbrdict in G.adjacency(): | |
| 265 for v in nbrdict: | |
| 266 # Obtain the node attribute values. | |
| 267 i, j = index[node_value(u)], index[node_value(v)] | |
| 268 if v not in seen: | |
| 269 M[i, j] += edge_value(u, v) | |
| 270 if undirected: | |
| 271 M[j, i] = M[i, j] | |
| 272 | |
| 273 if undirected: | |
| 274 seen.add(u) | |
| 275 | |
| 276 if normalized: | |
| 277 M /= M.sum(axis=1).reshape((N, 1)) | |
| 278 | |
| 279 M = np.asmatrix(M) | |
| 280 | |
| 281 if rc_order is None: | |
| 282 return M, ordering | |
| 283 else: | |
| 284 return M | |
| 285 | |
| 286 | |
| 287 def attr_sparse_matrix(G, edge_attr=None, node_attr=None, | |
| 288 normalized=False, rc_order=None, dtype=None): | |
| 289 """Returns a SciPy sparse matrix using attributes from G. | |
| 290 | |
| 291 If only `G` is passed in, then the adjacency matrix is constructed. | |
| 292 | |
| 293 Let A be a discrete set of values for the node attribute `node_attr`. Then | |
| 294 the elements of A represent the rows and columns of the constructed matrix. | |
| 295 Now, iterate through every edge e=(u,v) in `G` and consider the value | |
| 296 of the edge attribute `edge_attr`. If ua and va are the values of the | |
| 297 node attribute `node_attr` for u and v, respectively, then the value of | |
| 298 the edge attribute is added to the matrix element at (ua, va). | |
| 299 | |
| 300 Parameters | |
| 301 ---------- | |
| 302 G : graph | |
| 303 The NetworkX graph used to construct the NumPy matrix. | |
| 304 | |
| 305 edge_attr : str, optional | |
| 306 Each element of the matrix represents a running total of the | |
| 307 specified edge attribute for edges whose node attributes correspond | |
| 308 to the rows/cols of the matirx. The attribute must be present for | |
| 309 all edges in the graph. If no attribute is specified, then we | |
| 310 just count the number of edges whose node attributes correspond | |
| 311 to the matrix element. | |
| 312 | |
| 313 node_attr : str, optional | |
| 314 Each row and column in the matrix represents a particular value | |
| 315 of the node attribute. The attribute must be present for all nodes | |
| 316 in the graph. Note, the values of this attribute should be reliably | |
| 317 hashable. So, float values are not recommended. If no attribute is | |
| 318 specified, then the rows and columns will be the nodes of the graph. | |
| 319 | |
| 320 normalized : bool, optional | |
| 321 If True, then each row is normalized by the summation of its values. | |
| 322 | |
| 323 rc_order : list, optional | |
| 324 A list of the node attribute values. This list specifies the ordering | |
| 325 of rows and columns of the array. If no ordering is provided, then | |
| 326 the ordering will be random (and also, a return value). | |
| 327 | |
| 328 Other Parameters | |
| 329 ---------------- | |
| 330 dtype : NumPy data-type, optional | |
| 331 A valid NumPy dtype used to initialize the array. Keep in mind certain | |
| 332 dtypes can yield unexpected results if the array is to be normalized. | |
| 333 The parameter is passed to numpy.zeros(). If unspecified, the NumPy | |
| 334 default is used. | |
| 335 | |
| 336 Returns | |
| 337 ------- | |
| 338 M : SciPy sparse matrix | |
| 339 The attribute matrix. | |
| 340 | |
| 341 ordering : list | |
| 342 If `rc_order` was specified, then only the matrix is returned. | |
| 343 However, if `rc_order` was None, then the ordering used to construct | |
| 344 the matrix is returned as well. | |
| 345 | |
| 346 Examples | |
| 347 -------- | |
| 348 Construct an adjacency matrix: | |
| 349 | |
| 350 >>> G = nx.Graph() | |
| 351 >>> G.add_edge(0,1,thickness=1,weight=3) | |
| 352 >>> G.add_edge(0,2,thickness=2) | |
| 353 >>> G.add_edge(1,2,thickness=3) | |
| 354 >>> M = nx.attr_sparse_matrix(G, rc_order=[0,1,2]) | |
| 355 >>> M.todense() | |
| 356 matrix([[0., 1., 1.], | |
| 357 [1., 0., 1.], | |
| 358 [1., 1., 0.]]) | |
| 359 | |
| 360 Alternatively, we can obtain the matrix describing edge thickness. | |
| 361 | |
| 362 >>> M = nx.attr_sparse_matrix(G, edge_attr='thickness', rc_order=[0,1,2]) | |
| 363 >>> M.todense() | |
| 364 matrix([[0., 1., 2.], | |
| 365 [1., 0., 3.], | |
| 366 [2., 3., 0.]]) | |
| 367 | |
| 368 We can also color the nodes and ask for the probability distribution over | |
| 369 all edges (u,v) describing: | |
| 370 | |
| 371 Pr(v has color Y | u has color X) | |
| 372 | |
| 373 >>> G.nodes[0]['color'] = 'red' | |
| 374 >>> G.nodes[1]['color'] = 'red' | |
| 375 >>> G.nodes[2]['color'] = 'blue' | |
| 376 >>> rc = ['red', 'blue'] | |
| 377 >>> M = nx.attr_sparse_matrix(G, node_attr='color', \ | |
| 378 normalized=True, rc_order=rc) | |
| 379 >>> M.todense() | |
| 380 matrix([[0.33333333, 0.66666667], | |
| 381 [1. , 0. ]]) | |
| 382 | |
| 383 For example, the above tells us that for all edges (u,v): | |
| 384 | |
| 385 Pr( v is red | u is red) = 1/3 | |
| 386 Pr( v is blue | u is red) = 2/3 | |
| 387 | |
| 388 Pr( v is red | u is blue) = 1 | |
| 389 Pr( v is blue | u is blue) = 0 | |
| 390 | |
| 391 Finally, we can obtain the total weights listed by the node colors. | |
| 392 | |
| 393 >>> M = nx.attr_sparse_matrix(G, edge_attr='weight',\ | |
| 394 node_attr='color', rc_order=rc) | |
| 395 >>> M.todense() | |
| 396 matrix([[3., 2.], | |
| 397 [2., 0.]]) | |
| 398 | |
| 399 Thus, the total weight over all edges (u,v) with u and v having colors: | |
| 400 | |
| 401 (red, red) is 3 # the sole contribution is from edge (0,1) | |
| 402 (red, blue) is 2 # contributions from edges (0,2) and (1,2) | |
| 403 (blue, red) is 2 # same as (red, blue) since graph is undirected | |
| 404 (blue, blue) is 0 # there are no edges with blue endpoints | |
| 405 | |
| 406 """ | |
| 407 try: | |
| 408 import numpy as np | |
| 409 from scipy import sparse | |
| 410 except ImportError: | |
| 411 raise ImportError( | |
| 412 "attr_sparse_matrix() requires scipy: http://scipy.org/ ") | |
| 413 | |
| 414 edge_value = _edge_value(G, edge_attr) | |
| 415 node_value = _node_value(G, node_attr) | |
| 416 | |
| 417 if rc_order is None: | |
| 418 ordering = list(set([node_value(n) for n in G])) | |
| 419 else: | |
| 420 ordering = rc_order | |
| 421 | |
| 422 N = len(ordering) | |
| 423 undirected = not G.is_directed() | |
| 424 index = dict(zip(ordering, range(N))) | |
| 425 M = sparse.lil_matrix((N, N), dtype=dtype) | |
| 426 | |
| 427 seen = set([]) | |
| 428 for u, nbrdict in G.adjacency(): | |
| 429 for v in nbrdict: | |
| 430 # Obtain the node attribute values. | |
| 431 i, j = index[node_value(u)], index[node_value(v)] | |
| 432 if v not in seen: | |
| 433 M[i, j] += edge_value(u, v) | |
| 434 if undirected: | |
| 435 M[j, i] = M[i, j] | |
| 436 | |
| 437 if undirected: | |
| 438 seen.add(u) | |
| 439 | |
| 440 if normalized: | |
| 441 norms = np.asarray(M.sum(axis=1)).ravel() | |
| 442 for i, norm in enumerate(norms): | |
| 443 M[i, :] /= norm | |
| 444 | |
| 445 if rc_order is None: | |
| 446 return M, ordering | |
| 447 else: | |
| 448 return M | |
| 449 | |
| 450 | |
| 451 # fixture for pytest | |
| 452 def setup_module(module): | |
| 453 import pytest | |
| 454 numpy = pytest.importorskip('numpy') | |
| 455 scipy = pytest.importorskip('scipy') |
