comparison env/lib/python3.9/site-packages/networkx/convert_matrix.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Functions to convert NetworkX graphs to and from common data containers
2 like numpy arrays, scipy sparse matrices, and pandas DataFrames.
3
4 The preferred way of converting data to a NetworkX graph is through the
5 graph constructor. The constructor calls the to_networkx_graph() function
6 which attempts to guess the input type and convert it automatically.
7
8 Examples
9 --------
10 Create a 10 node random graph from a numpy array
11
12 >>> import numpy as np
13 >>> a = np.random.randint(0, 2, size=(10, 10))
14 >>> D = nx.DiGraph(a)
15
16 or equivalently
17
18 >>> D = nx.to_networkx_graph(a, create_using=nx.DiGraph)
19
20 See Also
21 --------
22 nx_agraph, nx_pydot
23 """
24
25 import itertools
26 import networkx as nx
27 from networkx.utils import not_implemented_for
28
29 __all__ = [
30 "from_numpy_matrix",
31 "to_numpy_matrix",
32 "from_pandas_adjacency",
33 "to_pandas_adjacency",
34 "from_pandas_edgelist",
35 "to_pandas_edgelist",
36 "to_numpy_recarray",
37 "from_scipy_sparse_matrix",
38 "to_scipy_sparse_matrix",
39 "from_numpy_array",
40 "to_numpy_array",
41 ]
42
43
44 def to_pandas_adjacency(
45 G,
46 nodelist=None,
47 dtype=None,
48 order=None,
49 multigraph_weight=sum,
50 weight="weight",
51 nonedge=0.0,
52 ):
53 """Returns the graph adjacency matrix as a Pandas DataFrame.
54
55 Parameters
56 ----------
57 G : graph
58 The NetworkX graph used to construct the Pandas DataFrame.
59
60 nodelist : list, optional
61 The rows and columns are ordered according to the nodes in `nodelist`.
62 If `nodelist` is None, then the ordering is produced by G.nodes().
63
64 multigraph_weight : {sum, min, max}, optional
65 An operator that determines how weights in multigraphs are handled.
66 The default is to sum the weights of the multiple edges.
67
68 weight : string or None, optional
69 The edge attribute that holds the numerical value used for
70 the edge weight. If an edge does not have that attribute, then the
71 value 1 is used instead.
72
73 nonedge : float, optional
74 The matrix values corresponding to nonedges are typically set to zero.
75 However, this could be undesirable if there are matrix values
76 corresponding to actual edges that also have the value zero. If so,
77 one might prefer nonedges to have some other value, such as nan.
78
79 Returns
80 -------
81 df : Pandas DataFrame
82 Graph adjacency matrix
83
84 Notes
85 -----
86 For directed graphs, entry i,j corresponds to an edge from i to j.
87
88 The DataFrame entries are assigned to the weight edge attribute. When
89 an edge does not have a weight attribute, the value of the entry is set to
90 the number 1. For multiple (parallel) edges, the values of the entries
91 are determined by the 'multigraph_weight' parameter. The default is to
92 sum the weight attributes for each of the parallel edges.
93
94 When `nodelist` does not contain every node in `G`, the matrix is built
95 from the subgraph of `G` that is induced by the nodes in `nodelist`.
96
97 The convention used for self-loop edges in graphs is to assign the
98 diagonal matrix entry value to the weight attribute of the edge
99 (or the number 1 if the edge has no weight attribute). If the
100 alternate convention of doubling the edge weight is desired the
101 resulting Pandas DataFrame can be modified as follows:
102
103 >>> import pandas as pd
104 >>> pd.options.display.max_columns = 20
105 >>> import numpy as np
106 >>> G = nx.Graph([(1, 1)])
107 >>> df = nx.to_pandas_adjacency(G, dtype=int)
108 >>> df
109 1
110 1 1
111 >>> df.values[np.diag_indices_from(df)] *= 2
112 >>> df
113 1
114 1 2
115
116 Examples
117 --------
118 >>> G = nx.MultiDiGraph()
119 >>> G.add_edge(0, 1, weight=2)
120 0
121 >>> G.add_edge(1, 0)
122 0
123 >>> G.add_edge(2, 2, weight=3)
124 0
125 >>> G.add_edge(2, 2)
126 1
127 >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
128 0 1 2
129 0 0 2 0
130 1 1 0 0
131 2 0 0 4
132
133 """
134 import pandas as pd
135
136 M = to_numpy_array(
137 G,
138 nodelist=nodelist,
139 dtype=dtype,
140 order=order,
141 multigraph_weight=multigraph_weight,
142 weight=weight,
143 nonedge=nonedge,
144 )
145 if nodelist is None:
146 nodelist = list(G)
147 return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
148
149
150 def from_pandas_adjacency(df, create_using=None):
151 r"""Returns a graph from Pandas DataFrame.
152
153 The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
154
155 Parameters
156 ----------
157 df : Pandas DataFrame
158 An adjacency matrix representation of a graph
159
160 create_using : NetworkX graph constructor, optional (default=nx.Graph)
161 Graph type to create. If graph instance, then cleared before populated.
162
163 Notes
164 -----
165 For directed graphs, explicitly mention create_using=nx.DiGraph,
166 and entry i,j of df corresponds to an edge from i to j.
167
168 If `df` has a single data type for each entry it will be converted to an
169 appropriate Python data type.
170
171 If `df` has a user-specified compound data type the names
172 of the data fields will be used as attribute keys in the resulting
173 NetworkX graph.
174
175 See Also
176 --------
177 to_pandas_adjacency
178
179 Examples
180 --------
181 Simple integer weights on edges:
182
183 >>> import pandas as pd
184 >>> pd.options.display.max_columns = 20
185 >>> df = pd.DataFrame([[1, 1], [2, 1]])
186 >>> df
187 0 1
188 0 1 1
189 1 2 1
190 >>> G = nx.from_pandas_adjacency(df)
191 >>> G.name = "Graph from pandas adjacency matrix"
192 >>> print(nx.info(G))
193 Name: Graph from pandas adjacency matrix
194 Type: Graph
195 Number of nodes: 2
196 Number of edges: 3
197 Average degree: 3.0000
198
199 """
200
201 try:
202 df = df[df.index]
203 except Exception as e:
204 missing = list(set(df.index).difference(set(df.columns)))
205 msg = f"{missing} not in columns"
206 raise nx.NetworkXError("Columns must match Indices.", msg) from e
207
208 A = df.values
209 G = from_numpy_array(A, create_using=create_using)
210
211 nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False)
212 return G
213
214
215 def to_pandas_edgelist(
216 G, source="source", target="target", nodelist=None, dtype=None, order=None
217 ):
218 """Returns the graph edge list as a Pandas DataFrame.
219
220 Parameters
221 ----------
222 G : graph
223 The NetworkX graph used to construct the Pandas DataFrame.
224
225 source : str or int, optional
226 A valid column name (string or integer) for the source nodes (for the
227 directed case).
228
229 target : str or int, optional
230 A valid column name (string or integer) for the target nodes (for the
231 directed case).
232
233 nodelist : list, optional
234 Use only nodes specified in nodelist
235
236 Returns
237 -------
238 df : Pandas DataFrame
239 Graph edge list
240
241 Examples
242 --------
243 >>> G = nx.Graph(
244 ... [
245 ... ("A", "B", {"cost": 1, "weight": 7}),
246 ... ("C", "E", {"cost": 9, "weight": 10}),
247 ... ]
248 ... )
249 >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])
250 >>> df[["source", "target", "cost", "weight"]]
251 source target cost weight
252 0 A B 1 7
253 1 C E 9 10
254
255 """
256 import pandas as pd
257
258 if nodelist is None:
259 edgelist = G.edges(data=True)
260 else:
261 edgelist = G.edges(nodelist, data=True)
262 source_nodes = [s for s, t, d in edgelist]
263 target_nodes = [t for s, t, d in edgelist]
264
265 all_keys = set().union(*(d.keys() for s, t, d in edgelist))
266 if source in all_keys:
267 raise nx.NetworkXError(f"Source name '{source}' is an edge attr name")
268 if target in all_keys:
269 raise nx.NetworkXError(f"Target name '{target}' is an edge attr name")
270
271 nan = float("nan")
272 edge_attr = {k: [d.get(k, nan) for s, t, d in edgelist] for k in all_keys}
273
274 edgelistdict = {source: source_nodes, target: target_nodes}
275 edgelistdict.update(edge_attr)
276 return pd.DataFrame(edgelistdict)
277
278
279 def from_pandas_edgelist(
280 df,
281 source="source",
282 target="target",
283 edge_attr=None,
284 create_using=None,
285 edge_key=None,
286 ):
287 """Returns a graph from Pandas DataFrame containing an edge list.
288
289 The Pandas DataFrame should contain at least two columns of node names and
290 zero or more columns of edge attributes. Each row will be processed as one
291 edge instance.
292
293 Note: This function iterates over DataFrame.values, which is not
294 guaranteed to retain the data type across columns in the row. This is only
295 a problem if your row is entirely numeric and a mix of ints and floats. In
296 that case, all values will be returned as floats. See the
297 DataFrame.iterrows documentation for an example.
298
299 Parameters
300 ----------
301 df : Pandas DataFrame
302 An edge list representation of a graph
303
304 source : str or int
305 A valid column name (string or integer) for the source nodes (for the
306 directed case).
307
308 target : str or int
309 A valid column name (string or integer) for the target nodes (for the
310 directed case).
311
312 edge_attr : str or int, iterable, True, or None
313 A valid column name (str or int) or iterable of column names that are
314 used to retrieve items and add them to the graph as edge attributes.
315 If `True`, all of the remaining columns will be added.
316 If `None`, no edge attributes are added to the graph.
317
318 create_using : NetworkX graph constructor, optional (default=nx.Graph)
319 Graph type to create. If graph instance, then cleared before populated.
320
321 edge_key : str or None, optional (default=None)
322 A valid column name for the edge keys (for a MultiGraph). The values in
323 this column are used for the edge keys when adding edges if create_using
324 is a multigraph.
325
326 See Also
327 --------
328 to_pandas_edgelist
329
330 Examples
331 --------
332 Simple integer weights on edges:
333
334 >>> import pandas as pd
335 >>> pd.options.display.max_columns = 20
336 >>> import numpy as np
337 >>> rng = np.random.RandomState(seed=5)
338 >>> ints = rng.randint(1, 11, size=(3, 2))
339 >>> a = ["A", "B", "C"]
340 >>> b = ["D", "A", "E"]
341 >>> df = pd.DataFrame(ints, columns=["weight", "cost"])
342 >>> df[0] = a
343 >>> df["b"] = b
344 >>> df[["weight", "cost", 0, "b"]]
345 weight cost 0 b
346 0 4 7 A D
347 1 7 1 B A
348 2 10 9 C E
349 >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])
350 >>> G["E"]["C"]["weight"]
351 10
352 >>> G["E"]["C"]["cost"]
353 9
354 >>> edges = pd.DataFrame(
355 ... {
356 ... "source": [0, 1, 2],
357 ... "target": [2, 2, 3],
358 ... "weight": [3, 4, 5],
359 ... "color": ["red", "blue", "blue"],
360 ... }
361 ... )
362 >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
363 >>> G[0][2]["color"]
364 'red'
365
366 Build multigraph with custom keys:
367
368 >>> edges = pd.DataFrame(
369 ... {
370 ... "source": [0, 1, 2, 0],
371 ... "target": [2, 2, 3, 2],
372 ... "my_edge_key": ["A", "B", "C", "D"],
373 ... "weight": [3, 4, 5, 6],
374 ... "color": ["red", "blue", "blue", "blue"],
375 ... }
376 ... )
377 >>> G = nx.from_pandas_edgelist(
378 ... edges,
379 ... edge_key="my_edge_key",
380 ... edge_attr=["weight", "color"],
381 ... create_using=nx.MultiGraph(),
382 ... )
383 >>> G[0][2]
384 AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})
385
386
387 """
388 g = nx.empty_graph(0, create_using)
389
390 if edge_attr is None:
391 g.add_edges_from(zip(df[source], df[target]))
392 return g
393
394 reserved_columns = [source, target]
395
396 # Additional columns requested
397 attr_col_headings = []
398 attribute_data = []
399 if edge_attr is True:
400 attr_col_headings = [c for c in df.columns if c not in reserved_columns]
401 elif isinstance(edge_attr, (list, tuple)):
402 attr_col_headings = edge_attr
403 else:
404 attr_col_headings = [edge_attr]
405 if len(attr_col_headings) == 0:
406 raise nx.NetworkXError(
407 f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}"
408 )
409
410 try:
411 attribute_data = zip(*[df[col] for col in attr_col_headings])
412 except (KeyError, TypeError) as e:
413 msg = f"Invalid edge_attr argument: {edge_attr}"
414 raise nx.NetworkXError(msg) from e
415
416 if g.is_multigraph():
417 # => append the edge keys from the df to the bundled data
418 if edge_key is not None:
419 try:
420 multigraph_edge_keys = df[edge_key]
421 attribute_data = zip(attribute_data, multigraph_edge_keys)
422 except (KeyError, TypeError) as e:
423 msg = f"Invalid edge_key argument: {edge_key}"
424 raise nx.NetworkXError(msg) from e
425
426 for s, t, attrs in zip(df[source], df[target], attribute_data):
427 if edge_key is not None:
428 attrs, multigraph_edge_key = attrs
429 key = g.add_edge(s, t, key=multigraph_edge_key)
430 else:
431 key = g.add_edge(s, t)
432
433 g[s][t][key].update(zip(attr_col_headings, attrs))
434 else:
435 for s, t, attrs in zip(df[source], df[target], attribute_data):
436 g.add_edge(s, t)
437 g[s][t].update(zip(attr_col_headings, attrs))
438
439 return g
440
441
442 def to_numpy_matrix(
443 G,
444 nodelist=None,
445 dtype=None,
446 order=None,
447 multigraph_weight=sum,
448 weight="weight",
449 nonedge=0.0,
450 ):
451 """Returns the graph adjacency matrix as a NumPy matrix.
452
453 Parameters
454 ----------
455 G : graph
456 The NetworkX graph used to construct the NumPy matrix.
457
458 nodelist : list, optional
459 The rows and columns are ordered according to the nodes in `nodelist`.
460 If `nodelist` is None, then the ordering is produced by G.nodes().
461
462 dtype : NumPy data type, optional
463 A valid single NumPy data type used to initialize the array.
464 This must be a simple type such as int or numpy.float64 and
465 not a compound data type (see to_numpy_recarray)
466 If None, then the NumPy default is used.
467
468 order : {'C', 'F'}, optional
469 Whether to store multidimensional data in C- or Fortran-contiguous
470 (row- or column-wise) order in memory. If None, then the NumPy default
471 is used.
472
473 multigraph_weight : {sum, min, max}, optional
474 An operator that determines how weights in multigraphs are handled.
475 The default is to sum the weights of the multiple edges.
476
477 weight : string or None optional (default = 'weight')
478 The edge attribute that holds the numerical value used for
479 the edge weight. If an edge does not have that attribute, then the
480 value 1 is used instead.
481
482 nonedge : float (default = 0.0)
483 The matrix values corresponding to nonedges are typically set to zero.
484 However, this could be undesirable if there are matrix values
485 corresponding to actual edges that also have the value zero. If so,
486 one might prefer nonedges to have some other value, such as nan.
487
488 Returns
489 -------
490 M : NumPy matrix
491 Graph adjacency matrix
492
493 See Also
494 --------
495 to_numpy_recarray, from_numpy_matrix
496
497 Notes
498 -----
499 For directed graphs, entry i,j corresponds to an edge from i to j.
500
501 The matrix entries are assigned to the weight edge attribute. When
502 an edge does not have a weight attribute, the value of the entry is set to
503 the number 1. For multiple (parallel) edges, the values of the entries
504 are determined by the `multigraph_weight` parameter. The default is to
505 sum the weight attributes for each of the parallel edges.
506
507 When `nodelist` does not contain every node in `G`, the matrix is built
508 from the subgraph of `G` that is induced by the nodes in `nodelist`.
509
510 The convention used for self-loop edges in graphs is to assign the
511 diagonal matrix entry value to the weight attribute of the edge
512 (or the number 1 if the edge has no weight attribute). If the
513 alternate convention of doubling the edge weight is desired the
514 resulting Numpy matrix can be modified as follows:
515
516 >>> import numpy as np
517 >>> G = nx.Graph([(1, 1)])
518 >>> A = nx.to_numpy_matrix(G)
519 >>> A
520 matrix([[1.]])
521 >>> A[np.diag_indices_from(A)] *= 2
522 >>> A
523 matrix([[2.]])
524
525 Examples
526 --------
527 >>> G = nx.MultiDiGraph()
528 >>> G.add_edge(0, 1, weight=2)
529 0
530 >>> G.add_edge(1, 0)
531 0
532 >>> G.add_edge(2, 2, weight=3)
533 0
534 >>> G.add_edge(2, 2)
535 1
536 >>> nx.to_numpy_matrix(G, nodelist=[0, 1, 2])
537 matrix([[0., 2., 0.],
538 [1., 0., 0.],
539 [0., 0., 4.]])
540
541 """
542 import numpy as np
543
544 A = to_numpy_array(
545 G,
546 nodelist=nodelist,
547 dtype=dtype,
548 order=order,
549 multigraph_weight=multigraph_weight,
550 weight=weight,
551 nonedge=nonedge,
552 )
553 M = np.asmatrix(A, dtype=dtype)
554 return M
555
556
557 def from_numpy_matrix(A, parallel_edges=False, create_using=None):
558 """Returns a graph from numpy matrix.
559
560 The numpy matrix is interpreted as an adjacency matrix for the graph.
561
562 Parameters
563 ----------
564 A : numpy matrix
565 An adjacency matrix representation of a graph
566
567 parallel_edges : Boolean
568 If True, `create_using` is a multigraph, and `A` is an
569 integer matrix, then entry *(i, j)* in the matrix is interpreted as the
570 number of parallel edges joining vertices *i* and *j* in the graph.
571 If False, then the entries in the adjacency matrix are interpreted as
572 the weight of a single edge joining the vertices.
573
574 create_using : NetworkX graph constructor, optional (default=nx.Graph)
575 Graph type to create. If graph instance, then cleared before populated.
576
577 Notes
578 -----
579 For directed graphs, explicitly mention create_using=nx.DiGraph,
580 and entry i,j of A corresponds to an edge from i to j.
581
582 If `create_using` is :class:`networkx.MultiGraph` or
583 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
584 entries of `A` are of type :class:`int`, then this function returns a
585 multigraph (constructed from `create_using`) with parallel edges.
586
587 If `create_using` indicates an undirected multigraph, then only the edges
588 indicated by the upper triangle of the matrix `A` will be added to the
589 graph.
590
591 If the numpy matrix has a single data type for each matrix entry it
592 will be converted to an appropriate Python data type.
593
594 If the numpy matrix has a user-specified compound data type the names
595 of the data fields will be used as attribute keys in the resulting
596 NetworkX graph.
597
598 See Also
599 --------
600 to_numpy_matrix, to_numpy_recarray
601
602 Examples
603 --------
604 Simple integer weights on edges:
605
606 >>> import numpy as np
607 >>> A = np.array([[1, 1], [2, 1]])
608 >>> G = nx.from_numpy_matrix(A)
609
610 If `create_using` indicates a multigraph and the matrix has only integer
611 entries and `parallel_edges` is False, then the entries will be treated
612 as weights for edges joining the nodes (without creating parallel edges):
613
614 >>> A = np.array([[1, 1], [1, 2]])
615 >>> G = nx.from_numpy_matrix(A, create_using=nx.MultiGraph)
616 >>> G[1][1]
617 AtlasView({0: {'weight': 2}})
618
619 If `create_using` indicates a multigraph and the matrix has only integer
620 entries and `parallel_edges` is True, then the entries will be treated
621 as the number of parallel edges joining those two vertices:
622
623 >>> A = np.array([[1, 1], [1, 2]])
624 >>> temp = nx.MultiGraph()
625 >>> G = nx.from_numpy_matrix(A, parallel_edges=True, create_using=temp)
626 >>> G[1][1]
627 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
628
629 User defined compound data type on edges:
630
631 >>> dt = [("weight", float), ("cost", int)]
632 >>> A = np.array([[(1.0, 2)]], dtype=dt)
633 >>> G = nx.from_numpy_matrix(A)
634 >>> list(G.edges())
635 [(0, 0)]
636 >>> G[0][0]["cost"]
637 2
638 >>> G[0][0]["weight"]
639 1.0
640
641 """
642 # This should never fail if you have created a numpy matrix with numpy...
643 import numpy as np
644
645 kind_to_python_type = {
646 "f": float,
647 "i": int,
648 "u": int,
649 "b": bool,
650 "c": complex,
651 "S": str,
652 "V": "void",
653 }
654 kind_to_python_type["U"] = str
655 G = nx.empty_graph(0, create_using)
656 n, m = A.shape
657 if n != m:
658 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
659 dt = A.dtype
660 try:
661 python_type = kind_to_python_type[dt.kind]
662 except Exception as e:
663 raise TypeError(f"Unknown numpy data type: {dt}") from e
664
665 # Make sure we get even the isolated nodes of the graph.
666 G.add_nodes_from(range(n))
667 # Get a list of all the entries in the matrix with nonzero entries. These
668 # coordinates become edges in the graph. (convert to int from np.int64)
669 edges = ((int(e[0]), int(e[1])) for e in zip(*np.asarray(A).nonzero()))
670 # handle numpy constructed data type
671 if python_type == "void":
672 # Sort the fields by their offset, then by dtype, then by name.
673 fields = sorted(
674 (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items()
675 )
676 triples = (
677 (
678 u,
679 v,
680 {
681 name: kind_to_python_type[dtype.kind](val)
682 for (_, dtype, name), val in zip(fields, A[u, v])
683 },
684 )
685 for u, v in edges
686 )
687 # If the entries in the adjacency matrix are integers, the graph is a
688 # multigraph, and parallel_edges is True, then create parallel edges, each
689 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
690 # one edge for each positive entry in the adjacency matrix and set the
691 # weight of that edge to be the entry in the matrix.
692 elif python_type is int and G.is_multigraph() and parallel_edges:
693 chain = itertools.chain.from_iterable
694 # The following line is equivalent to:
695 #
696 # for (u, v) in edges:
697 # for d in range(A[u, v]):
698 # G.add_edge(u, v, weight=1)
699 #
700 triples = chain(
701 ((u, v, {"weight": 1}) for d in range(A[u, v])) for (u, v) in edges
702 )
703 else: # basic data type
704 triples = ((u, v, dict(weight=python_type(A[u, v]))) for u, v in edges)
705 # If we are creating an undirected multigraph, only add the edges from the
706 # upper triangle of the matrix. Otherwise, add all the edges. This relies
707 # on the fact that the vertices created in the
708 # `_generated_weighted_edges()` function are actually the row/column
709 # indices for the matrix `A`.
710 #
711 # Without this check, we run into a problem where each edge is added twice
712 # when `G.add_edges_from()` is invoked below.
713 if G.is_multigraph() and not G.is_directed():
714 triples = ((u, v, d) for u, v, d in triples if u <= v)
715 G.add_edges_from(triples)
716 return G
717
718
719 @not_implemented_for("multigraph")
720 def to_numpy_recarray(G, nodelist=None, dtype=None, order=None):
721 """Returns the graph adjacency matrix as a NumPy recarray.
722
723 Parameters
724 ----------
725 G : graph
726 The NetworkX graph used to construct the NumPy recarray.
727
728 nodelist : list, optional
729 The rows and columns are ordered according to the nodes in `nodelist`.
730 If `nodelist` is None, then the ordering is produced by G.nodes().
731
732 dtype : NumPy data-type, optional
733 A valid NumPy named dtype used to initialize the NumPy recarray.
734 The data type names are assumed to be keys in the graph edge attribute
735 dictionary.
736
737 order : {'C', 'F'}, optional
738 Whether to store multidimensional data in C- or Fortran-contiguous
739 (row- or column-wise) order in memory. If None, then the NumPy default
740 is used.
741
742 Returns
743 -------
744 M : NumPy recarray
745 The graph with specified edge data as a Numpy recarray
746
747 Notes
748 -----
749 When `nodelist` does not contain every node in `G`, the adjacency
750 matrix is built from the subgraph of `G` that is induced by the nodes in
751 `nodelist`.
752
753 Examples
754 --------
755 >>> G = nx.Graph()
756 >>> G.add_edge(1, 2, weight=7.0, cost=5)
757 >>> A = nx.to_numpy_recarray(G, dtype=[("weight", float), ("cost", int)])
758 >>> print(A.weight)
759 [[0. 7.]
760 [7. 0.]]
761 >>> print(A.cost)
762 [[0 5]
763 [5 0]]
764
765 """
766 if dtype is None:
767 dtype = [("weight", float)]
768 import numpy as np
769
770 if nodelist is None:
771 nodelist = list(G)
772 nodeset = set(nodelist)
773 if len(nodelist) != len(nodeset):
774 msg = "Ambiguous ordering: `nodelist` contained duplicates."
775 raise nx.NetworkXError(msg)
776 nlen = len(nodelist)
777 undirected = not G.is_directed()
778 index = dict(zip(nodelist, range(nlen)))
779 M = np.zeros((nlen, nlen), dtype=dtype, order=order)
780
781 names = M.dtype.names
782 for u, v, attrs in G.edges(data=True):
783 if (u in nodeset) and (v in nodeset):
784 i, j = index[u], index[v]
785 values = tuple([attrs[n] for n in names])
786 M[i, j] = values
787 if undirected:
788 M[j, i] = M[i, j]
789
790 return M.view(np.recarray)
791
792
793 def to_scipy_sparse_matrix(G, nodelist=None, dtype=None, weight="weight", format="csr"):
794 """Returns the graph adjacency matrix as a SciPy sparse matrix.
795
796 Parameters
797 ----------
798 G : graph
799 The NetworkX graph used to construct the sparse matrix.
800
801 nodelist : list, optional
802 The rows and columns are ordered according to the nodes in `nodelist`.
803 If `nodelist` is None, then the ordering is produced by G.nodes().
804
805 dtype : NumPy data-type, optional
806 A valid NumPy dtype used to initialize the array. If None, then the
807 NumPy default is used.
808
809 weight : string or None optional (default='weight')
810 The edge attribute that holds the numerical value used for
811 the edge weight. If None then all edge weights are 1.
812
813 format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
814 The type of the matrix to be returned (default 'csr'). For
815 some algorithms different implementations of sparse matrices
816 can perform better. See [1]_ for details.
817
818 Returns
819 -------
820 M : SciPy sparse matrix
821 Graph adjacency matrix.
822
823 Notes
824 -----
825 For directed graphs, matrix entry i,j corresponds to an edge from i to j.
826
827 The matrix entries are populated using the edge attribute held in
828 parameter weight. When an edge does not have that attribute, the
829 value of the entry is 1.
830
831 For multiple edges the matrix values are the sums of the edge weights.
832
833 When `nodelist` does not contain every node in `G`, the adjacency matrix
834 is built from the subgraph of `G` that is induced by the nodes in
835 `nodelist`.
836
837 The convention used for self-loop edges in graphs is to assign the
838 diagonal matrix entry value to the weight attribute of the edge
839 (or the number 1 if the edge has no weight attribute). If the
840 alternate convention of doubling the edge weight is desired the
841 resulting Scipy sparse matrix can be modified as follows:
842
843 >>> import scipy as sp
844 >>> G = nx.Graph([(1, 1)])
845 >>> A = nx.to_scipy_sparse_matrix(G)
846 >>> print(A.todense())
847 [[1]]
848 >>> A.setdiag(A.diagonal() * 2)
849 >>> print(A.todense())
850 [[2]]
851
852 Examples
853 --------
854 >>> G = nx.MultiDiGraph()
855 >>> G.add_edge(0, 1, weight=2)
856 0
857 >>> G.add_edge(1, 0)
858 0
859 >>> G.add_edge(2, 2, weight=3)
860 0
861 >>> G.add_edge(2, 2)
862 1
863 >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
864 >>> print(S.todense())
865 [[0 2 0]
866 [1 0 0]
867 [0 0 4]]
868
869 References
870 ----------
871 .. [1] Scipy Dev. References, "Sparse Matrices",
872 https://docs.scipy.org/doc/scipy/reference/sparse.html
873 """
874 from scipy import sparse
875
876 if nodelist is None:
877 nodelist = list(G)
878 nlen = len(nodelist)
879 if nlen == 0:
880 raise nx.NetworkXError("Graph has no nodes or edges")
881
882 if len(nodelist) != len(set(nodelist)):
883 msg = "Ambiguous ordering: `nodelist` contained duplicates."
884 raise nx.NetworkXError(msg)
885
886 index = dict(zip(nodelist, range(nlen)))
887 coefficients = zip(
888 *(
889 (index[u], index[v], d.get(weight, 1))
890 for u, v, d in G.edges(nodelist, data=True)
891 if u in index and v in index
892 )
893 )
894 try:
895 row, col, data = coefficients
896 except ValueError:
897 # there is no edge in the subgraph
898 row, col, data = [], [], []
899
900 if G.is_directed():
901 M = sparse.coo_matrix((data, (row, col)), shape=(nlen, nlen), dtype=dtype)
902 else:
903 # symmetrize matrix
904 d = data + data
905 r = row + col
906 c = col + row
907 # selfloop entries get double counted when symmetrizing
908 # so we subtract the data on the diagonal
909 selfloops = list(nx.selfloop_edges(G.subgraph(nodelist), data=True))
910 if selfloops:
911 diag_index, diag_data = zip(
912 *(
913 (index[u], -d.get(weight, 1))
914 for u, v, d in selfloops
915 if u in index and v in index
916 )
917 )
918 d += diag_data
919 r += diag_index
920 c += diag_index
921 M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
922 try:
923 return M.asformat(format)
924 # From Scipy 1.1.0, asformat will throw a ValueError instead of an
925 # AttributeError if the format if not recognized.
926 except (AttributeError, ValueError) as e:
927 raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from e
928
929
930 def _csr_gen_triples(A):
931 """Converts a SciPy sparse matrix in **Compressed Sparse Row** format to
932 an iterable of weighted edge triples.
933
934 """
935 nrows = A.shape[0]
936 data, indices, indptr = A.data, A.indices, A.indptr
937 for i in range(nrows):
938 for j in range(indptr[i], indptr[i + 1]):
939 yield i, indices[j], data[j]
940
941
942 def _csc_gen_triples(A):
943 """Converts a SciPy sparse matrix in **Compressed Sparse Column** format to
944 an iterable of weighted edge triples.
945
946 """
947 ncols = A.shape[1]
948 data, indices, indptr = A.data, A.indices, A.indptr
949 for i in range(ncols):
950 for j in range(indptr[i], indptr[i + 1]):
951 yield indices[j], i, data[j]
952
953
954 def _coo_gen_triples(A):
955 """Converts a SciPy sparse matrix in **Coordinate** format to an iterable
956 of weighted edge triples.
957
958 """
959 row, col, data = A.row, A.col, A.data
960 return zip(row, col, data)
961
962
963 def _dok_gen_triples(A):
964 """Converts a SciPy sparse matrix in **Dictionary of Keys** format to an
965 iterable of weighted edge triples.
966
967 """
968 for (r, c), v in A.items():
969 yield r, c, v
970
971
972 def _generate_weighted_edges(A):
973 """Returns an iterable over (u, v, w) triples, where u and v are adjacent
974 vertices and w is the weight of the edge joining u and v.
975
976 `A` is a SciPy sparse matrix (in any format).
977
978 """
979 if A.format == "csr":
980 return _csr_gen_triples(A)
981 if A.format == "csc":
982 return _csc_gen_triples(A)
983 if A.format == "dok":
984 return _dok_gen_triples(A)
985 # If A is in any other format (including COO), convert it to COO format.
986 return _coo_gen_triples(A.tocoo())
987
988
989 def from_scipy_sparse_matrix(
990 A, parallel_edges=False, create_using=None, edge_attribute="weight"
991 ):
992 """Creates a new graph from an adjacency matrix given as a SciPy sparse
993 matrix.
994
995 Parameters
996 ----------
997 A: scipy sparse matrix
998 An adjacency matrix representation of a graph
999
1000 parallel_edges : Boolean
1001 If this is True, `create_using` is a multigraph, and `A` is an
1002 integer matrix, then entry *(i, j)* in the matrix is interpreted as the
1003 number of parallel edges joining vertices *i* and *j* in the graph.
1004 If it is False, then the entries in the matrix are interpreted as
1005 the weight of a single edge joining the vertices.
1006
1007 create_using : NetworkX graph constructor, optional (default=nx.Graph)
1008 Graph type to create. If graph instance, then cleared before populated.
1009
1010 edge_attribute: string
1011 Name of edge attribute to store matrix numeric value. The data will
1012 have the same type as the matrix entry (int, float, (real,imag)).
1013
1014 Notes
1015 -----
1016 For directed graphs, explicitly mention create_using=nx.DiGraph,
1017 and entry i,j of A corresponds to an edge from i to j.
1018
1019 If `create_using` is :class:`networkx.MultiGraph` or
1020 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
1021 entries of `A` are of type :class:`int`, then this function returns a
1022 multigraph (constructed from `create_using`) with parallel edges.
1023 In this case, `edge_attribute` will be ignored.
1024
1025 If `create_using` indicates an undirected multigraph, then only the edges
1026 indicated by the upper triangle of the matrix `A` will be added to the
1027 graph.
1028
1029 Examples
1030 --------
1031 >>> import scipy as sp
1032 >>> A = sp.sparse.eye(2, 2, 1)
1033 >>> G = nx.from_scipy_sparse_matrix(A)
1034
1035 If `create_using` indicates a multigraph and the matrix has only integer
1036 entries and `parallel_edges` is False, then the entries will be treated
1037 as weights for edges joining the nodes (without creating parallel edges):
1038
1039 >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
1040 >>> G = nx.from_scipy_sparse_matrix(A, create_using=nx.MultiGraph)
1041 >>> G[1][1]
1042 AtlasView({0: {'weight': 2}})
1043
1044 If `create_using` indicates a multigraph and the matrix has only integer
1045 entries and `parallel_edges` is True, then the entries will be treated
1046 as the number of parallel edges joining those two vertices:
1047
1048 >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
1049 >>> G = nx.from_scipy_sparse_matrix(
1050 ... A, parallel_edges=True, create_using=nx.MultiGraph
1051 ... )
1052 >>> G[1][1]
1053 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
1054
1055 """
1056 G = nx.empty_graph(0, create_using)
1057 n, m = A.shape
1058 if n != m:
1059 raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
1060 # Make sure we get even the isolated nodes of the graph.
1061 G.add_nodes_from(range(n))
1062 # Create an iterable over (u, v, w) triples and for each triple, add an
1063 # edge from u to v with weight w.
1064 triples = _generate_weighted_edges(A)
1065 # If the entries in the adjacency matrix are integers, the graph is a
1066 # multigraph, and parallel_edges is True, then create parallel edges, each
1067 # with weight 1, for each entry in the adjacency matrix. Otherwise, create
1068 # one edge for each positive entry in the adjacency matrix and set the
1069 # weight of that edge to be the entry in the matrix.
1070 if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges:
1071 chain = itertools.chain.from_iterable
1072 # The following line is equivalent to:
1073 #
1074 # for (u, v) in edges:
1075 # for d in range(A[u, v]):
1076 # G.add_edge(u, v, weight=1)
1077 #
1078 triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
1079 # If we are creating an undirected multigraph, only add the edges from the
1080 # upper triangle of the matrix. Otherwise, add all the edges. This relies
1081 # on the fact that the vertices created in the
1082 # `_generated_weighted_edges()` function are actually the row/column
1083 # indices for the matrix `A`.
1084 #
1085 # Without this check, we run into a problem where each edge is added twice
1086 # when `G.add_weighted_edges_from()` is invoked below.
1087 if G.is_multigraph() and not G.is_directed():
1088 triples = ((u, v, d) for u, v, d in triples if u <= v)
1089 G.add_weighted_edges_from(triples, weight=edge_attribute)
1090 return G
1091
1092
1093 def to_numpy_array(
1094 G,
1095 nodelist=None,
1096 dtype=None,
1097 order=None,
1098 multigraph_weight=sum,
1099 weight="weight",
1100 nonedge=0.0,
1101 ):
1102 """Returns the graph adjacency matrix as a NumPy array.
1103
1104 Parameters
1105 ----------
1106 G : graph
1107 The NetworkX graph used to construct the NumPy array.
1108
1109 nodelist : list, optional
1110 The rows and columns are ordered according to the nodes in `nodelist`.
1111 If `nodelist` is None, then the ordering is produced by G.nodes().
1112
1113 dtype : NumPy data type, optional
1114 A valid single NumPy data type used to initialize the array.
1115 This must be a simple type such as int or numpy.float64 and
1116 not a compound data type (see to_numpy_recarray)
1117 If None, then the NumPy default is used.
1118
1119 order : {'C', 'F'}, optional
1120 Whether to store multidimensional data in C- or Fortran-contiguous
1121 (row- or column-wise) order in memory. If None, then the NumPy default
1122 is used.
1123
1124 multigraph_weight : {sum, min, max}, optional
1125 An operator that determines how weights in multigraphs are handled.
1126 The default is to sum the weights of the multiple edges.
1127
1128 weight : string or None optional (default = 'weight')
1129 The edge attribute that holds the numerical value used for
1130 the edge weight. If an edge does not have that attribute, then the
1131 value 1 is used instead.
1132
1133 nonedge : float (default = 0.0)
1134 The array values corresponding to nonedges are typically set to zero.
1135 However, this could be undesirable if there are array values
1136 corresponding to actual edges that also have the value zero. If so,
1137 one might prefer nonedges to have some other value, such as nan.
1138
1139 Returns
1140 -------
1141 A : NumPy ndarray
1142 Graph adjacency matrix
1143
1144 See Also
1145 --------
1146 from_numpy_array
1147
1148 Notes
1149 -----
1150 For directed graphs, entry i,j corresponds to an edge from i to j.
1151
1152 Entries in the adjacency matrix are assigned to the weight edge attribute.
1153 When an edge does not have a weight attribute, the value of the entry is
1154 set to the number 1. For multiple (parallel) edges, the values of the
1155 entries are determined by the `multigraph_weight` parameter. The default is
1156 to sum the weight attributes for each of the parallel edges.
1157
1158 When `nodelist` does not contain every node in `G`, the adjacency matrix is
1159 built from the subgraph of `G` that is induced by the nodes in `nodelist`.
1160
1161 The convention used for self-loop edges in graphs is to assign the
1162 diagonal array entry value to the weight attribute of the edge
1163 (or the number 1 if the edge has no weight attribute). If the
1164 alternate convention of doubling the edge weight is desired the
1165 resulting NumPy array can be modified as follows:
1166
1167 >>> import numpy as np
1168 >>> G = nx.Graph([(1, 1)])
1169 >>> A = nx.to_numpy_array(G)
1170 >>> A
1171 array([[1.]])
1172 >>> A[np.diag_indices_from(A)] *= 2
1173 >>> A
1174 array([[2.]])
1175
1176 Examples
1177 --------
1178 >>> G = nx.MultiDiGraph()
1179 >>> G.add_edge(0, 1, weight=2)
1180 0
1181 >>> G.add_edge(1, 0)
1182 0
1183 >>> G.add_edge(2, 2, weight=3)
1184 0
1185 >>> G.add_edge(2, 2)
1186 1
1187 >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
1188 array([[0., 2., 0.],
1189 [1., 0., 0.],
1190 [0., 0., 4.]])
1191
1192 """
1193 import numpy as np
1194
1195 if nodelist is None:
1196 nodelist = list(G)
1197 nodeset = set(nodelist)
1198 if len(nodelist) != len(nodeset):
1199 msg = "Ambiguous ordering: `nodelist` contained duplicates."
1200 raise nx.NetworkXError(msg)
1201
1202 nlen = len(nodelist)
1203 undirected = not G.is_directed()
1204 index = dict(zip(nodelist, range(nlen)))
1205
1206 # Initially, we start with an array of nans. Then we populate the array
1207 # using data from the graph. Afterwards, any leftover nans will be
1208 # converted to the value of `nonedge`. Note, we use nans initially,
1209 # instead of zero, for two reasons:
1210 #
1211 # 1) It can be important to distinguish a real edge with the value 0
1212 # from a nonedge with the value 0.
1213 #
1214 # 2) When working with multi(di)graphs, we must combine the values of all
1215 # edges between any two nodes in some manner. This often takes the
1216 # form of a sum, min, or max. Using the value 0 for a nonedge would
1217 # have undesirable effects with min and max, but using nanmin and
1218 # nanmax with initially nan values is not problematic at all.
1219 #
1220 # That said, there are still some drawbacks to this approach. Namely, if
1221 # a real edge is nan, then that value is a) not distinguishable from
1222 # nonedges and b) is ignored by the default combinator (nansum, nanmin,
1223 # nanmax) functions used for multi(di)graphs. If this becomes an issue,
1224 # an alternative approach is to use masked arrays. Initially, every
1225 # element is masked and set to some `initial` value. As we populate the
1226 # graph, elements are unmasked (automatically) when we combine the initial
1227 # value with the values given by real edges. At the end, we convert all
1228 # masked values to `nonedge`. Using masked arrays fully addresses reason 1,
1229 # but for reason 2, we would still have the issue with min and max if the
1230 # initial values were 0.0. Note: an initial value of +inf is appropriate
1231 # for min, while an initial value of -inf is appropriate for max. When
1232 # working with sum, an initial value of zero is appropriate. Ideally then,
1233 # we'd want to allow users to specify both a value for nonedges and also
1234 # an initial value. For multi(di)graphs, the choice of the initial value
1235 # will, in general, depend on the combinator function---sensible defaults
1236 # can be provided.
1237
1238 if G.is_multigraph():
1239 # Handle MultiGraphs and MultiDiGraphs
1240 A = np.full((nlen, nlen), np.nan, order=order)
1241 # use numpy nan-aware operations
1242 operator = {sum: np.nansum, min: np.nanmin, max: np.nanmax}
1243 try:
1244 op = operator[multigraph_weight]
1245 except Exception as e:
1246 raise ValueError("multigraph_weight must be sum, min, or max") from e
1247
1248 for u, v, attrs in G.edges(data=True):
1249 if (u in nodeset) and (v in nodeset):
1250 i, j = index[u], index[v]
1251 e_weight = attrs.get(weight, 1)
1252 A[i, j] = op([e_weight, A[i, j]])
1253 if undirected:
1254 A[j, i] = A[i, j]
1255 else:
1256 # Graph or DiGraph, this is much faster than above
1257 A = np.full((nlen, nlen), np.nan, order=order)
1258 for u, nbrdict in G.adjacency():
1259 for v, d in nbrdict.items():
1260 try:
1261 A[index[u], index[v]] = d.get(weight, 1)
1262 except KeyError:
1263 # This occurs when there are fewer desired nodes than
1264 # there are nodes in the graph: len(nodelist) < len(G)
1265 pass
1266
1267 A[np.isnan(A)] = nonedge
1268 A = np.asarray(A, dtype=dtype)
1269 return A
1270
1271
1272 def from_numpy_array(A, parallel_edges=False, create_using=None):
1273 """Returns a graph from NumPy array.
1274
1275 The NumPy array is interpreted as an adjacency matrix for the graph.
1276
1277 Parameters
1278 ----------
1279 A : NumPy ndarray
1280 An adjacency matrix representation of a graph
1281
1282 parallel_edges : Boolean
1283 If this is True, `create_using` is a multigraph, and `A` is an
1284 integer array, then entry *(i, j)* in the array is interpreted as the
1285 number of parallel edges joining vertices *i* and *j* in the graph.
1286 If it is False, then the entries in the array are interpreted as
1287 the weight of a single edge joining the vertices.
1288
1289 create_using : NetworkX graph constructor, optional (default=nx.Graph)
1290 Graph type to create. If graph instance, then cleared before populated.
1291
1292 Notes
1293 -----
1294 For directed graphs, explicitly mention create_using=nx.DiGraph,
1295 and entry i,j of A corresponds to an edge from i to j.
1296
1297 If `create_using` is :class:`networkx.MultiGraph` or
1298 :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
1299 entries of `A` are of type :class:`int`, then this function returns a
1300 multigraph (of the same type as `create_using`) with parallel edges.
1301
1302 If `create_using` indicates an undirected multigraph, then only the edges
1303 indicated by the upper triangle of the array `A` will be added to the
1304 graph.
1305
1306 If the NumPy array has a single data type for each array entry it
1307 will be converted to an appropriate Python data type.
1308
1309 If the NumPy array has a user-specified compound data type the names
1310 of the data fields will be used as attribute keys in the resulting
1311 NetworkX graph.
1312
1313 See Also
1314 --------
1315 to_numpy_array
1316
1317 Examples
1318 --------
1319 Simple integer weights on edges:
1320
1321 >>> import numpy as np
1322 >>> A = np.array([[1, 1], [2, 1]])
1323 >>> G = nx.from_numpy_array(A)
1324 >>> G.edges(data=True)
1325 EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), \
1326 (1, 1, {'weight': 1})])
1327
1328 If `create_using` indicates a multigraph and the array has only integer
1329 entries and `parallel_edges` is False, then the entries will be treated
1330 as weights for edges joining the nodes (without creating parallel edges):
1331
1332 >>> A = np.array([[1, 1], [1, 2]])
1333 >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
1334 >>> G[1][1]
1335 AtlasView({0: {'weight': 2}})
1336
1337 If `create_using` indicates a multigraph and the array has only integer
1338 entries and `parallel_edges` is True, then the entries will be treated
1339 as the number of parallel edges joining those two vertices:
1340
1341 >>> A = np.array([[1, 1], [1, 2]])
1342 >>> temp = nx.MultiGraph()
1343 >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
1344 >>> G[1][1]
1345 AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
1346
1347 User defined compound data type on edges:
1348
1349 >>> dt = [("weight", float), ("cost", int)]
1350 >>> A = np.array([[(1.0, 2)]], dtype=dt)
1351 >>> G = nx.from_numpy_array(A)
1352 >>> G.edges()
1353 EdgeView([(0, 0)])
1354 >>> G[0][0]["cost"]
1355 2
1356 >>> G[0][0]["weight"]
1357 1.0
1358
1359 """
1360 return from_numpy_matrix(
1361 A, parallel_edges=parallel_edges, create_using=create_using
1362 )