comparison env/lib/python3.9/site-packages/networkx/readwrite/graphml.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 *******
3 GraphML
4 *******
5 Read and write graphs in GraphML format.
6
7 This implementation does not support mixed graphs (directed and undirected
8 edges together), hyperedges, nested graphs, or ports.
9
10 "GraphML is a comprehensive and easy-to-use file format for graphs. It
11 consists of a language core to describe the structural properties of a
12 graph and a flexible extension mechanism to add application-specific
13 data. Its main features include support of
14
15 * directed, undirected, and mixed graphs,
16 * hypergraphs,
17 * hierarchical graphs,
18 * graphical representations,
19 * references to external data,
20 * application-specific attribute data, and
21 * light-weight parsers.
22
23 Unlike many other file formats for graphs, GraphML does not use a
24 custom syntax. Instead, it is based on XML and hence ideally suited as
25 a common denominator for all kinds of services generating, archiving,
26 or processing graphs."
27
28 http://graphml.graphdrawing.org/
29
30 Format
31 ------
32 GraphML is an XML format. See
33 http://graphml.graphdrawing.org/specification.html for the specification and
34 http://graphml.graphdrawing.org/primer/graphml-primer.html
35 for examples.
36 """
37 import warnings
38 from collections import defaultdict
39
40 from xml.etree.ElementTree import Element, ElementTree, tostring, fromstring
41
42 try:
43 import lxml.etree as lxmletree
44 except ImportError:
45 lxmletree = None
46
47 import networkx as nx
48 from networkx.utils import open_file
49
50 __all__ = [
51 "write_graphml",
52 "read_graphml",
53 "generate_graphml",
54 "write_graphml_xml",
55 "write_graphml_lxml",
56 "parse_graphml",
57 "GraphMLWriter",
58 "GraphMLReader",
59 ]
60
61
@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
):
    """Write G in GraphML XML format to path using the xml.etree writer.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml(G, "test.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together) hyperedges, nested graphs, or ports.
    """
    # Handing the graph to the constructor makes it call add_graph_element(G)
    # once every option has been stored, i.e. the same sequence as building
    # an empty writer and adding the graph afterwards.
    graphml_writer = GraphMLWriter(
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
    )
    graphml_writer.dump(path)
109
110
@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
):
    """Write G in GraphML XML format to path

    This function uses the LXML framework and should be faster than
    the version using the xml library. When lxml is not installed the
    call falls back to `write_graphml_xml` instead of failing.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml_lxml(G, "fourpath.graphml")  # doctest: +SKIP

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together) hyperedges, nested graphs, or ports.
    """
    if lxmletree is None:
        # The module-level import of lxml failed, so GraphMLWriterLxml cannot
        # be constructed (it dereferences lxmletree immediately). `path` is
        # already an open binary file object here and is passed straight on to
        # the pure-Python writer.
        return write_graphml_xml(
            G,
            path,
            encoding=encoding,
            prettyprint=prettyprint,
            infer_numeric_types=infer_numeric_types,
            named_key_ids=named_key_ids,
        )

    writer = GraphMLWriterLxml(
        path,
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
    )
    writer.dump()
162
163
def generate_graphml(G, encoding="utf-8", prettyprint=True, named_key_ids=False):
    """Yield the GraphML serialization of G one text line at a time.

    Parameters
    ----------
    G : graph
        A networkx graph
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))  # doctest: +SKIP
    >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    This implementation does not support mixed graphs (directed and undirected
    edges together) hyperedges, nested graphs, or ports.
    """
    # The constructor serializes G itself when it is given a graph.
    graphml_writer = GraphMLWriter(
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        named_key_ids=named_key_ids,
    )
    document = str(graphml_writer)
    for line in document.splitlines():
        yield line
196
197
@open_file(0, mode="rb")
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read graph in GraphML format from path.

    Parameters
    ----------
    path : file or string
        File or filename to read.
        Filenames ending in .gz or .bz2 are treated as compressed.

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If parallel edges are present or `force_multigraph=True` then
        a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
        The returned graph is directed if the file indicates it should be.

    Raises
    ------
    NetworkXError
        If no graph could be read from the file.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    Files with the yEd "yfiles" extension can be read but the graphics
    information is discarded.

    yEd compressed files ("file.graphmlz" extension) can be read by renaming
    the file to "file.graphml.gz".

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(path=path))
    if not graphs:
        # Nothing matched the GraphML namespace. Retry on the raw bytes with a
        # bare <graphml> root rewritten to carry the namespace declaration.
        path.seek(0)
        namespaced_root = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        patched_bytes = path.read().replace(b"<graphml>", namespaced_root)
        graphs = list(reader(string=patched_bytes))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
269
270
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    """Read graph in GraphML format from string.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information
        (e.g., contents of a graphml file).

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.


    Returns
    -------
    graph: NetworkX graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.

    Raises
    ------
    NetworkXError
        If no graph could be read from the string.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> H = nx.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(string=graphml_string))
    if not graphs:
        # Nothing matched the GraphML namespace. Retry with a bare <graphml>
        # root rewritten to carry the namespace declaration.
        namespaced_root = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        patched_string = graphml_string.replace("<graphml>", namespaced_root)
        graphs = list(reader(string=patched_string))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
342
343
class GraphML:
    """Namespaces and type-conversion tables shared by the reader and writers.

    Attributes
    ----------
    types : list of (python type, GraphML attr.type name) pairs
        Order matters: both lookup tables are built with ``dict()``, so for a
        repeated key the LAST pair wins.
    xml_type : dict
        Python type -> GraphML ``attr.type`` name, used when writing.
    python_type : dict
        GraphML ``attr.type`` name -> Python type, used when reading.
    convert_bool : dict
        Accepted boolean literals -> bool.
    """

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    types = [
        (int, "integer"),  # for Gephi GraphML bug
        (str, "yfiles"),
        (str, "string"),
        # "long" is a valid GraphML attr.type. It is listed BEFORE "int" so
        # that files declaring it can be read while xml_type[int] (last entry
        # wins) stays "int" and written output is unchanged.
        (int, "long"),
        (int, "int"),
        (float, "float"),
        (float, "double"),
        (bool, "boolean"),
    ]

    # These additions to types allow writing numpy types
    try:
        import numpy as np
    except ImportError:
        pass
    else:
        # Resolve the scalar types by name: some aliases (e.g. ``float_``) do
        # not exist in every NumPy release, and a plain attribute access on a
        # missing one would make this class body -- and so the whole module --
        # fail to import. A plain loop is used because comprehension bodies
        # cannot see class-scope names such as ``np``.
        _numpy_types = []
        for _name, _xml_name in [
            ("float64", "float"),
            ("float32", "float"),
            ("float16", "float"),
            ("float_", "float"),
            ("int_", "int"),
            ("int8", "int"),
            ("int16", "int"),
            ("int32", "int"),
            ("int64", "int"),
            ("uint8", "int"),
            ("uint16", "int"),
            ("uint32", "int"),
            ("uint64", "int"),
            ("intc", "int"),
            ("intp", "int"),
        ]:
            _scalar = getattr(np, _name, None)
            if _scalar is not None:
                _numpy_types.append((_scalar, _xml_name))
        # prepend so that python types are created upon read (last entry wins)
        types = _numpy_types + types
        # Keep the loop temporaries out of the class namespace.
        del _numpy_types, _name, _xml_name, _scalar

    xml_type = dict(types)
    python_type = dict(reversed(a) for a in types)

    # This page says that data types in GraphML follow Java(TM).
    # http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    # http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }
409
410
class GraphMLWriter(GraphML):
    """Build a GraphML document in memory with xml.etree.ElementTree.

    Graphs are added with `add_graph_element` / `add_graphs`; the document is
    emitted with `dump` (to a binary stream) or `str()` (as text).

    Parameters
    ----------
    graph : NetworkX graph, optional
        If given, it is added to the document immediately.
    encoding : string
        Encoding used by `dump`.
    prettyprint : bool
        If True the tree is indented before it is serialized.
    infer_numeric_types : bool
        If True, an attribute seen with several types is written with the
        most general one (see `attr_type`).
    named_key_ids : bool
        If True use attr.name as the id of ``<key>`` elements instead of
        generated ``d0``, ``d1``, ... ids.
    """

    def __init__(
        self,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
    ):
        self.myElement = Element

        self.infer_numeric_types = infer_numeric_types
        self.prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.encoding = encoding
        self.xml = self.myElement(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        # (name, attr.type, scope) -> id of the <key> element declared for it
        self.keys = {}
        # XML element -> pending [name, value, scope, default] records
        self.attributes = defaultdict(list)
        # (str(name), scope) -> set of Python types seen for that attribute
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def __str__(self):
        """Return the document as text (without an XML declaration)."""
        if self.prettyprint:
            self.indent(self.xml)
        # tostring() is called without an encoding, so it produces US-ASCII
        # bytes with character references for everything else. Decoding them
        # as ASCII is therefore always right, whereas decoding with
        # self.encoding breaks for encodings that are not ASCII supersets
        # (e.g. utf-16).
        return tostring(self.xml).decode("ascii")

    def attr_type(self, name, scope, value):
        """Infer the attribute type of data named name. Currently this only
        supports inference of numeric types.

        If self.infer_numeric_types is false, type(value) is used. Otherwise,
        pick the most general of types found across all values with name and
        scope. This means edges with data named 'weight' are treated
        separately from nodes with data named 'weight'.
        """
        if not self.infer_numeric_types:
            return type(value)

        # Types are recorded under the stringified name (see add_attributes),
        # so the lookup must stringify too or non-string keys never match.
        seen = self.attribute_types.get((str(name), scope))
        if not seen:
            # Nothing recorded for this attribute: fall back to the value.
            return type(value)
        if len(seen) == 1:
            return next(iter(seen))

        xml_names = {self.xml_type[t] for t in seen}
        if "string" in xml_names:
            return str
        if "float" in xml_names or "double" in xml_names:
            return float
        return int

    def get_key(self, name, attr_type, scope, default):
        """Return the id of the ``<key>`` for (name, attr_type, scope).

        The key element is created and inserted at the front of `self.xml`
        the first time a combination is requested; `default`, when not None,
        becomes its ``<default>`` child.
        """
        keys_key = (name, attr_type, scope)
        try:
            return self.keys[keys_key]
        except KeyError:
            pass

        new_id = name if self.named_key_ids else f"d{len(self.keys)}"
        self.keys[keys_key] = new_id
        # "for" is a keyword and the attr.* names contain dots, hence the
        # dict unpacking instead of plain keyword arguments.
        key_kwargs = {
            "id": new_id,
            "for": scope,
            "attr.name": name,
            "attr.type": attr_type,
        }
        key_element = self.myElement("key", **key_kwargs)
        # add subelement for data default value if present
        if default is not None:
            default_element = self.myElement("default")
            default_element.text = str(default)
            key_element.append(default_element)
        self.xml.insert(0, key_element)
        return new_id

    def add_data(self, name, element_type, value, scope="all", default=None):
        """
        Make a data element for an edge or a node. Keep a log of the
        type in the keys table.

        Raises NetworkXError if element_type has no GraphML equivalent.
        """
        if element_type not in self.xml_type:
            msg = f"GraphML writer does not support {element_type} as data values."
            raise nx.NetworkXError(msg)
        keyid = self.get_key(name, self.xml_type[element_type], scope, default)
        data_element = self.myElement("data", key=keyid)
        data_element.text = str(value)
        return data_element

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data to edges or nodes, and stores type information
        to be added later. See add_graph_element.
        """
        for k, v in data.items():
            self.attribute_types[(str(k), scope)].add(type(v))
            self.attributes[xml_obj].append([k, v, scope, default.get(k)])

    def add_nodes(self, G, graph_element):
        """Create a ``<node>`` element for every node of G."""
        default = G.graph.get("node_default", {})
        for node, data in G.nodes(data=True):
            node_element = self.myElement("node", id=str(node))
            # Attributes first: subclasses may serialize on append().
            self.add_attributes("node", node_element, data, default)
            graph_element.append(node_element)

    def add_edges(self, G, graph_element):
        """Create an ``<edge>`` element for every edge of G.

        Multigraph edges carry their key as the ``id`` attribute.
        """
        default = G.graph.get("edge_default", {})
        if G.is_multigraph():
            for u, v, key, data in G.edges(data=True, keys=True):
                edge_element = self.myElement(
                    "edge", source=str(u), target=str(v), id=str(key)
                )
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)
        else:
            for u, v, data in G.edges(data=True):
                edge_element = self.myElement("edge", source=str(u), target=str(v))
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        G itself is not modified: ``G.graph["id"]``, when present and not
        None, is read and used as the id of the ``<graph>`` element.
        """
        default_edge_type = "directed" if G.is_directed() else "undirected"

        # Read the id instead of popping it so that writing a graph has no
        # side effect on the caller's data.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self.myElement("graph", edgedefault=default_edge_type)
        else:
            # XML attribute values must be strings to be serializable.
            graph_element = self.myElement(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )
        # "id" went into the element above and the two *_default dicts become
        # <default> children of keys, so none of them is graph data.
        reserved = ("node_default", "edge_default", "id")
        data = {k: v for (k, v) in G.graph.items() if k not in reserved}
        self.add_attributes("graph", graph_element, data, {})
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)

        # self.attributes contains a mapping from XML Objects to a list of
        # data that needs to be added to them.
        # We postpone processing in order to do type inference/generalization.
        # See self.attr_type
        for (xml_obj, pending) in self.attributes.items():
            for (k, v, scope, default) in pending:
                xml_obj.append(
                    self.add_data(
                        str(k), self.attr_type(str(k), scope, v), str(v), scope, default
                    )
                )
        # Everything pending has been emitted; forget it so that adding
        # another graph does not append these <data> elements a second time.
        self.attributes.clear()
        self.xml.append(graph_element)

    def add_graphs(self, graph_list):
        """ Add many graphs to this GraphML document. """
        for G in graph_list:
            self.add_graph_element(G)

    def dump(self, stream):
        """Write the document, with XML declaration, to binary `stream`."""
        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(stream, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """Indent `elem` and its descendants in place (prettyprint)."""
        # NOTE(review): the indent unit is reproduced exactly as it appears
        # in the reviewed listing, which seems to have collapsed runs of
        # whitespace -- confirm the intended width against upstream.
        newline_indent = "\n" + level * " "

        def is_blank(text):
            return not text or not text.strip()

        if len(elem):
            if is_blank(elem.text):
                elem.text = newline_indent + " "
            if is_blank(elem.tail):
                elem.tail = newline_indent
            for child in elem:
                self.indent(child, level + 1)
            # The last child's tail precedes the parent's closing tag, so it
            # is pulled back to the parent's indentation level.
            last_child = elem[-1]
            if is_blank(last_child.tail):
                last_child.tail = newline_indent
        elif level and is_blank(elem.tail):
            elem.tail = newline_indent
607
608
class IncrementalElement:
    """Minimal Element look-alike backed by an incremental XML writer.

    Only ``append`` is provided, which is all the GraphMLWriter node and edge
    code needs from a parent element; instead of building a tree, every
    appended element is handed directly to the wrapped writer.
    """

    def __init__(self, xml, prettyprint):
        # xml: object exposing write(element, pretty_print=...)
        # prettyprint: flag forwarded unchanged on every write
        self.xml, self.prettyprint = xml, prettyprint

    def append(self, element):
        """Write `element` through the wrapped writer."""
        writer = self.xml
        writer.write(element, pretty_print=self.prettyprint)
622
623
class GraphMLWriterLxml(GraphMLWriter):
    """GraphML writer that streams to `path` through ``lxml.etree.xmlfile``.

    The XML declaration and the opening ``<graphml>`` tag are written by the
    constructor, each graph is written by `add_graph_element`, and `dump`
    closes the root element and the underlying writer. Requires lxml.

    Parameters
    ----------
    path : file object or filename
        Destination handed to ``lxml.etree.xmlfile``.
    graph : NetworkX graph, optional
        If given, it is written immediately.
    encoding, prettyprint, infer_numeric_types, named_key_ids
        Same meaning as for `GraphMLWriter`.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
    ):
        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.infer_numeric_types = infer_numeric_types

        # The context managers are entered by hand because they must stay
        # open across method calls; dump() exits them.
        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # We will store those keys in a plain list, and then after the graph
        # element is closed we will add them to the main graphml element.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def _declare_keys(self, scope, attr_dicts, defaults):
        """Record value types, then create the ``<key>`` for each attribute.

        All types of a scope are collected before any key is created so that
        `attr_type` can generalize over every value of that scope.
        """
        for attrs in attr_dicts:
            for name, value in attrs.items():
                self.attribute_types[(str(name), scope)].add(type(value))
        for attrs in attr_dicts:
            for name, value in attrs.items():
                # Look the type up under the stringified name, the form used
                # when it was recorded above and by add_attributes below.
                xml_name = self.xml_type[self.attr_type(str(name), scope, value)]
                self.get_key(str(name), xml_name, scope, defaults.get(name))

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        G itself is not modified: ``G.graph["id"]``, when present and not
        None, is read and used as the id of the ``<graph>`` element.
        """
        default_edge_type = "directed" if G.is_directed() else "undirected"

        # Read the id instead of popping it so that writing a graph has no
        # side effect on the caller's data.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self._xml.element("graph", edgedefault=default_edge_type)
        else:
            graph_element = self._xml.element(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )

        # gather attributes types for the whole graph
        # to find the most general numeric format needed.
        # Then pass through attributes to create key_id for each.
        reserved = ("node_default", "edge_default", "id")
        graphdata = {k: v for k, v in G.graph.items() if k not in reserved}
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})

        # Order matters: generated key ids (d0, d1, ...) follow creation
        # order, i.e. graph attributes, then node, then edge attributes.
        self._declare_keys("graph", [graphdata], {})
        self._declare_keys(
            "node", [attrs for _, attrs in G.nodes(data=True)], node_default
        )
        # edges(data=True) yields (u, v, attrs) for every edge, parallel
        # multigraph edges included, so no multigraph special case is needed.
        self._declare_keys(
            "edge", [attrs for _, _, attrs in G.edges(data=True)], edge_default
        )

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)
        # They are on disk now; drop them so that a later graph does not emit
        # the same <key> elements again. (self._keys aliases this list.)
        del self.xml[:]

        # The incremental_writer writes each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        # The document is streamed, so there is no in-memory tree to render.
        return object.__str__(self)

    def dump(self):
        """Close the ``<graphml>`` element and the underlying xmlfile."""
        try:
            self._graphml.__exit__(None, None, None)
        finally:
            # Release the writer even if closing the root element failed.
            self._xml_base.__exit__(None, None, None)
750
751
# Default writer: the lxml-based implementation when lxml could be imported,
# the xml.etree-based one otherwise.
write_graphml = write_graphml_xml if lxmletree is None else write_graphml_lxml
757
758
class GraphMLReader(GraphML):
    """Read a GraphML document. Produces NetworkX graph objects.

    Calling an instance with ``path=`` or ``string=`` yields one graph per
    top-level ``<graph>`` element.

    Parameters
    ----------
    node_type : callable
        Applied to node ids (and edge endpoints).
    edge_key_type : callable
        Applied to edge ids; ids that raise ValueError are kept as strings.
    force_multigraph : bool
        If True always produce multigraphs; otherwise a multigraph is only
        produced for graphs that contain parallel edges.
    """

    def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
        self.node_type = node_type
        self.edge_key_type = edge_key_type
        # Kept so that per-graph state can be reset in __call__.
        self.force_multigraph = force_multigraph
        self.multigraph = force_multigraph  # If False, test for multiedges
        self.edge_ids = {}  # dict mapping (u,v) tuples to edge id attributes

    def __call__(self, path=None, string=None):
        """Parse a document and yield a graph for each ``<graph>`` in it.

        Raises ValueError if neither `path` nor `string` is given.
        """
        if path is not None:
            self.xml = ElementTree(file=path)
        elif string is not None:
            self.xml = fromstring(string)
        else:
            raise ValueError("Must specify either 'path' or 'string' as kwarg")
        (keys, defaults) = self.find_graphml_keys(self.xml)
        for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
            # Multigraph detection and collected edge ids describe ONE graph;
            # reset them so that they do not leak into the next graph of the
            # document (or into a later call on the same reader).
            self.multigraph = self.force_multigraph
            self.edge_ids = {}
            yield self.make_graph(g, keys, defaults)

    def _decode_default(self, python_type, text):
        """Convert the text of a key's ``<default>`` to `python_type`."""
        if python_type == bool and isinstance(text, str):
            # bool("false") is True, so boolean literals must go through the
            # same table as <data> values.
            try:
                return self.convert_bool[text.lower()]
            except KeyError:
                pass  # unrecognized literal: keep the plain truthiness rule
        return python_type(text)

    def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
        """Build a graph from a ``<graph>`` element.

        When `G` is given (nested yEd group) the nodes and edges are added to
        it. Returns a multigraph if parallel edges were seen or multigraphs
        are forced, otherwise a Graph/DiGraph copy carrying explicit edge ids
        in the "id" edge attribute.

        Raises NetworkXError if the graph contains hyperedges.
        """
        # set default graph type
        edgedefault = graph_xml.get("edgedefault", None)
        if G is None:
            if edgedefault == "directed":
                G = nx.MultiDiGraph()
            else:
                G = nx.MultiGraph()
        # set defaults for graph attributes
        G.graph["node_default"] = {}
        G.graph["edge_default"] = {}
        for key_id, value in defaults.items():
            key_info = graphml_keys[key_id]
            key_for = key_info["for"]
            # Only node and edge defaults are stored; others are ignored and
            # therefore never converted.
            if key_for == "node":
                G.graph["node_default"][key_info["name"]] = self._decode_default(
                    key_info["type"], value
                )
            if key_for == "edge":
                G.graph["edge_default"][key_info["name"]] = self._decode_default(
                    key_info["type"], value
                )
        # hyperedges are not supported
        hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
        if hyperedge is not None:
            raise nx.NetworkXError("GraphML reader doesn't support hyperedges")
        # add nodes
        for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
            self.add_node(G, node_xml, graphml_keys, defaults)
        # add edges
        for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
            self.add_edge(G, edge_xml, graphml_keys)
        # add graph data
        data = self.decode_data_elements(graphml_keys, graph_xml)
        G.graph.update(data)

        # switch to Graph or DiGraph if no parallel edges were found
        if self.multigraph:
            return G

        G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G)
        # add explicit edge "id" from file as attribute in NX graph.
        nx.set_edge_attributes(G, values=self.edge_ids, name="id")
        return G

    def add_node(self, G, node_xml, graphml_keys, defaults):
        """Add a node to the graph.

        A yEd group node also contributes the nodes and edges of its nested
        ``<graph>``, which are added to the same graph G.
        """
        # warn on finding unsupported ports tag
        ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")
        # find the node by id and cast it to the appropriate type
        node_id = self.node_type(node_xml.get("id"))
        # get data/attributes for node
        data = self.decode_data_elements(graphml_keys, node_xml)
        G.add_node(node_id, **data)
        # get child nodes
        if node_xml.attrib.get("yfiles.foldertype") == "group":
            graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
            # A group without a nested <graph> has nothing more to add.
            if graph_xml is not None:
                self.make_graph(graph_xml, graphml_keys, defaults, G)

    def add_edge(self, G, edge_element, graphml_keys):
        """Add an edge to the graph.

        Raises NetworkXError if the edge's ``directed`` attribute contradicts
        the directedness of G.
        """
        # warn on finding unsupported ports tag
        ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")

        # raise error if we find mixed directed and undirected edges
        directed = edge_element.get("directed")
        if G.is_directed() and directed == "false":
            msg = "directed=false edge found in directed graph."
            raise nx.NetworkXError(msg)
        if (not G.is_directed()) and directed == "true":
            msg = "directed=true edge found in undirected graph."
            raise nx.NetworkXError(msg)

        source = self.node_type(edge_element.get("source"))
        target = self.node_type(edge_element.get("target"))
        data = self.decode_data_elements(graphml_keys, edge_element)
        # GraphML stores edge ids as an attribute
        # NetworkX uses them as keys in multigraphs too if no key
        # attribute is specified
        edge_id = edge_element.get("id")
        if edge_id:
            # self.edge_ids is used by `make_graph` method for non-multigraphs
            self.edge_ids[source, target] = edge_id
            try:
                edge_id = self.edge_key_type(edge_id)
            except ValueError:  # Could not convert.
                pass
        else:
            edge_id = data.get("key")

        if G.has_edge(source, target):
            # mark this as a multigraph
            self.multigraph = True

        # Use add_edges_from to avoid error with add_edge when `'key' in data`
        # Note there is only one edge here...
        G.add_edges_from([(source, target, edge_id, data)])

    def decode_data_elements(self, graphml_keys, obj_xml):
        """Use the key information to decode the data XML if present.

        Returns a dict mapping attribute names to converted values. Data
        elements with child elements are treated as yEd content, from which
        only geometry ("x", "y") and a "label" are extracted.

        Raises NetworkXError if a data element refers to an undeclared key.
        """
        data = {}
        for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
            key = data_element.get("key")
            try:
                data_name = graphml_keys[key]["name"]
                data_type = graphml_keys[key]["type"]
            except KeyError as e:
                raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from e
            text = data_element.text
            has_children = len(data_element) > 0
            # assume anything with subelements is a yfiles extension
            if text is not None and not has_children:
                if data_type == bool:
                    # Ignore cases.
                    # http://docs.oracle.com/javase/6/docs/api/java/lang/
                    # Boolean.html#parseBoolean%28java.lang.String%29
                    data[data_name] = self.convert_bool[text.lower()]
                else:
                    data[data_name] = data_type(text)
            elif has_children:
                # Assume yfiles as subelements, try to extract node_label
                node_label = None
                for node_type in ["ShapeNode", "SVGNode", "ImageNode"]:
                    pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
                    geometry = data_element.find(f"{pref}Geometry")
                    if geometry is not None:
                        data["x"] = geometry.get("x")
                        data["y"] = geometry.get("y")
                    if node_label is None:
                        node_label = data_element.find(f"{pref}NodeLabel")
                if node_label is not None:
                    data["label"] = node_label.text

                # check all the different types of edges available in yEd.
                edge_label = None
                for edge_type in [
                    "PolyLineEdge",
                    "SplineEdge",
                    "QuadCurveEdge",
                    "BezierEdge",
                    "ArcEdge",
                ]:
                    pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
                    edge_label = data_element.find(f"{pref}EdgeLabel")
                    if edge_label is not None:
                        break

                if edge_label is not None:
                    data["label"] = edge_label.text
        return data

    def find_graphml_keys(self, graph_element):
        """Extracts all the keys and key defaults from the xml.

        Returns ``(keys, defaults)``: `keys` maps a key id to a dict with
        "name", "type" (a Python type) and "for"; `defaults` maps a key id to
        the unconverted text of its ``<default>`` child.

        Raises NetworkXError if a key has no name.
        """
        graphml_keys = {}
        graphml_key_defaults = {}
        for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
            attr_id = k.get("id")
            attr_type = k.get("attr.type")
            attr_name = k.get("attr.name")
            yfiles_type = k.get("yfiles.type")
            if yfiles_type is not None:
                attr_name = yfiles_type
                attr_type = "yfiles"
            if attr_type is None:
                attr_type = "string"
                warnings.warn(f"No key type for id {attr_id}. Using string")
            if attr_name is None:
                raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
            graphml_keys[attr_id] = {
                "name": attr_name,
                "type": self.python_type[attr_type],
                "for": k.get("for"),
            }
            # check for "default" subelement of key element
            default = k.find(f"{{{self.NS_GRAPHML}}}default")
            if default is not None:
                graphml_key_defaults[attr_id] = default.text
        return graphml_keys, graphml_key_defaults