Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/readwrite/graphml.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 ******* | |
3 GraphML | |
4 ******* | |
5 Read and write graphs in GraphML format. | |
6 | |
7 This implementation does not support mixed graphs (directed and undirected | |
8 edges together), hyperedges, nested graphs, or ports. | |
9 | |
10 "GraphML is a comprehensive and easy-to-use file format for graphs. It | |
11 consists of a language core to describe the structural properties of a | |
12 graph and a flexible extension mechanism to add application-specific | |
13 data. Its main features include support of | |
14 | |
15 * directed, undirected, and mixed graphs, | |
16 * hypergraphs, | |
17 * hierarchical graphs, | |
18 * graphical representations, | |
19 * references to external data, | |
20 * application-specific attribute data, and | |
21 * light-weight parsers. | |
22 | |
23 Unlike many other file formats for graphs, GraphML does not use a | |
24 custom syntax. Instead, it is based on XML and hence ideally suited as | |
25 a common denominator for all kinds of services generating, archiving, | |
26 or processing graphs." | |
27 | |
28 http://graphml.graphdrawing.org/ | |
29 | |
30 Format | |
31 ------ | |
32 GraphML is an XML format. See | |
33 http://graphml.graphdrawing.org/specification.html for the specification and | |
34 http://graphml.graphdrawing.org/primer/graphml-primer.html | |
35 for examples. | |
36 """ | |
37 import warnings | |
38 from collections import defaultdict | |
39 | |
40 from xml.etree.ElementTree import Element, ElementTree, tostring, fromstring | |
41 | |
42 try: | |
43 import lxml.etree as lxmletree | |
44 except ImportError: | |
45 lxmletree = None | |
46 | |
47 import networkx as nx | |
48 from networkx.utils import open_file | |
49 | |
50 __all__ = [ | |
51 "write_graphml", | |
52 "read_graphml", | |
53 "generate_graphml", | |
54 "write_graphml_xml", | |
55 "write_graphml_lxml", | |
56 "parse_graphml", | |
57 "GraphMLWriter", | |
58 "GraphMLReader", | |
59 ] | |
60 | |
61 | |
@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
):
    """Serialize graph ``G`` to ``path`` as GraphML via ``xml.etree``.

    Parameters
    ----------
    G : graph
        A networkx graph.
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True, emit line breaks and indentation in the output XML.
    infer_numeric_types : boolean
        Whether numeric types should be generalized. For example, when
        edges carry both int and float 'weight' attributes, both are
        declared as floats in the GraphML output.
    named_key_ids : bool (optional)
        If True, use attr.name as the value of each key element's id.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml(G, "test.graphml")

    Notes
    -----
    Mixed graphs (directed and undirected edges together), hyperedges,
    nested graphs, and ports are not supported.
    """
    writer_options = {
        "encoding": encoding,
        "prettyprint": prettyprint,
        "infer_numeric_types": infer_numeric_types,
        "named_key_ids": named_key_ids,
    }
    graphml_writer = GraphMLWriter(**writer_options)
    # Build the whole document in memory, then flush it to the stream.
    graphml_writer.add_graph_element(G)
    graphml_writer.dump(path)
109 | |
110 | |
@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
):
    """Write G in GraphML XML format to path

    This function uses the LXML framework and should be faster than
    the version using the xml library. When lxml is not installed the
    graph is written with the ``xml.etree`` based writer instead of
    failing.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml_lxml(G, "fourpath.graphml")  # doctest: +SKIP

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together) hyperedges, nested graphs, or ports.
    """
    if lxmletree is None:
        # lxml is an optional dependency: without it GraphMLWriterLxml would
        # fail on ``None.Element``, so fall back to the ElementTree writer.
        fallback = GraphMLWriter(
            encoding=encoding,
            prettyprint=prettyprint,
            infer_numeric_types=infer_numeric_types,
            named_key_ids=named_key_ids,
        )
        fallback.add_graph_element(G)
        fallback.dump(path)
        return

    writer = GraphMLWriterLxml(
        path,
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
    )
    # The graph was streamed during construction; dump() closes the document.
    writer.dump()
162 | |
163 | |
def generate_graphml(G, encoding="utf-8", prettyprint=True, named_key_ids=False):
    """Yield the GraphML representation of ``G`` one line at a time.

    Parameters
    ----------
    G : graph
        A networkx graph.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True, emit line breaks and indentation in the output XML.
    named_key_ids : bool (optional)
        If True, use attr.name as the value of each key element's id.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))  # doctest: +SKIP
    >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    Mixed graphs (directed and undirected edges together), hyperedges,
    nested graphs, and ports are not supported.
    """
    document = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        named_key_ids=named_key_ids,
    )
    document.add_graph_element(G)
    # str() renders the complete document; hand it out line by line.
    for line in str(document).splitlines():
        yield line
196 | |
197 | |
@open_file(0, mode="rb")
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read a graph in GraphML format from ``path``.

    Parameters
    ----------
    path : file or string
        File or filename to read.
        Filenames ending in .gz or .bz2 are treated as compressed files.

    node_type: Python type (default: str)
        Convert node ids to this type.

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph only when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If parallel edges are present or `force_multigraph=True` then
        a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
        The returned graph is directed if the file indicates it should be.

    Raises
    ------
    NetworkXError
        If no graph element can be read from the file, even after retrying
        with the GraphML namespace added to a bare ``<graphml>`` tag.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    Files with the yEd "yfiles" extension can be read but the graphics
    information is discarded.

    yEd compressed files ("file.graphmlz" extension) can be read by renaming
    the file to "file.graphml.gz".

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(path=path))
    if not graphs:
        # Nothing matched the namespaced tags: retry with the namespace
        # patched into a bare <graphml> root element.
        path.seek(0)
        raw = path.read()
        patched = raw.replace(
            b"<graphml>",
            b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
        )
        graphs = list(reader(string=patched))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
269 | |
270 | |
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    """Read a graph in GraphML format from a string.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information
        (e.g., contents of a graphml file).

    node_type: Python type (default: str)
        Convert node ids to this type.

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph only when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.

    Raises
    ------
    NetworkXError
        If no graph element can be read from the string, even after retrying
        with the GraphML namespace added to a bare ``<graphml>`` tag.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> H = nx.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(string=graphml_string))
    if not graphs:
        # Nothing matched the namespaced tags: retry with the namespace
        # patched into a bare <graphml> root element.
        patched = graphml_string.replace(
            "<graphml>",
            '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
        )
        graphs = list(reader(string=patched))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
342 | |
343 | |
class GraphML:
    """Constants and type tables shared by the GraphML reader and writers.

    ``xml_type`` maps a Python (or numpy) type to the GraphML ``attr.type``
    name used when writing; ``python_type`` maps an ``attr.type`` name to the
    Python type used when reading. Both are derived from ``types``, where a
    later entry overrides an earlier one with the same key.
    """

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    types = [
        (int, "integer"),  # for Gephi GraphML bug
        (str, "yfiles"),
        (str, "string"),
        # "long" is a valid GraphML attr.type; it is listed before "int" so
        # that Python ints are still written as "int" (last entry wins).
        (int, "long"),
        (int, "int"),
        (float, "float"),
        (float, "double"),
        (bool, "boolean"),
    ]

    # These additions to types allow writing numpy types
    try:
        import numpy as np
    except ImportError:
        # numpy is optional; without it only builtin types are supported.
        pass
    else:
        # Aliases are looked up by name because some of them (e.g. ``float_``)
        # do not exist in every numpy release.
        _numpy_types = []
        for _xml_name, _np_names in (
            ("float", ("float64", "float32", "float16", "float_")),
            (
                "int",
                (
                    "int_",
                    "int8",
                    "int16",
                    "int32",
                    "int64",
                    "uint8",
                    "uint16",
                    "uint32",
                    "uint64",
                    "intc",
                    "intp",
                ),
            ),
        ):
            for _np_name in _np_names:
                _np_type = getattr(np, _np_name, None)
                if _np_type is not None:
                    _numpy_types.append((_np_type, _xml_name))
        # prepend so that python types are created upon read (last entry wins)
        types = _numpy_types + types
        # Explicit loops are used because comprehensions in a class body
        # cannot see class-level names; drop the loop temporaries afterwards.
        del _numpy_types, _xml_name, _np_names, _np_name, _np_type

    xml_type = dict(types)
    python_type = dict(reversed(a) for a in types)

    # This page says that data types in GraphML follow Java(TM).
    #  http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    #  http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }
409 | |
410 | |
class GraphMLWriter(GraphML):
    """Build a GraphML document in memory with ``xml.etree.ElementTree``.

    Graphs are added with `add_graph_element` / `add_graphs`; the finished
    document is obtained with ``str(writer)`` or written with `dump`.
    """

    def __init__(
        self,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
    ):
        """Create the root ``graphml`` element and optionally add ``graph``.

        Parameters
        ----------
        graph : NetworkX graph, optional
            If given, it is serialized immediately.
        encoding : string
            Encoding used by `dump` and when decoding in ``__str__``.
        prettyprint : bool
            If True, indent the document before it is rendered.
        infer_numeric_types : bool
            If True, pick the most general type seen for each attribute
            name and scope (see `attr_type`).
        named_key_ids : bool
            If True, key ids are the attribute names instead of ``d<N>``.
        """
        self.myElement = Element

        self.infer_numeric_types = infer_numeric_types
        self.prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.encoding = encoding
        self.xml = self.myElement(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        # (name, xml type, scope) -> key id
        self.keys = {}
        # xml element -> list of [name, value, scope, default] still to write
        self.attributes = defaultdict(list)
        # (str(name), scope) -> set of Python types seen for that attribute
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def __str__(self):
        """Return the document as a string (indented if prettyprint)."""
        if self.prettyprint:
            self.indent(self.xml)
        s = tostring(self.xml).decode(self.encoding)
        return s

    def attr_type(self, name, scope, value):
        """Infer the attribute type of data named name. Currently this only
        supports inference of numeric types.

        If self.infer_numeric_types is false, type(value) is used. Otherwise,
        pick the most general of types found across all values with name and
        scope. This means edges with data named 'weight' are treated
        separately from nodes with data named 'weight'.
        """
        if not self.infer_numeric_types:
            return type(value)

        # Types are recorded under the stringified name (see add_attributes),
        # so the lookup must stringify as well; .get avoids creating entries.
        types = self.attribute_types.get((str(name), scope))
        if not types:
            # Nothing recorded for this attribute: use the value's own type.
            return type(value)
        if len(types) == 1:
            return next(iter(types))

        xml_names = {self.xml_type[t] for t in types}
        if "string" in xml_names:
            return str
        if "float" in xml_names or "double" in xml_names:
            return float
        return int

    def get_key(self, name, attr_type, scope, default):
        """Return the key id for (name, attr_type, scope), creating the
        ``key`` element (with an optional ``default`` child) on first use.
        """
        keys_key = (name, attr_type, scope)
        try:
            return self.keys[keys_key]
        except KeyError:
            if self.named_key_ids:
                new_id = name
            else:
                new_id = f"d{len(self.keys)}"

            self.keys[keys_key] = new_id
            key_kwargs = {
                "id": new_id,
                "for": scope,
                "attr.name": name,
                "attr.type": attr_type,
            }
            key_element = self.myElement("key", **key_kwargs)
            # add subelement for data default value if present
            if default is not None:
                default_element = self.myElement("default")
                default_element.text = str(default)
                key_element.append(default_element)
            # Key elements go to the front of the document.
            self.xml.insert(0, key_element)
        return new_id

    def add_data(self, name, element_type, value, scope="all", default=None):
        """
        Make a data element for an edge or a node. Keep a log of the
        type in the keys table.

        Raises NetworkXError when element_type has no GraphML equivalent.
        """
        if element_type not in self.xml_type:
            msg = f"GraphML writer does not support {element_type} as data values."
            raise nx.NetworkXError(msg)
        keyid = self.get_key(name, self.xml_type[element_type], scope, default)
        data_element = self.myElement("data", key=keyid)
        data_element.text = str(value)
        return data_element

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data to edges or nodes, and stores type information
        to be added later. See add_graph_element.
        """
        for k, v in data.items():
            self.attribute_types[(str(k), scope)].add(type(v))
            self.attributes[xml_obj].append([k, v, scope, default.get(k)])

    def add_nodes(self, G, graph_element):
        """Append one ``node`` element per node of G to graph_element."""
        default = G.graph.get("node_default", {})
        for node, data in G.nodes(data=True):
            node_element = self.myElement("node", id=str(node))
            self.add_attributes("node", node_element, data, default)
            graph_element.append(node_element)

    def add_edges(self, G, graph_element):
        """Append one ``edge`` element per edge of G to graph_element.

        For multigraphs the edge key is written as the edge ``id``.
        """
        default = G.graph.get("edge_default", {})
        if G.is_multigraph():
            for u, v, key, data in G.edges(data=True, keys=True):
                edge_element = self.myElement(
                    "edge", source=str(u), target=str(v), id=str(key)
                )
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)
        else:
            for u, v, data in G.edges(data=True):
                edge_element = self.myElement("edge", source=str(u), target=str(v))
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML and append it to the document.

        ``G.graph["id"]``, when present, becomes the ``id`` attribute of the
        graph element. G itself is left unmodified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        graph_attrib = {"edgedefault": default_edge_type}
        # Read the id without popping it: writing must not alter the graph.
        graphid = G.graph.get("id")
        if graphid is not None:
            graph_attrib["id"] = str(graphid)
        graph_element = self.myElement("graph", **graph_attrib)

        data = {
            k: v
            for (k, v) in G.graph.items()
            if k not in ("node_default", "edge_default", "id")
        }
        self.add_attributes("graph", graph_element, data, {})
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)

        # self.attributes contains a mapping from XML Objects to a list of
        # data that needs to be added to them.
        # We postpone processing in order to do type inference/generalization.
        # See self.attr_type
        for xml_obj, pending in self.attributes.items():
            for k, v, scope, default in pending:
                xml_obj.append(
                    self.add_data(
                        str(k), self.attr_type(k, scope, v), str(v), scope, default
                    )
                )
        # Everything pending has been written; without this reset a later
        # graph would append the same data elements to these objects again.
        self.attributes.clear()
        self.xml.append(graph_element)

    def add_graphs(self, graph_list):
        """ Add many graphs to this GraphML document. """
        for G in graph_list:
            self.add_graph_element(G)

    def dump(self, stream):
        """Write the document, with an XML declaration, to stream."""
        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(stream, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """In-place prettyprint formatter: fills in text/tail whitespace."""
        # NOTE(review): this listing appears to have collapsed whitespace;
        # confirm the indent unit (" ") against the real file.
        pad = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = pad + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = pad
            child = None
            for child in elem:
                self.indent(child, level + 1)
            # The last child's tail positions the parent's closing tag.
            if not child.tail or not child.tail.strip():
                child.tail = pad
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = pad
607 | |
608 | |
class IncrementalElement:
    """Element-like facade over an incremental XML writer.

    Only the calls that GraphMLWriter makes on an element are provided;
    this is not meant to be a complete Element implementation.
    """

    def __init__(self, xml, prettyprint):
        # Keep the underlying writer and the pretty-print flag for append().
        self.prettyprint = prettyprint
        self.xml = xml

    def append(self, element):
        """Write ``element`` straight to the wrapped writer."""
        pretty = self.prettyprint
        self.xml.write(element, pretty_print=pretty)
622 | |
623 | |
class GraphMLWriterLxml(GraphMLWriter):
    """GraphML writer that streams the document with ``lxml.etree.xmlfile``.

    Key elements are collected first, written ahead of the graph element,
    and nodes/edges are then written one by one as they are created.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
    ):
        """Open the incremental writer on ``path`` and start the document.

        Parameters
        ----------
        path : file or string
            Passed to ``lxml.etree.xmlfile``.
        graph : NetworkX graph, optional
            If given, it is serialized immediately.
        encoding, prettyprint, infer_numeric_types, named_key_ids
            Same meaning as for GraphMLWriter.
        """
        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.infer_numeric_types = infer_numeric_types

        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # We will store those keys in a plain list, and then write them to
        # the main graphml element before the graph element is opened.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def _register_keys(self, scope, attr_dicts, defaults):
        """Record attribute types for ``scope`` and create their key elements.

        ``attr_dicts`` is a list of attribute dicts. All types are collected
        before any key is created so that numeric inference sees every value.
        """
        for attrs in attr_dicts:
            for name, value in attrs.items():
                self.attribute_types[(str(name), scope)].add(type(value))
        for attrs in attr_dicts:
            for name, value in attrs.items():
                # Types are stored under str(name); look them up the same way.
                xml_name = self.xml_type[self.attr_type(str(name), scope, value)]
                self.get_key(str(name), xml_name, scope, defaults.get(name))

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        ``G.graph["id"]``, when present, becomes the ``id`` attribute of the
        graph element. G itself is left unmodified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read the id without popping it: writing must not alter the graph.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self._xml.element("graph", edgedefault=default_edge_type)
        else:
            graph_element = self._xml.element(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )

        # gather attributes types for the whole graph
        # to find the most general numeric format needed.
        # Then pass through attributes to create key_id for each.
        graphdata = {
            k: v
            for k, v in G.graph.items()
            if k not in ("node_default", "edge_default", "id")
        }
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})

        self._register_keys("graph", [graphdata], {})
        self._register_keys("node", [d for _, d in G.nodes(data=True)], node_default)
        # edges(data=True) yields (u, v, data) for multigraphs as well.
        self._register_keys(
            "edge", [d for _, _, d in G.edges(data=True)], edge_default
        )

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)
        # These keys are on the stream now; forget them so that a later graph
        # does not emit them a second time (self.keys still remembers the ids).
        del self.xml[:]

        # The incremental_writer writes each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        # The document is streamed, so there is nothing in memory to render.
        return object.__str__(self)

    def dump(self):
        """Close the ``graphml`` element and the underlying xmlfile."""
        self._graphml.__exit__(None, None, None)
        self._xml_base.__exit__(None, None, None)
750 | |
751 | |
# Default writer: the lxml-based one when lxml could be imported, otherwise
# the standard-library ElementTree one.
write_graphml = write_graphml_xml if lxmletree is None else write_graphml_lxml
757 | |
758 | |
class GraphMLReader(GraphML):
    """Read a GraphML document. Produces NetworkX graph objects."""

    def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
        """Store the conversion options.

        Parameters
        ----------
        node_type : Python type
            Applied to node ids and edge endpoints.
        edge_key_type : Python type
            Applied to edge ids used as multigraph keys.
        force_multigraph : bool
            If True always return a multigraph.
        """
        self.node_type = node_type
        self.edge_key_type = edge_key_type
        self.multigraph = force_multigraph  # If False, test for multiedges
        self.edge_ids = {}  # dict mapping (u,v) tuples to edge id attributes

    def __call__(self, path=None, string=None):
        """Yield one graph per top-level ``graph`` element.

        Exactly one of ``path`` (file or filename) or ``string`` (document
        text) is used; ``path`` takes precedence. A ValueError is raised
        when neither is given.
        """
        if path is not None:
            self.xml = ElementTree(file=path)
        elif string is not None:
            self.xml = fromstring(string)
        else:
            raise ValueError("Must specify either 'path' or 'string' as kwarg")
        (keys, defaults) = self.find_graphml_keys(self.xml)
        for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
            yield self.make_graph(g, keys, defaults)

    def _decode_default(self, python_type, value):
        """Convert the text of a key ``default`` element to python_type."""
        if python_type is bool and isinstance(value, str):
            # bool("false") is True, so boolean text goes through the
            # literal table; unknown text keeps the plain bool() result.
            return self.convert_bool.get(value.lower(), bool(value))
        return python_type(value)

    def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
        """Build a graph from a ``graph`` element.

        When ``G`` is given (nested yEd group graphs), nodes and edges are
        added to it. Returns a MultiGraph/MultiDiGraph if multiedges were
        found or forced, otherwise a Graph/DiGraph copy carrying the edge
        ids from the file as the "id" edge attribute.
        """
        # set default graph type
        edgedefault = graph_xml.get("edgedefault", None)
        if G is None:
            if edgedefault == "directed":
                G = nx.MultiDiGraph()
            else:
                G = nx.MultiGraph()
        # set defaults for graph attributes
        G.graph["node_default"] = {}
        G.graph["edge_default"] = {}
        for key_id, value in defaults.items():
            key_info = graphml_keys[key_id]
            key_for = key_info["for"]
            if key_for in ("node", "edge"):
                G.graph[f"{key_for}_default"][key_info["name"]] = (
                    self._decode_default(key_info["type"], value)
                )
        # hyperedges are not supported
        hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
        if hyperedge is not None:
            raise nx.NetworkXError("GraphML reader doesn't support hyperedges")
        # add nodes
        for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
            self.add_node(G, node_xml, graphml_keys, defaults)
        # add edges
        for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
            self.add_edge(G, edge_xml, graphml_keys)
        # add graph data
        data = self.decode_data_elements(graphml_keys, graph_xml)
        G.graph.update(data)

        # switch to Graph or DiGraph if no parallel edges were found
        if self.multigraph:
            return G

        G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G)
        # add explicit edge "id" from file as attribute in NX graph.
        nx.set_edge_attributes(G, values=self.edge_ids, name="id")
        return G

    def add_node(self, G, node_xml, graphml_keys, defaults):
        """Add a node to the graph.

        yEd group nodes also have the nodes and edges of their nested
        graph added to G.
        """
        # warn on finding unsupported ports tag
        ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")
        # find the node by id and cast it to the appropriate type
        node_id = self.node_type(node_xml.get("id"))
        # get data/attributes for node
        data = self.decode_data_elements(graphml_keys, node_xml)
        G.add_node(node_id, **data)
        # get child nodes
        if node_xml.attrib.get("yfiles.foldertype") == "group":
            graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
            # A group node without a nested graph has nothing more to add.
            if graph_xml is not None:
                self.make_graph(graph_xml, graphml_keys, defaults, G)

    def add_edge(self, G, edge_element, graphml_keys):
        """Add an edge to the graph.

        Raises NetworkXError when the edge's ``directed`` attribute
        contradicts the direction of G.
        """
        # warn on finding unsupported ports tag
        ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")

        # raise error if we find mixed directed and undirected edges
        directed = edge_element.get("directed")
        if G.is_directed() and directed == "false":
            msg = "directed=false edge found in directed graph."
            raise nx.NetworkXError(msg)
        if (not G.is_directed()) and directed == "true":
            msg = "directed=true edge found in undirected graph."
            raise nx.NetworkXError(msg)

        source = self.node_type(edge_element.get("source"))
        target = self.node_type(edge_element.get("target"))
        data = self.decode_data_elements(graphml_keys, edge_element)
        # GraphML stores edge ids as an attribute
        # NetworkX uses them as keys in multigraphs too if no key
        # attribute is specified
        edge_id = edge_element.get("id")
        if edge_id:
            # self.edge_ids is used by `make_graph` method for non-multigraphs
            self.edge_ids[source, target] = edge_id
            try:
                edge_id = self.edge_key_type(edge_id)
            except ValueError:  # Could not convert.
                pass
        else:
            edge_id = data.get("key")

        if G.has_edge(source, target):
            # mark this as a multigraph
            self.multigraph = True

        # Use add_edges_from to avoid error with add_edge when `'key' in data`
        # Note there is only one edge here...
        G.add_edges_from([(source, target, edge_id, data)])

    def decode_data_elements(self, graphml_keys, obj_xml):
        """Use the key information to decode the data XML if present.

        Returns a dict of attribute name -> converted value. Raises
        NetworkXError when a data element refers to an undeclared key.
        """
        data = {}
        for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
            key = data_element.get("key")
            try:
                data_name = graphml_keys[key]["name"]
                data_type = graphml_keys[key]["type"]
            except KeyError as e:
                raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from e
            text = data_element.text
            has_children = len(data_element) > 0
            # assume anything with subelements is a yfiles extension
            if text is not None and not has_children:
                if data_type == bool:
                    # Ignore cases.
                    # http://docs.oracle.com/javase/6/docs/api/java/lang/
                    # Boolean.html#parseBoolean%28java.lang.String%29
                    data[data_name] = self.convert_bool[text.lower()]
                else:
                    data[data_name] = data_type(text)
            elif has_children:
                # Assume yfiles as subelements, try to extract node_label
                node_label = None
                for node_type in ["ShapeNode", "SVGNode", "ImageNode"]:
                    pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
                    geometry = data_element.find(f"{pref}Geometry")
                    if geometry is not None:
                        data["x"] = geometry.get("x")
                        data["y"] = geometry.get("y")
                    if node_label is None:
                        node_label = data_element.find(f"{pref}NodeLabel")
                if node_label is not None:
                    data["label"] = node_label.text

                # check all the different types of edges available in yEd.
                edge_label = None
                for e in [
                    "PolyLineEdge",
                    "SplineEdge",
                    "QuadCurveEdge",
                    "BezierEdge",
                    "ArcEdge",
                ]:
                    pref = f"{{{self.NS_Y}}}{e}/{{{self.NS_Y}}}"
                    edge_label = data_element.find(f"{pref}EdgeLabel")
                    if edge_label is not None:
                        break

                if edge_label is not None:
                    data["label"] = edge_label.text
        return data

    def find_graphml_keys(self, graph_element):
        """Extracts all the keys and key defaults from the xml.

        Returns ``(keys, defaults)``: ``keys`` maps key id to a dict with
        "name", "type" (a Python type) and "for"; ``defaults`` maps key id
        to the raw text of the key's ``default`` child. Raises NetworkXError
        when a key has neither attr.name nor yfiles.type.
        """
        graphml_keys = {}
        graphml_key_defaults = {}
        for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
            attr_id = k.get("id")
            attr_type = k.get("attr.type")
            attr_name = k.get("attr.name")
            yfiles_type = k.get("yfiles.type")
            if yfiles_type is not None:
                attr_name = yfiles_type
                attr_type = "yfiles"
            if attr_type is None:
                attr_type = "string"
                warnings.warn(f"No key type for id {attr_id}. Using string")
            if attr_name is None:
                raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
            graphml_keys[attr_id] = {
                "name": attr_name,
                "type": self.python_type[attr_type],
                "for": k.get("for"),
            }
            # check for "default" subelement of key element
            default = k.find(f"{{{self.NS_GRAPHML}}}default")
            if default is not None:
                graphml_key_defaults[attr_id] = default.text
        return graphml_keys, graphml_key_defaults