Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/networkx/readwrite/graphml.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 """ | |
| 2 ******* | |
| 3 GraphML | |
| 4 ******* | |
| 5 Read and write graphs in GraphML format. | |
| 6 | |
| 7 This implementation does not support mixed graphs (directed and undirected | |
| 8 edges together), hyperedges, nested graphs, or ports. | |
| 9 | |
| 10 "GraphML is a comprehensive and easy-to-use file format for graphs. It | |
| 11 consists of a language core to describe the structural properties of a | |
| 12 graph and a flexible extension mechanism to add application-specific | |
| 13 data. Its main features include support of | |
| 14 | |
| 15 * directed, undirected, and mixed graphs, | |
| 16 * hypergraphs, | |
| 17 * hierarchical graphs, | |
| 18 * graphical representations, | |
| 19 * references to external data, | |
| 20 * application-specific attribute data, and | |
| 21 * light-weight parsers. | |
| 22 | |
| 23 Unlike many other file formats for graphs, GraphML does not use a | |
| 24 custom syntax. Instead, it is based on XML and hence ideally suited as | |
| 25 a common denominator for all kinds of services generating, archiving, | |
| 26 or processing graphs." | |
| 27 | |
| 28 http://graphml.graphdrawing.org/ | |
| 29 | |
| 30 Format | |
| 31 ------ | |
| 32 GraphML is an XML format. See | |
| 33 http://graphml.graphdrawing.org/specification.html for the specification and | |
| 34 http://graphml.graphdrawing.org/primer/graphml-primer.html | |
| 35 for examples. | |
| 36 """ | |
| 37 import warnings | |
| 38 from collections import defaultdict | |
| 39 | |
| 40 from xml.etree.ElementTree import Element, ElementTree, tostring, fromstring | |
| 41 | |
| 42 try: | |
| 43 import lxml.etree as lxmletree | |
| 44 except ImportError: | |
| 45 lxmletree = None | |
| 46 | |
| 47 import networkx as nx | |
| 48 from networkx.utils import open_file | |
| 49 | |
| 50 __all__ = [ | |
| 51 "write_graphml", | |
| 52 "read_graphml", | |
| 53 "generate_graphml", | |
| 54 "write_graphml_xml", | |
| 55 "write_graphml_lxml", | |
| 56 "parse_graphml", | |
| 57 "GraphMLWriter", | |
| 58 "GraphMLReader", | |
| 59 ] | |
| 60 | |
| 61 | |
@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
):
    """Serialize ``G`` as GraphML to ``path`` with the ElementTree-based writer.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml(G, "test.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    writer_options = {
        "encoding": encoding,
        "prettyprint": prettyprint,
        "infer_numeric_types": infer_numeric_types,
        "named_key_ids": named_key_ids,
    }
    document = GraphMLWriter(**writer_options)
    # Build the whole tree in memory first, then write it out in one go.
    document.add_graph_element(G)
    document.dump(path)
| 109 | |
| 110 | |
@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
):
    """Write G in GraphML XML format to path

    This function uses the LXML framework and should be faster than
    the version using the xml library. If lxml could not be imported,
    the call is delegated to `write_graphml_xml` instead of failing.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml_lxml(G, "fourpath.graphml")  # doctest: +SKIP

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    if lxmletree is None:
        # The module-level import sets ``lxmletree`` to None when lxml is not
        # installed; GraphMLWriterLxml would then fail on ``lxmletree.Element``.
        # Use the pure-Python writer so the call still produces a file.
        return write_graphml_xml(
            G,
            path,
            encoding=encoding,
            prettyprint=prettyprint,
            infer_numeric_types=infer_numeric_types,
            named_key_ids=named_key_ids,
        )

    writer = GraphMLWriterLxml(
        path,
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
    )
    # dump() closes the <graphml> element and the underlying incremental file.
    writer.dump()
| 162 | |
| 163 | |
def generate_graphml(G, encoding="utf-8", prettyprint=True, named_key_ids=False):
    """Yield the GraphML representation of ``G`` one line at a time.

    Parameters
    ----------
    G : graph
        A networkx graph
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))  # doctest: +SKIP
    >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    This implementation does not support mixed graphs (directed and undirected
    edges together), hyperedges, nested graphs, or ports.
    """
    document = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        named_key_ids=named_key_ids,
    )
    document.add_graph_element(G)
    # str(document) renders the complete XML text; hand it out line by line.
    rendered = str(document)
    for line in rendered.splitlines():
        yield line
| 196 | |
| 197 | |
@open_file(0, mode="rb")
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read a graph in GraphML format from ``path`` and return the first graph.

    Parameters
    ----------
    path : file or string
        File or filename to read.
        Filenames ending in .gz or .bz2 are handled as compressed files.

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If parallel edges are present or `force_multigraph=True` then
        a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
        The returned graph is directed if the file indicates it should be.

    Raises
    ------
    NetworkXError
        If no graph could be read from the file.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    Files with the yEd "yfiles" extension can be read but the graphics
    information is discarded.

    yEd compressed files ("file.graphmlz" extension) can be read by renaming
    the file to "file.graphml.gz".

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(path=path))
    if not graphs:
        # Nothing found: the root tag may lack its namespace declaration.
        # Re-read the raw bytes, patch in the full header and parse again.
        path.seek(0)
        raw = path.read()
        patched = raw.replace(
            b"<graphml>",
            b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
        )
        graphs = list(reader(string=patched))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
| 269 | |
| 270 | |
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    """Parse GraphML text and return the first graph it contains.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information
        (e.g., contents of a graphml file).

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.


    Returns
    -------
    graph: NetworkX graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.

    Raises
    ------
    NetworkXError
        If no graph could be read from the string.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> H = nx.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(string=graphml_string))
    if not graphs:
        # Nothing found: retry with the namespace declaration patched into a
        # bare <graphml> root tag.
        patched = graphml_string.replace(
            "<graphml>",
            '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
        )
        graphs = list(reader(string=patched))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
| 342 | |
| 343 | |
class GraphML:
    """Constants and type tables shared by the GraphML reader and writers.

    ``types`` pairs Python (and, when available, NumPy) types with GraphML
    ``attr.type`` names. ``xml_type`` maps a Python type to its GraphML name
    and ``python_type`` maps a GraphML name back to a Python type; in both
    tables the last matching entry of ``types`` wins.
    """

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    types = [
        (int, "integer"),  # for Gephi GraphML bug
        (str, "yfiles"),
        (str, "string"),
        (int, "int"),
        (float, "float"),
        (float, "double"),
        (bool, "boolean"),
    ]

    # These additions to types allow writing numpy types
    try:
        import numpy as np
    except Exception:
        # numpy is optional: without it only the builtin types are supported.
        pass
    else:
        # prepend so that python types are created upon read (last entry wins)
        # ``np.float_`` is intentionally not listed: it was only an alias of
        # ``np.float64`` and no longer exists in NumPy 2.0, where accessing it
        # raises AttributeError while this class body is executed.
        types = [
            (np.float64, "float"),
            (np.float32, "float"),
            (np.float16, "float"),
            (np.int_, "int"),
            (np.int8, "int"),
            (np.int16, "int"),
            (np.int32, "int"),
            (np.int64, "int"),
            (np.uint8, "int"),
            (np.uint16, "int"),
            (np.uint32, "int"),
            (np.uint64, "int"),
            (np.intc, "int"),
            (np.intp, "int"),
        ] + types

    xml_type = dict(types)
    python_type = dict(reversed(a) for a in types)

    # This page says that data types in GraphML follow Java(TM).
    # http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    # http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }
| 409 | |
| 410 | |
class GraphMLWriter(GraphML):
    """Build a GraphML document in memory with ``xml.etree.ElementTree``.

    Graphs are added with `add_graph_element` / `add_graphs`; the finished
    document is obtained with ``str(writer)`` or written with `dump`.

    Parameters
    ----------
    graph : graph, optional
        If given, it is added to the document immediately.
    encoding : string
        Encoding used by `dump` and when decoding in ``__str__``.
    prettyprint : bool
        If True, indent the XML before it is rendered.
    infer_numeric_types : bool
        If True, pick the most general type seen for an attribute name
        within a scope (see `attr_type`).
    named_key_ids : bool
        If True, use the attribute name as the ``id`` of ``<key>`` elements.
    """

    def __init__(
        self,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
    ):
        # Element factory; the lxml subclass swaps this for lxml's Element.
        self.myElement = Element

        self.infer_numeric_types = infer_numeric_types
        self.prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.encoding = encoding
        self.xml = self.myElement(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        # (name, xml type, scope) -> key id
        self.keys = {}
        # xml element -> list of [name, value, scope, default] awaiting output
        self.attributes = defaultdict(list)
        # (str(name), scope) -> set of Python types seen for that attribute
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def __str__(self):
        """Return the document as text, indenting it first if requested."""
        if self.prettyprint:
            self.indent(self.xml)
        s = tostring(self.xml).decode(self.encoding)
        return s

    def attr_type(self, name, scope, value):
        """Infer the attribute type of data named name. Currently this only
        supports inference of numeric types.

        If self.infer_numeric_types is false, type(value) is used. Otherwise,
        pick the most general of types found across all values with name and
        scope. This means edges with data named 'weight' are treated
        separately from nodes with data named 'weight'.
        """
        if not self.infer_numeric_types:
            return type(value)

        # Types are recorded under the stringified attribute name, so the
        # lookup must stringify too; otherwise a non-string key finds nothing.
        types = self.attribute_types.get((str(name), scope))
        if not types:
            # Nothing recorded for this attribute: use the value's own type.
            return type(value)

        if len(types) > 1:
            xml_types = {self.xml_type[t] for t in types}
            if "string" in xml_types:
                return str
            if "float" in xml_types or "double" in xml_types:
                return float
            return int
        return next(iter(types))

    def get_key(self, name, attr_type, scope, default):
        """Return the id of the ``<key>`` for (name, attr_type, scope).

        The key element is created, with an optional ``<default>`` child, and
        inserted at the front of ``self.xml`` the first time it is requested.
        """
        keys_key = (name, attr_type, scope)
        try:
            return self.keys[keys_key]
        except KeyError:
            if self.named_key_ids:
                new_id = name
            else:
                new_id = f"d{len(self.keys)}"

            self.keys[keys_key] = new_id
            key_kwargs = {
                "id": new_id,
                "for": scope,
                "attr.name": name,
                "attr.type": attr_type,
            }
            key_element = self.myElement("key", **key_kwargs)
            # add subelement for data default value if present
            if default is not None:
                default_element = self.myElement("default")
                default_element.text = str(default)
                key_element.append(default_element)
            self.xml.insert(0, key_element)
        return new_id

    def add_data(self, name, element_type, value, scope="all", default=None):
        """
        Make a data element for an edge or a node. Keep a log of the
        type in the keys table.

        Raises NetworkXError if ``element_type`` has no GraphML equivalent.
        """
        if element_type not in self.xml_type:
            msg = f"GraphML writer does not support {element_type} as data values."
            raise nx.NetworkXError(msg)
        keyid = self.get_key(name, self.xml_type[element_type], scope, default)
        data_element = self.myElement("data", key=keyid)
        data_element.text = str(value)
        return data_element

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data to edges or nodes, and stores type information
        to be added later. See add_graph_element.
        """
        for k, v in data.items():
            self.attribute_types[(str(k), scope)].add(type(v))
            self.attributes[xml_obj].append([k, v, scope, default.get(k)])

    def add_nodes(self, G, graph_element):
        """Append a ``<node>`` for every node of G and queue its attributes."""
        default = G.graph.get("node_default", {})
        for node, data in G.nodes(data=True):
            node_element = self.myElement("node", id=str(node))
            self.add_attributes("node", node_element, data, default)
            graph_element.append(node_element)

    def add_edges(self, G, graph_element):
        """Append an ``<edge>`` for every edge of G and queue its attributes.

        For multigraphs the edge key is written as the edge ``id``.
        """
        default = G.graph.get("edge_default", {})
        if G.is_multigraph():
            for u, v, key, data in G.edges(data=True, keys=True):
                edge_element = self.myElement(
                    "edge", source=str(u), target=str(v), id=str(key)
                )
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)
        else:
            for u, v, data in G.edges(data=True):
                edge_element = self.myElement("edge", source=str(u), target=str(v))
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML and append it to the document.

        ``G.graph["id"]``, if present, becomes the ``id`` of the ``<graph>``
        element; ``G`` itself is left unmodified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read the id without popping it so that writing has no side effect
        # on the caller's graph.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self.myElement("graph", edgedefault=default_edge_type)
        else:
            graph_element = self.myElement(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )
        # "id" is emitted as an XML attribute above, not as a data element.
        reserved = ("node_default", "edge_default", "id")
        data = {k: v for (k, v) in G.graph.items() if k not in reserved}
        self.add_attributes("graph", graph_element, data, {})
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)

        # self.attributes contains a mapping from XML Objects to a list of
        # data that needs to be added to them.
        # We postpone processing in order to do type inference/generalization.
        # See self.attr_type
        for xml_obj, pending in self.attributes.items():
            for k, v, scope, attr_default in pending:
                xml_obj.append(
                    self.add_data(
                        str(k),
                        self.attr_type(str(k), scope, v),
                        str(v),
                        scope,
                        attr_default,
                    )
                )
        # Everything queued has been written; forget it so that adding another
        # graph does not append the same data elements a second time.
        self.attributes.clear()
        self.xml.append(graph_element)

    def add_graphs(self, graph_list):
        """ Add many graphs to this GraphML document. """
        for G in graph_list:
            self.add_graph_element(G)

    def dump(self, stream):
        """Write the document, with an XML declaration, to ``stream``."""
        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(stream, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """Insert newlines and indentation into ``elem`` in place."""
        # NOTE(review): the indent unit appears as a single space in this copy
        # of the file, whose whitespace looks collapsed -- confirm the width.
        i = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            child = None
            for child in elem:
                self.indent(child, level + 1)
            # The last child's tail closes this element, so it goes back to
            # the parent's indentation level.
            if not child.tail or not child.tail.strip():
                child.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i
| 607 | |
| 608 | |
class IncrementalElement:
    """Element-like facade over an ``_IncrementalWriter``.

    Only the calls GraphMLWriter actually makes are provided; this is not a
    full Element implementation.
    """

    def __init__(self, xml, prettyprint):
        # ``xml`` is the incremental writer; ``prettyprint`` is forwarded to
        # each of its write() calls.
        self.prettyprint = prettyprint
        self.xml = xml

    def append(self, element):
        """Write ``element`` straight to the underlying writer."""
        sink, pretty = self.xml, self.prettyprint
        sink.write(element, pretty_print=pretty)
| 622 | |
| 623 | |
class GraphMLWriterLxml(GraphMLWriter):
    """GraphML writer that streams to ``path`` through ``lxml.etree.xmlfile``.

    The XML declaration and the opening ``<graphml>`` tag are written by the
    constructor; `dump` must be called to close the document.

    Parameters
    ----------
    path : file or string
        Target handed to ``lxml.etree.xmlfile``.
    graph : graph, optional
        If given, it is written immediately.
    encoding : string
        Encoding passed to ``lxml.etree.xmlfile``.
    prettyprint : bool
        Forwarded as ``pretty_print`` to every incremental write.
    infer_numeric_types : bool
        If True, pick the most general type seen for an attribute name
        within a scope (see `attr_type`).
    named_key_ids : bool
        If True, use the attribute name as the ``id`` of ``<key>`` elements.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
    ):
        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.infer_numeric_types = infer_numeric_types

        # The context managers are entered by hand here and exited in dump().
        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # The keys are collected in a plain list and written out just before
        # the graph element they belong to is opened.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def _register_keys(self, scope, data_dicts, default):
        """Record the value types seen in ``scope`` and create its keys.

        All types are gathered first so that, with numeric inference on, each
        key is declared with the most general type needed.
        """
        data_dicts = list(data_dicts)
        for d in data_dicts:
            for k, v in d.items():
                self.attribute_types[(str(k), scope)].add(type(v))
        for d in data_dicts:
            for k, v in d.items():
                # Look the type up under the stringified name, the same form
                # it was recorded under and that add_attributes uses.
                T = self.xml_type[self.attr_type(str(k), scope, v)]
                self.get_key(str(k), T, scope, default.get(k))

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        ``G.graph["id"]``, if present, becomes the ``id`` of the ``<graph>``
        element; ``G`` itself is left unmodified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read the id without popping it so that writing has no side effect
        # on the caller's graph.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self._xml.element("graph", edgedefault=default_edge_type)
        else:
            graph_element = self._xml.element(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )

        # gather attributes types for the whole graph
        # to find the most general numeric format needed.
        # Then pass through attributes to create key_id for each.
        # "id" is emitted as an XML attribute above, not as a data element.
        reserved = ("node_default", "edge_default", "id")
        graphdata = {k: v for k, v in G.graph.items() if k not in reserved}
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})

        self._register_keys("graph", [graphdata], {})
        self._register_keys("node", (d for _, d in G.nodes(data=True)), node_default)
        # edges(data=True) yields (u, v, data) for simple and multi graphs.
        self._register_keys(
            "edge", (d for _, _, d in G.edges(data=True)), edge_default
        )

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)
        # These keys are on disk now; drop them so that a later graph does not
        # write them again. ``self.keys`` still remembers their ids.
        del self.xml[:]

        # The incremental_writer writes each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        # The document is streamed out, so there is no tree to render here.
        return object.__str__(self)

    def dump(self):
        """Close the ``<graphml>`` element and the underlying xmlfile."""
        self._graphml.__exit__(None, None, None)
        self._xml_base.__exit__(None, None, None)
| 750 | |
| 751 | |
# Default writer: the lxml-based one when lxml was importable, otherwise the
# ElementTree-based one.
write_graphml = write_graphml_xml if lxmletree is None else write_graphml_lxml
| 757 | |
| 758 | |
| 759 class GraphMLReader(GraphML): | |
| 760 """Read a GraphML document. Produces NetworkX graph objects.""" | |
| 761 | |
| 762 def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False): | |
| 763 self.node_type = node_type | |
| 764 self.edge_key_type = edge_key_type | |
| 765 self.multigraph = force_multigraph # If False, test for multiedges | |
| 766 self.edge_ids = {} # dict mapping (u,v) tuples to edge id attributes | |
| 767 | |
| 768 def __call__(self, path=None, string=None): | |
| 769 if path is not None: | |
| 770 self.xml = ElementTree(file=path) | |
| 771 elif string is not None: | |
| 772 self.xml = fromstring(string) | |
| 773 else: | |
| 774 raise ValueError("Must specify either 'path' or 'string' as kwarg") | |
| 775 (keys, defaults) = self.find_graphml_keys(self.xml) | |
| 776 for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"): | |
| 777 yield self.make_graph(g, keys, defaults) | |
| 778 | |
| 779 def make_graph(self, graph_xml, graphml_keys, defaults, G=None): | |
| 780 # set default graph type | |
| 781 edgedefault = graph_xml.get("edgedefault", None) | |
| 782 if G is None: | |
| 783 if edgedefault == "directed": | |
| 784 G = nx.MultiDiGraph() | |
| 785 else: | |
| 786 G = nx.MultiGraph() | |
| 787 # set defaults for graph attributes | |
| 788 G.graph["node_default"] = {} | |
| 789 G.graph["edge_default"] = {} | |
| 790 for key_id, value in defaults.items(): | |
| 791 key_for = graphml_keys[key_id]["for"] | |
| 792 name = graphml_keys[key_id]["name"] | |
| 793 python_type = graphml_keys[key_id]["type"] | |
| 794 if key_for == "node": | |
| 795 G.graph["node_default"].update({name: python_type(value)}) | |
| 796 if key_for == "edge": | |
| 797 G.graph["edge_default"].update({name: python_type(value)}) | |
| 798 # hyperedges are not supported | |
| 799 hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge") | |
| 800 if hyperedge is not None: | |
| 801 raise nx.NetworkXError("GraphML reader doesn't support hyperedges") | |
| 802 # add nodes | |
| 803 for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"): | |
| 804 self.add_node(G, node_xml, graphml_keys, defaults) | |
| 805 # add edges | |
| 806 for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"): | |
| 807 self.add_edge(G, edge_xml, graphml_keys) | |
| 808 # add graph data | |
| 809 data = self.decode_data_elements(graphml_keys, graph_xml) | |
| 810 G.graph.update(data) | |
| 811 | |
| 812 # switch to Graph or DiGraph if no parallel edges were found | |
| 813 if self.multigraph: | |
| 814 return G | |
| 815 | |
| 816 G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G) | |
| 817 # add explicit edge "id" from file as attribute in NX graph. | |
| 818 nx.set_edge_attributes(G, values=self.edge_ids, name="id") | |
| 819 return G | |
| 820 | |
def add_node(self, G, node_xml, graphml_keys, defaults):
    """Add the node described by ``node_xml`` to the graph ``G``.

    Parameters
    ----------
    G : graph
        Graph that receives the node through ``G.add_node``.
    node_xml : xml.etree.ElementTree.Element
        The GraphML ``<node>`` element to read.
    graphml_keys : dict
        Key definitions, handed to ``decode_data_elements`` to decode the
        node's ``<data>`` children.
    defaults : dict
        Key defaults; only forwarded to ``make_graph`` when the node is a
        yFiles group that contains a nested ``<graph>``.

    Notes
    -----
    A ``<port>`` child is not supported and only triggers a warning.
    A node flagged ``yfiles.foldertype="group"`` that has no nested
    ``<graph>`` child is added as an ordinary node.
    """
    # warn on finding unsupported ports tag
    if node_xml.find(f"{{{self.NS_GRAPHML}}}port") is not None:
        warnings.warn("GraphML port tag not supported.")

    # cast the id to the configured node type and attach decoded attributes
    node_id = self.node_type(node_xml.get("id"))
    data = self.decode_data_elements(graphml_keys, node_xml)
    G.add_node(node_id, **data)

    # yFiles group nodes keep their child nodes in a nested <graph>
    if node_xml.attrib.get("yfiles.foldertype") == "group":
        graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
        # `find` returns None when the group has no nested graph; recursing
        # with None would fail inside make_graph, so skip it in that case.
        if graph_xml is not None:
            self.make_graph(graph_xml, graphml_keys, defaults, G)
| 837 | |
def add_edge(self, G, edge_element, graphml_keys):
    """Insert the edge described by ``edge_element`` into ``G``.

    The edge's ``<data>`` children are decoded with
    ``decode_data_elements`` and stored as edge attributes. A non-empty
    ``id`` attribute is recorded in ``self.edge_ids`` and, converted with
    ``self.edge_key_type`` when possible, used as the edge key; otherwise
    the decoded ``"key"`` attribute (if any) is used. ``self.multigraph``
    is set when an edge between the same endpoints already exists.

    Raises
    ------
    NetworkXError
        If the edge's ``directed`` attribute contradicts the
        directedness of ``G``.
    """
    # ports are not supported: warn and carry on
    if edge_element.find(f"{{{self.NS_GRAPHML}}}port") is not None:
        warnings.warn("GraphML port tag not supported.")

    # mixed directed / undirected edges are rejected
    directed = edge_element.get("directed")
    graph_is_directed = G.is_directed()
    if graph_is_directed and directed == "false":
        raise nx.NetworkXError("directed=false edge found in directed graph.")
    if not graph_is_directed and directed == "true":
        raise nx.NetworkXError("directed=true edge found in undirected graph.")

    u = self.node_type(edge_element.get("source"))
    v = self.node_type(edge_element.get("target"))
    attrs = self.decode_data_elements(graphml_keys, edge_element)

    # GraphML keeps the edge id as an XML attribute; it doubles as the
    # multigraph key unless it is absent, in which case a "key" data
    # attribute is used instead.
    raw_id = edge_element.get("id")
    if not raw_id:
        edge_key = attrs.get("key")
    else:
        # remembered so `make_graph` can restore ids on non-multigraphs
        self.edge_ids[u, v] = raw_id
        try:
            edge_key = self.edge_key_type(raw_id)
        except ValueError:
            # not convertible: keep the id exactly as written in the file
            edge_key = raw_id

    # a second edge between the same endpoints means parallel edges
    if G.has_edge(u, v):
        self.multigraph = True

    # add_edges_from (with a single edge) sidesteps the error add_edge
    # raises when the attribute dict itself contains a 'key' entry
    G.add_edges_from([(u, v, edge_key, attrs)])
| 879 | |
def decode_data_elements(self, graphml_keys, obj_xml):
    """Decode the ``<data>`` children of ``obj_xml`` into a dict.

    Each ``<data>`` element is looked up in ``graphml_keys`` by its ``key``
    attribute. Plain text values are converted with the key's type
    (booleans through ``self.convert_bool``, case-insensitively). Elements
    with sub-elements are assumed to be yFiles extensions, from which
    ``"x"``, ``"y"`` and ``"label"`` are extracted when present. A
    ``<data>`` element with neither text nor sub-elements adds nothing.

    Raises
    ------
    NetworkXError
        If a ``<data>`` element refers to a key missing from
        ``graphml_keys``.
    """
    y_node_kinds = ("ShapeNode", "SVGNode", "ImageNode")
    # all the different types of edges available in yEd
    y_edge_kinds = (
        "PolyLineEdge",
        "SplineEdge",
        "QuadCurveEdge",
        "BezierEdge",
        "ArcEdge",
    )

    decoded = {}
    for data_xml in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
        key_id = data_xml.get("key")
        try:
            attr_name = graphml_keys[key_id]["name"]
            attr_type = graphml_keys[key_id]["type"]
        except KeyError as err:
            raise nx.NetworkXError(f"Bad GraphML data: no key {key_id}") from err

        raw_text = data_xml.text
        if len(data_xml) > 0:
            # anything with subelements is assumed to be a yfiles extension
            node_label = None
            for kind in y_node_kinds:
                prefix = f"{{{self.NS_Y}}}{kind}/{{{self.NS_Y}}}"
                geometry = data_xml.find(f"{prefix}Geometry")
                if geometry is not None:
                    decoded["x"] = geometry.get("x")
                    decoded["y"] = geometry.get("y")
                if node_label is None:
                    node_label = data_xml.find(f"{prefix}NodeLabel")
            if node_label is not None:
                decoded["label"] = node_label.text

            # first edge label found among the known edge kinds, if any
            candidates = (
                data_xml.find(f"{{{self.NS_Y}}}{kind}/{{{self.NS_Y}}}EdgeLabel")
                for kind in y_edge_kinds
            )
            edge_label = next(
                (found for found in candidates if found is not None), None
            )
            if edge_label is not None:
                decoded["label"] = edge_label.text
        elif raw_text is not None:
            if attr_type == bool:
                # Ignore cases.
                # http://docs.oracle.com/javase/6/docs/api/java/lang/
                # Boolean.html#parseBoolean%28java.lang.String%29
                decoded[attr_name] = self.convert_bool[raw_text.lower()]
            else:
                decoded[attr_name] = attr_type(raw_text)
    return decoded
| 930 | |
def find_graphml_keys(self, graph_element):
    """Collect the ``<key>`` definitions and their defaults from the XML.

    Returns
    -------
    (keys, key_defaults) : tuple of dict
        ``keys`` maps each key id to a dict with entries ``"name"``,
        ``"type"`` (looked up in ``self.python_type``) and ``"for"``.
        ``key_defaults`` maps a key id to the text of its ``<default>``
        child, for keys that have one.

    Raises
    ------
    NetworkXError
        If a key has neither ``attr.name`` nor ``yfiles.type``.
    """
    key_tag = f"{{{self.NS_GRAPHML}}}key"
    default_tag = f"{{{self.NS_GRAPHML}}}default"

    keys = {}
    key_defaults = {}
    for key_xml in graph_element.findall(key_tag):
        key_id = key_xml.get("id")
        name, type_name = key_xml.get("attr.name"), key_xml.get("attr.type")

        # a yfiles key is named after its yfiles.type and typed "yfiles"
        yfiles_type = key_xml.get("yfiles.type")
        if yfiles_type is not None:
            name, type_name = yfiles_type, "yfiles"

        if type_name is None:
            type_name = "string"
            warnings.warn(f"No key type for id {key_id}. Using string")
        if name is None:
            raise nx.NetworkXError(f"Unknown key for id {key_id}.")

        keys[key_id] = {
            "name": name,
            "type": self.python_type[type_name],
            "for": key_xml.get("for"),
        }

        # optional <default> subelement of the key element
        default_xml = key_xml.find(default_tag)
        if default_xml is not None:
            key_defaults[key_id] = default_xml.text
    return keys, key_defaults
