Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/networkx/readwrite/gml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 # encoding: utf-8 | |
| 2 # Copyright (C) 2008-2019 by | |
| 3 # Aric Hagberg <hagberg@lanl.gov> | |
| 4 # Dan Schult <dschult@colgate.edu> | |
| 5 # Pieter Swart <swart@lanl.gov> | |
| 6 # All rights reserved. | |
| 7 # BSD license. | |
| 8 # | |
| 9 # Author: Aric Hagberg (hagberg@lanl.gov) | |
| 10 """ | |
| 11 Read graphs in GML format. | |
| 12 | |
| 13 "GML, the Graph Modelling Language, is our proposal for a portable | |
| 14 file format for graphs. GML's key features are portability, simple | |
| 15 syntax, extensibility and flexibility. A GML file consists of a | |
| 16 hierarchical key-value lists. Graphs can be annotated with arbitrary | |
| 17 data structures. The idea for a common file format was born at the | |
| 18 GD'95; this proposal is the outcome of many discussions. GML is the | |
| 19 standard file format in the Graphlet graph editor system. It has been | |
| 20 overtaken and adapted by several other systems for drawing graphs." | |
| 21 | |
| 22 GML files are stored using a 7-bit ASCII encoding with any extended | |
| 23 ASCII characters (iso8859-1) appearing as HTML character entities. | |
| 24 You will need to give some thought into how the exported data should | |
| 25 interact with different languages and even different Python versions. | |
| 26 Re-importing from gml is also a concern. | |
| 27 | |
| 28 Without specifying a `stringizer`/`destringizer`, the code is capable of | |
| 29 handling `int`/`float`/`str`/`dict`/`list` data as required by the GML | |
| 30 specification. For other data types, you need to explicitly supply a | |
| 31 `stringizer`/`destringizer`. | |
| 32 | |
| 33 For better interoperability of data generated by Python 2 and Python 3, | |
| 34 we've provided `literal_stringizer` and `literal_destringizer`. | |
| 35 | |
| 36 For additional documentation on the GML file format, please see the | |
| 37 `GML website <http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html>`_. | |
| 38 | |
| 39 Several example graphs in GML format may be found on Mark Newman's | |
| 40 `Network data page <http://www-personal.umich.edu/~mejn/netdata/>`_. | |
| 41 """ | |
| 42 try: | |
| 43 try: | |
| 44 from cStringIO import StringIO | |
| 45 except ImportError: | |
| 46 from StringIO import StringIO | |
| 47 except ImportError: | |
| 48 from io import StringIO | |
| 49 from ast import literal_eval | |
| 50 from collections import defaultdict | |
| 51 import networkx as nx | |
| 52 from networkx.exception import NetworkXError | |
| 53 from networkx.utils import open_file | |
| 54 | |
| 55 import re | |
| 56 try: | |
| 57 import htmlentitydefs | |
| 58 except ImportError: | |
| 59 # Python 3.x | |
| 60 import html.entities as htmlentitydefs | |
| 61 | |
| 62 __all__ = ['read_gml', 'parse_gml', 'generate_gml', 'write_gml'] | |
| 63 | |
| 64 | |
# Python 2/3 compatibility aliases.  Each probe references a name that only
# exists on Python 2 and, when it is missing, binds the Python 3 equivalent
# so the rest of the module can use ``long``, ``unicode`` and ``unichr``
# unconditionally.
try:
    long
except NameError:
    long = int
try:
    unicode
except NameError:
    unicode = str
try:
    unichr
except NameError:
    unichr = chr
try:
    # Probe whether this interpreter accepts the u'' string prefix.
    literal_eval(r"u'\u4444'")
except SyntaxError:
    # Remove 'u' prefixes in unicode literals in Python 3
    def rtp_fix_unicode(s):
        """Return `s` without a leading ``u`` string prefix.

        Only an actual ``u`` prefix is removed; any other literal is
        returned unchanged so that its first character is not lost.
        """
        return s[1:] if s[:1] == 'u' else s
else:
    # The interpreter understands u'' literals: no rewriting is needed.
    rtp_fix_unicode = None
| 84 | |
| 85 | |
def escape(text):
    """Escape a string using XML numeric character references.

    Every character outside the printable ASCII range (space through
    tilde), as well as every double quote and ampersand, is replaced by
    ``&#N;`` where ``N`` is the decimal code point of the character.

    Parameters
    ----------
    text : string
        The text to escape.

    Returns
    -------
    escaped : str
        The escaped text, coerced to the native ``str`` type.
    """
    def to_reference(match):
        return '&#%d;' % ord(match.group(0))

    escaped = re.sub('[^ -~]|[&"]', to_reference, text)
    if isinstance(escaped, str):
        return escaped
    return str(escaped)
| 98 | |
| 99 | |
def unescape(text):
    """Expand XML character references found in `text`.

    Decimal (``&#65;``) and hexadecimal (``&#x41;``) references are
    replaced by the character with that code point, and named entities
    (``&amp;``) are looked up in ``htmlentitydefs.name2codepoint``.  A
    reference that names an unknown entity, or whose code point cannot be
    turned into a character, is left in the text unchanged.

    Parameters
    ----------
    text : string
        Text possibly containing character references.

    Returns
    -------
    unescaped : string
        `text` with every resolvable reference replaced.
    """
    def resolve(match):
        reference = match.group(0)
        body = reference[1:-1]  # strip the surrounding '&' and ';'
        if body[0] == '#':
            # Numeric character reference, hexadecimal when marked by 'x'.
            if body[1] == 'x':
                codepoint = int(body[2:], 16)
            else:
                codepoint = int(body[1:])
        else:
            # Named entity; unknown names are passed through untouched.
            codepoint = htmlentitydefs.name2codepoint.get(body)
            if codepoint is None:
                return reference
        try:
            if codepoint < 256:
                return chr(codepoint)
            return unichr(codepoint)
        except (ValueError, OverflowError):
            return reference  # not a representable code point

    pattern = "&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));"
    return re.sub(pattern, resolve, text)
| 122 | |
| 123 | |
def literal_destringizer(rep):
    """Evaluate a Python literal and return the value it denotes.

    Parameters
    ----------
    rep : string
        A Python literal.

    Returns
    -------
    value : object
        The value of the Python literal.

    Raises
    ------
    ValueError
        If `rep` is not a string, or is not a Python literal.
    """
    if not isinstance(rep, (str, unicode)):
        raise ValueError('%r is not a string' % (rep,))

    # Keep `rep` intact for the error message; only the text handed to
    # literal_eval goes through the optional prefix rewrite.
    literal = rep
    if rtp_fix_unicode is not None:
        literal = rtp_fix_unicode(literal)
    try:
        return literal_eval(literal)
    except SyntaxError:
        raise ValueError('%r is not a valid Python literal' % (rep,))
| 152 | |
| 153 | |
@open_file(0, mode='rb')
def read_gml(path, label='label', destringizer=None):
    """Read graph in GML format from `path`.

    Parameters
    ----------
    path : filename or filehandle
        The filename or filehandle to read from.  The data is read in
        binary mode and every line must be ASCII-decodable.

    label : string, optional
        If not None, the parsed nodes will be renamed according to node
        attributes indicated by `label`. Default value: 'label'.

    destringizer : callable, optional
        A `destringizer` that recovers values stored as strings in GML. If it
        cannot convert a string to a value, a `ValueError` is raised. Default
        value : None.

    Returns
    -------
    G : NetworkX graph
        The parsed graph.

    Raises
    ------
    NetworkXError
        If the input cannot be parsed, or is not ASCII-encoded.

    See Also
    --------
    write_gml, parse_gml, literal_destringizer

    Notes
    -----
    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    handling `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification.  For other data types, you need to explicitly supply a
    `stringizer`/`destringizer`.

    For additional documentation on the GML file format, please see the
    `GML url <http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html>`_.

    See the module docstring :mod:`networkx.readwrite.gml` for more details.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_gml(G, 'test.gml')
    >>> H = nx.read_gml('test.gml')
    """
    def filter_lines(lines):
        # Decode each raw line and strip its trailing newline before it is
        # handed to the tokenizer.
        for line in lines:
            try:
                line = line.decode('ascii')
            except UnicodeDecodeError:
                raise NetworkXError('input is not ASCII-encoded')
            if not isinstance(line, str):
                # decode() returned a non-native string type (Python 2
                # ``unicode``); convert this line, not the whole iterable.
                line = str(line)
            if line and line[-1] == '\n':
                line = line[:-1]
            yield line

    G = parse_gml_lines(filter_lines(path), label, destringizer)
    return G
| 220 | |
| 221 | |
def parse_gml(lines, label='label', destringizer=None):
    """Parse GML graph from a string or iterable.

    Parameters
    ----------
    lines : string or iterable of strings
        Data in GML format.  ASCII-encoded ``bytes`` are accepted as well,
        either as a single blob or as the items of the iterable.

    label : string, optional
        If not None, the parsed nodes will be renamed according to node
        attributes indicated by `label`. Default value: 'label'.

    destringizer : callable, optional
        A `destringizer` that recovers values stored as strings in GML. If it
        cannot convert a string to a value, a `ValueError` is raised. Default
        value : None.

    Returns
    -------
    G : NetworkX graph
        The parsed graph.

    Raises
    ------
    NetworkXError
        If the input cannot be parsed, is not ASCII-encoded, or an item of
        the iterable contains an embedded newline.

    See Also
    --------
    write_gml, read_gml, literal_destringizer

    Notes
    -----
    This stores nested GML attributes as dictionaries in the NetworkX graph,
    node, and edge attribute structures.

    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    handling `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification.  For other data types, you need to explicitly supply a
    `stringizer`/`destringizer`.

    For additional documentation on the GML file format, please see the
    `GML url <http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html>`_.

    See the module docstring :mod:`networkx.readwrite.gml` for more details.
    """
    def decode_line(line):
        if isinstance(line, bytes):
            try:
                # Keep the decoded text: calling str() on a bytes object
                # under Python 3 would yield its repr ("b'...'") instead.
                line = line.decode('ascii')
            except UnicodeDecodeError:
                raise NetworkXError('input is not ASCII-encoded')
        if not isinstance(line, str):
            line = str(line)
        return line

    def filter_lines(lines):
        if isinstance(lines, (bytes, str, unicode)):
            # A single blob of text: split it into lines ourselves.
            lines = decode_line(lines)
            lines = lines.splitlines()
            for line in lines:
                yield line
        else:
            for line in lines:
                line = decode_line(line)
                if line and line[-1] == '\n':
                    line = line[:-1]
                # Only one trailing newline is tolerated per item.
                if line.find('\n') != -1:
                    raise NetworkXError('input line contains newline')
                yield line

    G = parse_gml_lines(filter_lines(lines), label, destringizer)
    return G
| 297 | |
| 298 | |
def parse_gml_lines(lines, label, destringizer):
    """Parse GML `lines` into a graph.

    Parameters
    ----------
    lines : iterable of strings
        The GML text, one string per line.

    label : string or None
        If not None and not 'id', every node must carry this attribute; its
        value becomes the node name in the returned graph.

    destringizer : callable or None
        If given, it is applied to every string value; a `ValueError` raised
        by it is ignored and the plain string is kept.

    Returns
    -------
    G : NetworkX graph
        A `Graph`, `DiGraph`, `MultiGraph` or `MultiDiGraph`, chosen from the
        'directed' and 'multigraph' attributes of the parsed graph.

    Raises
    ------
    NetworkXError
        If the text cannot be tokenized or parsed, contains no graph or more
        than one graph, a node or edge lacks a required attribute, a node id
        or label is duplicated, an edge refers to an undefined node, or an
        edge is duplicated.
    """
    def tokenize():
        """Yield ``(category, value, lineno, pos)`` tokens from `lines`.

        `category` is the index of the matching entry in `patterns`;
        `lineno` and `pos` are 1-based.  Comments and whitespace are
        skipped, and a final ``(None, None, lineno, 1)`` token marks EOF.
        """
        patterns = [
            r'[A-Za-z][0-9A-Za-z_]*\b',  # keys
            # reals
            r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?',
            r'[+-]?[0-9]+',  # ints
            r'".*?"',  # strings
            r'\[',  # dict start
            r'\]',  # dict end
            r'#.*$|\s+'  # comments and whitespaces
        ]
        # One capturing group per pattern, so the group that matched
        # identifies the token category.
        tokens = re.compile(
            '|'.join('(' + pattern + ')' for pattern in patterns))
        lineno = 0
        for line in lines:
            length = len(line)
            pos = 0
            while pos < length:
                match = tokens.match(line, pos)
                if match is not None:
                    for i in range(len(patterns)):
                        group = match.group(i + 1)
                        if group is not None:
                            if i == 0:  # keys
                                value = group.rstrip()
                            elif i == 1:  # reals
                                value = float(group)
                            elif i == 2:  # ints
                                value = int(group)
                            else:
                                value = group
                            if i != 6:  # comments and whitespaces
                                yield (i, value, lineno + 1, pos + 1)
                            pos += len(group)
                            break
                else:
                    raise NetworkXError('cannot tokenize %r at (%d, %d)' %
                                        (line[pos:], lineno + 1, pos + 1))
            lineno += 1
        yield (None, None, lineno + 1, 1)  # EOF

    def unexpected(curr_token, expected):
        """Raise a `NetworkXError` reporting `curr_token` and its position."""
        category, value, lineno, pos = curr_token
        raise NetworkXError(
            'expected %s, found %s at (%d, %d)' %
            (expected, repr(value) if value is not None else 'EOF', lineno,
             pos))

    def consume(curr_token, category, expected):
        """Return the next token if `curr_token` has `category`, else fail."""
        if curr_token[0] == category:
            return next(tokens)
        unexpected(curr_token, expected)

    def parse_kv(curr_token):
        """Parse consecutive key/value pairs starting at `curr_token`.

        Returns the first token that is not a key together with a dict of
        the parsed pairs.
        """
        # Values are collected per key so that repeated keys are preserved.
        dct = defaultdict(list)
        while curr_token[0] == 0:  # keys
            key = curr_token[1]
            curr_token = next(tokens)
            category = curr_token[0]
            if category == 1 or category == 2:  # reals or ints
                value = curr_token[1]
                curr_token = next(tokens)
            elif category == 3:  # strings
                # Drop the surrounding quotes, then expand references.
                value = unescape(curr_token[1][1:-1])
                if destringizer:
                    try:
                        value = destringizer(value)
                    except ValueError:
                        pass
                curr_token = next(tokens)
            elif category == 4:  # dict start
                curr_token, value = parse_dict(curr_token)
            else:
                # Allow for string convertible id and label values
                if key in ("id", "label", "source", "target"):
                    try:
                        # String convert the token value
                        value = unescape(str(curr_token[1]))
                        if destringizer:
                            try:
                                value = destringizer(value)
                            except ValueError:
                                pass
                        curr_token = next(tokens)
                    except Exception:
                        msg = "an int, float, string, '[' or string" + \
                              " convertable ASCII value for node id or label"
                        unexpected(curr_token, msg)
                else:  # Otherwise error out
                    unexpected(curr_token, "an int, float, string or '['")
            dct[key].append(value)
        # A key seen exactly once maps to its value; a repeated key keeps
        # the list of all its values.
        dct = {key: (value if not isinstance(value, list) or len(value) != 1
                     else value[0]) for key, value in dct.items()}
        return curr_token, dct

    def parse_dict(curr_token):
        """Parse a bracketed ``[ key value ... ]`` group."""
        curr_token = consume(curr_token, 4, "'['")  # dict start
        curr_token, dct = parse_kv(curr_token)
        curr_token = consume(curr_token, 5, "']'")  # dict end
        return curr_token, dct

    def parse_graph():
        """Parse the whole token stream and return the single 'graph' dict."""
        curr_token, dct = parse_kv(next(tokens))
        if curr_token[0] is not None:  # EOF
            unexpected(curr_token, 'EOF')
        if 'graph' not in dct:
            raise NetworkXError('input contains no graph')
        graph = dct['graph']
        # A repeated 'graph' key is represented as a list by parse_kv.
        if isinstance(graph, list):
            raise NetworkXError('input contains more than one graph')
        return graph

    tokens = tokenize()
    graph = parse_graph()

    directed = graph.pop('directed', False)
    multigraph = graph.pop('multigraph', False)
    if not multigraph:
        G = nx.DiGraph() if directed else nx.Graph()
    else:
        G = nx.MultiDiGraph() if directed else nx.MultiGraph()
    # Everything except the node and edge lists is a graph attribute.
    G.graph.update((key, value) for key, value in graph.items()
                   if key != 'node' and key != 'edge')

    def pop_attr(dct, category, attr, i):
        """Remove and return ``dct[attr]``, or raise if it is missing."""
        try:
            return dct.pop(attr)
        except KeyError:
            raise NetworkXError(
                "%s #%d has no '%s' attribute" % (category, i, attr))

    nodes = graph.get('node', [])
    mapping = {}
    node_labels = set()
    # A single node is parsed as a dict rather than a list of dicts.
    for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
        id = pop_attr(node, 'node', 'id', i)
        if id in G:
            raise NetworkXError('node id %r is duplicated' % (id,))
        if label is not None and label != 'id':
            node_label = pop_attr(node, 'node', label, i)
            if node_label in node_labels:
                raise NetworkXError('node label %r is duplicated' %
                                    (node_label,))
            node_labels.add(node_label)
            mapping[id] = node_label
        G.add_node(id, **node)

    edges = graph.get('edge', [])
    for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
        source = pop_attr(edge, 'edge', 'source', i)
        target = pop_attr(edge, 'edge', 'target', i)
        if source not in G:
            raise NetworkXError(
                'edge #%d has an undefined source %r' % (i, source))
        if target not in G:
            raise NetworkXError(
                'edge #%d has an undefined target %r' % (i, target))
        if not multigraph:
            if not G.has_edge(source, target):
                G.add_edge(source, target, **edge)
            else:
                msg = "edge #%d (%r%s%r) is duplicated.\n"
                msg2 = 'Hint: If multigraph add "multigraph 1" to file header.'
                info = (i, source, '->' if directed else '--', target)
                raise nx.NetworkXError((msg % info) + msg2)
        else:
            key = edge.pop('key', None)
            if key is not None and G.has_edge(source, target, key):
                raise nx.NetworkXError(
                    'edge #%d (%r%s%r, %r) is duplicated' %
                    (i, source, '->' if directed else '--', target, key))
            G.add_edge(source, target, key, **edge)

    # Nodes were added under their GML ids; rename them to their labels.
    if label is not None and label != 'id':
        G = nx.relabel_nodes(G, mapping)
    return G
| 478 | |
| 479 | |
def literal_stringizer(value):
    """Convert a `value` to a Python literal in GML representation.

    Parameters
    ----------
    value : object
        The `value` to be converted to GML representation.

    Returns
    -------
    rep : string
        A double-quoted Python literal representing value. Unprintable
        characters are replaced by XML character references.

    Raises
    ------
    ValueError
        If `value` cannot be converted to GML.

    Notes
    -----
    `literal_stringizer` is largely the same as `repr` in terms of
    functionality but attempts to prefix `unicode` and `bytes` literals with
    `u` and `b` to provide better interoperability of data generated by
    Python 2 and Python 3.

    The original value can be recovered using the
    :func:`networkx.readwrite.gml.literal_destringizer` function.
    """
    out = StringIO()
    emit = out.write

    def render_items(items):
        # Comma-separated rendering shared by lists, tuples and sets.
        for index, item in enumerate(items):
            if index:
                emit(',')
            render(item)

    def render(obj):
        if obj is None or isinstance(obj, (int, long, bool)):
            # GML uses 1/0 for boolean values.
            if obj is True:
                emit(str(1))
            elif obj is False:
                emit(str(0))
            else:
                emit(str(obj))
        elif isinstance(obj, unicode):
            literal = repr(obj)
            if literal[0] != 'u':
                # Add an explicit prefix only when the text does not fit
                # in latin1.
                try:
                    obj.encode('latin1')
                except UnicodeEncodeError:
                    literal = 'u' + literal
            emit(literal)
        elif isinstance(obj, (float, complex, str, bytes)):
            emit(repr(obj))
        elif isinstance(obj, list):
            emit('[')
            render_items(obj)
            emit(']')
        elif isinstance(obj, tuple):
            if len(obj) > 1:
                emit('(')
                render_items(obj)
                emit(')')
            elif obj:
                # A one-element tuple needs its trailing comma.
                emit('(')
                render(obj[0])
                emit(',)')
            else:
                emit('()')
        elif isinstance(obj, dict):
            emit('{')
            for index, (entry_key, entry_value) in enumerate(obj.items()):
                if index:
                    emit(',')
                render(entry_key)
                emit(':')
                render(entry_value)
            emit('}')
        elif isinstance(obj, set):
            emit('{')
            render_items(obj)
            emit('}')
        else:
            raise ValueError(
                '%r cannot be converted into a Python literal' % (obj,))

    render(value)
    return out.getvalue()
| 583 | |
| 584 | |
def generate_gml(G, stringizer=None):
    r"""Generate a single entry of the graph `G` in GML format.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be converted to GML.

    stringizer : callable, optional
        A `stringizer` which converts non-int/non-float/non-dict values into
        strings. If it cannot convert a value into a string, it should raise a
        `ValueError` to indicate that. Default value: None.

    Returns
    -------
    lines: generator of strings
        Lines of GML data. Newlines are not appended.

    Raises
    ------
    NetworkXError
        If `stringizer` cannot convert a value into a string, or the value to
        convert is not a string while `stringizer` is None.  Also raised for
        an attribute name that is not a string or not a valid GML key.

    See Also
    --------
    literal_stringizer

    Notes
    -----
    Graph attributes named 'directed', 'multigraph', 'node' or
    'edge', node attributes named 'id' or 'label', edge attributes
    named 'source' or 'target' (or 'key' if `G` is a multigraph)
    are ignored because these attribute names are used to encode the graph
    structure.

    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    handling `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification.  For other data types, you need to explicitly supply a
    `stringizer`/`destringizer`.

    For additional documentation on the GML file format, please see the
    `GML url <http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html>`_.

    See the module docstring :mod:`networkx.readwrite.gml` for more details.

    Examples
    --------
    >>> G = nx.Graph()
    >>> G.add_node("1")
    >>> print("\n".join(nx.generate_gml(G)))
    graph [
      node [
        id 0
        label "1"
      ]
    ]
    >>> G = nx.OrderedMultiGraph([("a", "b"), ("a", "b")])
    >>> print("\n".join(nx.generate_gml(G)))
    graph [
      multigraph 1
      node [
        id 0
        label "a"
      ]
      node [
        id 1
        label "b"
      ]
      edge [
        source 0
        target 1
        key 0
      ]
      edge [
        source 0
        target 1
        key 1
      ]
    ]
    """
    valid_keys = re.compile('^[A-Za-z][0-9A-Za-z]*$')

    def stringize(key, value, ignored_keys, indent, in_list=False):
        """Yield the GML lines for one `key`/`value` pair at depth `indent`."""
        if not isinstance(key, (str, unicode)):
            raise NetworkXError('%r is not a string' % (key,))
        if not valid_keys.match(key):
            raise NetworkXError('%r is not a valid key' % (key,))
        if not isinstance(key, str):
            key = str(key)
        if key not in ignored_keys:
            if isinstance(value, (int, long, bool)):
                if key == 'label':
                    # Labels are always written as quoted strings.
                    yield indent + key + ' "' + str(value) + '"'
                elif value is True:
                    # python bool is an instance of int
                    yield indent + key + ' 1'
                elif value is False:
                    yield indent + key + ' 0'
                # GML only supports signed 32-bit integers
                elif value < -2**31 or value >= 2**31:
                    yield indent + key + ' "' + str(value) + '"'
                else:
                    yield indent + key + ' ' + str(value)
            elif isinstance(value, float):
                text = repr(value).upper()
                # GML requires that a real literal contain a decimal point, but
                # repr may not output a decimal point when the mantissa is
                # integral and hence needs fixing.
                epos = text.rfind('E')
                if epos != -1 and text.find('.', 0, epos) == -1:
                    text = text[:epos] + '.' + text[epos:]
                if key == 'label':
                    yield indent + key + ' "' + text + '"'
                else:
                    yield indent + key + ' ' + text
            elif isinstance(value, dict):
                yield indent + key + ' ['
                next_indent = indent + '  '
                for sub_key, sub_value in value.items():
                    for line in stringize(sub_key, sub_value, (),
                                          next_indent):
                        yield line
                yield indent + ']'
            elif isinstance(value, (list, tuple)) and key != 'label' \
                    and value and not in_list:
                # A non-empty sequence is written as the same key repeated
                # once per element; nested sequences fall through below.
                next_indent = indent + '  '
                for val in value:
                    for line in stringize(key, val, (), next_indent, True):
                        yield line
            else:
                if stringizer:
                    try:
                        value = stringizer(value)
                    except ValueError:
                        raise NetworkXError(
                            '%r cannot be converted into a string' % (value,))
                if not isinstance(value, (str, unicode)):
                    raise NetworkXError('%r is not a string' % (value,))
                yield indent + key + ' "' + escape(value) + '"'

    multigraph = G.is_multigraph()
    yield 'graph ['

    # Output graph attributes.  Nesting is rendered with two spaces per
    # level so that children sit one level deeper than their parent.
    if G.is_directed():
        yield '  directed 1'
    if multigraph:
        yield '  multigraph 1'
    ignored_keys = {'directed', 'multigraph', 'node', 'edge'}
    for attr, value in G.graph.items():
        for line in stringize(attr, value, ignored_keys, '  '):
            yield line

    # Output node data.  Nodes are numbered in iteration order and those
    # numbers are what edges refer to; the node itself becomes its label.
    node_id = dict(zip(G, range(len(G))))
    ignored_keys = {'id', 'label'}
    for node, attrs in G.nodes.items():
        yield '  node ['
        yield '    id ' + str(node_id[node])
        for line in stringize('label', node, (), '    '):
            yield line
        for attr, value in attrs.items():
            for line in stringize(attr, value, ignored_keys, '    '):
                yield line
        yield '  ]'

    # Output edge data
    ignored_keys = {'source', 'target'}
    kwargs = {'data': True}
    if multigraph:
        ignored_keys.add('key')
        kwargs['keys'] = True
    for e in G.edges(**kwargs):
        yield '  edge ['
        yield '    source ' + str(node_id[e[0]])
        yield '    target ' + str(node_id[e[1]])
        if multigraph:
            for line in stringize('key', e[2], (), '    '):
                yield line
        for attr, value in e[-1].items():
            for line in stringize(attr, value, ignored_keys, '    '):
                yield line
        yield '  ]'
    yield ']'
| 771 | |
| 772 | |
@open_file(1, mode='wb')
def write_gml(G, path, stringizer=None):
    """Write a graph `G` in GML format to the file or file handle `path`.

    Parameters
    ----------
    G : NetworkX graph
        The graph to be converted to GML.

    path : filename or filehandle
        The filename or filehandle to write. Files whose names end with .gz or
        .bz2 will be compressed.

    stringizer : callable, optional
        A `stringizer` which converts non-int/non-float/non-dict values into
        strings. If it cannot convert a value into a string, it should raise a
        `ValueError` to indicate that. Default value: None.

    Raises
    ------
    NetworkXError
        If `stringizer` cannot convert a value into a string, or the value to
        convert is not a string while `stringizer` is None.

    See Also
    --------
    read_gml, generate_gml, literal_stringizer

    Notes
    -----
    Graph attributes named 'directed', 'multigraph', 'node' or
    'edge', node attributes named 'id' or 'label', edge attributes
    named 'source' or 'target' (or 'key' if `G` is a multigraph)
    are ignored because these attribute names are used to encode the graph
    structure.

    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    handling `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification.  For other data types, you need to explicitly supply a
    `stringizer`/`destringizer`.

    Note that while we allow non-standard GML to be read from a file, we make
    sure to write GML format. In particular, underscores are not allowed in
    attribute names.
    For additional documentation on the GML file format, please see the
    `GML url <http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html>`_.

    See the module docstring :mod:`networkx.readwrite.gml` for more details.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_gml(G, "test.gml")

    Filenames ending in .gz or .bz2 will be compressed.

    >>> nx.write_gml(G, "test.gml.gz")
    """
    # The handle is opened in binary mode, so every generated line is
    # newline-terminated and ASCII-encoded before it is written.
    for gml_line in generate_gml(G, stringizer):
        encoded = (gml_line + '\n').encode('ascii')
        path.write(encoded)
| 835 | |
| 836 | |
# fixture for pytest
def teardown_module(module):
    """Remove the files written by the docstring examples, if they exist."""
    import os
    for leftover in ('test.gml', 'test.gml.gz'):
        if not os.path.isfile(leftover):
            continue
        os.unlink(leftover)
