Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/yaml/emitter.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 | |
| 2 # Emitter expects events obeying the following grammar: | |
| 3 # stream ::= STREAM-START document* STREAM-END | |
| 4 # document ::= DOCUMENT-START node DOCUMENT-END | |
| 5 # node ::= SCALAR | sequence | mapping | |
| 6 # sequence ::= SEQUENCE-START node* SEQUENCE-END | |
| 7 # mapping ::= MAPPING-START (node node)* MAPPING-END | |
| 8 | |
| 9 __all__ = ['Emitter', 'EmitterError'] | |
| 10 | |
| 11 from .error import YAMLError | |
| 12 from .events import * | |
| 13 | |
class EmitterError(YAMLError):
    """Error raised by the emitter for an unexpected or invalid event."""
| 16 | |
class ScalarAnalysis:
    """Plain record holding the outcome of analysing one scalar value.

    Every constructor argument is stored unchanged on an attribute of the
    same name.
    """

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The analysed text and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Which plain styles may represent the scalar.
        self.allow_flow_plain, self.allow_block_plain = (
            allow_flow_plain, allow_block_plain)
        # Which quoted styles may represent the scalar.
        self.allow_single_quoted, self.allow_double_quoted = (
            allow_single_quoted, allow_double_quoted)
        # Whether a block style may represent the scalar.
        self.allow_block = allow_block
| 30 | |
| 31 class Emitter: | |
| 32 | |
| 33 DEFAULT_TAG_PREFIXES = { | |
| 34 '!' : '!', | |
| 35 'tag:yaml.org,2002:' : '!!', | |
| 36 } | |
| 37 | |
def __init__(self, stream, canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None):
    """Set up an emitter that writes to *stream*.

    stream -- object with a `write` method and, optionally, `flush`.
    canonical, allow_unicode -- stored unchanged as formatting flags.
    indent -- preferred indentation step; honoured only when
        1 < indent < 10, otherwise 2 is used.
    width -- preferred line width; honoured only when it exceeds twice
        the indentation step, otherwise 80 is used.
    line_break -- CR, LF or CR LF; any other value selects LF.
    """
    # Output target. The encoding can be overridden by STREAM-START.
    self.stream = stream
    self.encoding = None

    # The emitter is a state machine: `state` handles the next event and
    # `states` is the stack of handlers used for nested structures.
    self.states = []
    self.state = self.expect_stream_start

    # Queue of pending events and the event being processed.
    self.events = []
    self.event = None

    # Current indentation level and the stack of previous indents.
    self.indents = []
    self.indent = None

    # Flow level.
    self.flow_level = 0

    # Contexts of the node being emitted.
    self.root_context = False
    self.sequence_context = False
    self.mapping_context = False
    self.simple_key_context = False

    # Characteristics of the last emitted character: its position,
    # whether it is whitespace, and whether it is an indention character
    # (indentation space, '-', '?', or ':').
    self.line = 0
    self.column = 0
    self.whitespace = True
    self.indention = True

    # Whether the document requires an explicit document indicator.
    self.open_ended = False

    # Formatting details; out-of-range requests fall back to defaults.
    self.canonical = canonical
    self.allow_unicode = allow_unicode
    self.best_indent = indent if (indent and 1 < indent < 10) else 2
    self.best_width = (
        width if (width and width > self.best_indent*2) else 80)
    self.best_line_break = (
        line_break if line_break in ['\r', '\n', '\r\n'] else '\n')

    # Tag prefixes.
    self.tag_prefixes = None

    # Prepared anchor and tag.
    self.prepared_anchor = None
    self.prepared_tag = None

    # Scalar analysis and style.
    self.analysis = None
    self.style = None
| 105 | |
def dispose(self):
    """Drop the state handlers so the emitter no longer references itself."""
    self.states = []
    # The current handler is a bound method of this object; clear it too.
    self.state = None
| 110 | |
def emit(self, event):
    """Queue *event* and process queued events while enough are available."""
    self.events.append(event)
    while True:
        # Some events need look-ahead before they can be handled.
        if self.need_more_events():
            break
        self.event = self.events.pop(0)
        self.state()
        self.event = None
| 117 | |
| 118 # In some cases, we wait for a few next events before emitting. | |
| 119 | |
def need_more_events(self):
    """Tell whether the head of the queue must wait for further events.

    Returns True for an empty queue. Document, sequence and mapping start
    events defer to need_events() with 1, 2 and 3 respectively; any other
    event can be processed immediately.
    """
    if not self.events:
        return True
    head = self.events[0]
    if isinstance(head, DocumentStartEvent):
        lookahead = 1
    elif isinstance(head, SequenceStartEvent):
        lookahead = 2
    elif isinstance(head, MappingStartEvent):
        lookahead = 3
    else:
        return False
    return self.need_events(lookahead)
| 132 | |
def need_events(self, count):
    """Tell whether fewer than *count* events follow the head of the queue.

    Returns False early when the queued events already close the structure
    opened by the head event, or when a stream end is queued.
    """
    depth = 0
    for pending in self.events[1:]:
        if isinstance(pending, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(pending, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(pending, StreamEndEvent):
            depth = -1
        if depth < 0:
            # Nothing more can arrive inside the head structure.
            return False
    return len(self.events) <= count
| 145 | |
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent and compute the indent for a nested node.

    With no indent yet, the new indent is `best_indent` in flow context
    and 0 otherwise. With an existing indent, it grows by `best_indent`
    unless *indentless* is true.
    """
    previous = self.indent
    self.indents.append(previous)
    if previous is None:
        self.indent = self.best_indent if flow else 0
    elif not indentless:
        self.indent = previous + self.best_indent
| 155 | |
| 156 # States. | |
| 157 | |
| 158 # Stream handlers. | |
| 159 | |
def expect_stream_start(self):
    """Handle the first event, which must be a StreamStartEvent.

    Raises EmitterError for any other event.
    """
    if not isinstance(self.event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    # Adopt the event's encoding only for streams without their own
    # `encoding` attribute.
    if self.event.encoding and not hasattr(self.stream, 'encoding'):
        self.encoding = self.event.encoding
    self.write_stream_start()
    self.state = self.expect_first_document_start
| 169 | |
def expect_nothing(self):
    """Reject any event; this is the state installed after stream end."""
    unexpected = self.event
    raise EmitterError("expected nothing, but got %s" % unexpected)
| 172 | |
| 173 # Document handlers. | |
| 174 | |
def expect_first_document_start(self):
    """Handle the event after stream start via expect_document_start."""
    outcome = self.expect_document_start(first=True)
    return outcome
| 177 | |
def expect_document_start(self, first=False):
    """Handle a DocumentStartEvent or the StreamEndEvent closing the stream.

    For a document start, writes the version and tag directives and, unless
    the document may start implicitly, the '---' marker. For a stream end,
    finishes the stream. Raises EmitterError for any other event.
    """
    if isinstance(self.event, DocumentStartEvent):
        event = self.event
        # Directives after an open-ended document need a '...' first.
        if self.open_ended and (event.version or event.tags):
            self.write_indicator('...', True)
            self.write_indent()
        if event.version:
            self.write_version_directive(
                    self.prepare_version(event.version))
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if event.tags:
            for handle in sorted(event.tags.keys()):
                prefix = event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        # The '---' marker may be left out only for a first, non-explicit,
        # non-canonical document without directives whose root is not an
        # empty scalar.
        needs_marker = (not first or event.explicit or self.canonical
                or event.version or event.tags
                or self.check_empty_document())
        if needs_marker:
            self.write_indent()
            self.write_indicator('---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
    elif isinstance(self.event, StreamEndEvent):
        if self.open_ended:
            self.write_indicator('...', True)
            self.write_indent()
        self.write_stream_end()
        self.state = self.expect_nothing
    else:
        raise EmitterError("expected DocumentStartEvent, but got %s"
                % self.event)
| 213 | |
def expect_document_end(self):
    """Handle the DocumentEndEvent that closes the current document.

    Writes '...' when the event is explicit, flushes the stream and waits
    for the next document. Raises EmitterError for any other event.
    """
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        self.write_indicator('...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
| 225 | |
def expect_document_root(self):
    """Emit the root node; expect_document_end runs once it is complete."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
| 229 | |
| 230 # Node handlers. | |
| 231 | |
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the node's context and dispatch on the current event.

    Aliases go to expect_alias(). Scalars and collections first emit their
    anchor and tag, then go to the scalar, flow or block handler. Raises
    EmitterError when the event is not a node event.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key
    event = self.event
    if isinstance(event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)
    self.process_anchor('&')
    self.process_tag()
    if isinstance(event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(event, SequenceStartEvent):
        # Flow style is used inside flow context, in canonical mode, on
        # request, and for empty sequences.
        if (self.flow_level or self.canonical or event.flow_style
                or self.check_empty_sequence()):
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(event, MappingStartEvent):
        # Same rule as for sequences.
        if (self.flow_level or self.canonical or event.flow_style
                or self.check_empty_mapping()):
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
| 259 | |
def expect_alias(self):
    """Emit an alias ('*anchor') and return to the enclosing state.

    Raises EmitterError when the alias event carries no anchor.
    """
    if self.event.anchor is not None:
        self.process_anchor('*')
        self.state = self.states.pop()
        return
    raise EmitterError("anchor is not specified for alias")
| 265 | |
def expect_scalar(self):
    """Write the current scalar and return to the enclosing state."""
    # The scalar body is written under a temporary flow-style indent.
    self.increase_indent(flow=True)
    self.process_scalar()
    restored_indent = self.indents.pop()
    self.indent = restored_indent
    self.state = self.states.pop()
| 271 | |
| 272 # Flow sequence handlers. | |
| 273 | |
def expect_flow_sequence(self):
    """Open a flow sequence: write '[' and enter one more flow level."""
    self.write_indicator('[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
| 279 | |
def expect_first_flow_sequence_item(self):
    """Handle the first item of a flow sequence, or its immediate end."""
    if not isinstance(self.event, SequenceEndEvent):
        # Break the line in canonical mode or once past the best width.
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    # Empty sequence: close it straight away.
    self.indent = self.indents.pop()
    self.flow_level -= 1
    self.write_indicator(']', False)
    self.state = self.states.pop()
| 291 | |
def expect_flow_sequence_item(self):
    """Handle a later item of a flow sequence, or the end of the sequence."""
    if not isinstance(self.event, SequenceEndEvent):
        self.write_indicator(',', False)
        # Break the line in canonical mode or once past the best width.
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    self.flow_level -= 1
    if self.canonical:
        # Canonical output puts a trailing comma and ']' on its own line.
        self.write_indicator(',', False)
        self.write_indent()
    self.write_indicator(']', False)
    self.state = self.states.pop()
| 307 | |
| 308 # Flow mapping handlers. | |
| 309 | |
def expect_flow_mapping(self):
    """Open a flow mapping: write '{' and enter one more flow level."""
    self.write_indicator('{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
| 315 | |
def expect_first_flow_mapping_key(self):
    """Handle the first key of a flow mapping, or its immediate end."""
    if isinstance(self.event, MappingEndEvent):
        # Empty mapping: close it straight away.
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator('}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    # Canonical mode always uses the explicit '?' form; otherwise it is
    # needed only when the key does not qualify as a simple key.
    if self.canonical or not self.check_simple_key():
        self.write_indicator('?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
| 332 | |
def expect_flow_mapping_key(self):
    """Handle a later key of a flow mapping, or the end of the mapping."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        if self.canonical:
            # Canonical output puts a trailing comma and '}' on its own
            # line.
            self.write_indicator(',', False)
            self.write_indent()
        self.write_indicator('}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    # Canonical mode always uses the explicit '?' form; otherwise it is
    # needed only when the key does not qualify as a simple key.
    if self.canonical or not self.check_simple_key():
        self.write_indicator('?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
| 353 | |
def expect_flow_mapping_simple_value(self):
    """Emit the value that follows a simple key in a flow mapping."""
    # The ':' goes directly after the key, without a separating space.
    self.write_indicator(':', False)
    next_state = self.expect_flow_mapping_key
    self.states.append(next_state)
    self.expect_node(mapping=True)
| 358 | |
def expect_flow_mapping_value(self):
    """Emit the value that follows an explicit ('?') key in a flow mapping."""
    too_wide = self.canonical or self.column > self.best_width
    if too_wide:
        self.write_indent()
    self.write_indicator(':', True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
| 365 | |
| 366 # Block sequence handlers. | |
| 367 | |
def expect_block_sequence(self):
    """Open a block sequence and choose its indentation."""
    # Inside a mapping, a sequence that does not start at an indention
    # position keeps the parent's indent.
    self.increase_indent(
            flow=False,
            indentless=(self.mapping_context and not self.indention))
    self.state = self.expect_first_block_sequence_item
| 372 | |
def expect_first_block_sequence_item(self):
    """Handle the first item of a block sequence."""
    outcome = self.expect_block_sequence_item(first=True)
    return outcome
| 375 | |
def expect_block_sequence_item(self, first=False):
    """Emit one '- ' item of a block sequence, or close the sequence.

    The end event is not checked for the first item.
    """
    if first or not isinstance(self.event, SequenceEndEvent):
        self.write_indent()
        self.write_indicator('-', True, indention=True)
        self.states.append(self.expect_block_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    self.state = self.states.pop()
| 385 | |
| 386 # Block mapping handlers. | |
| 387 | |
def expect_block_mapping(self):
    """Open a block mapping and wait for its first key."""
    self.increase_indent(flow=False)
    first_key_state = self.expect_first_block_mapping_key
    self.state = first_key_state
| 391 | |
def expect_first_block_mapping_key(self):
    """Handle the first key of a block mapping."""
    outcome = self.expect_block_mapping_key(first=True)
    return outcome
| 394 | |
def expect_block_mapping_key(self, first=False):
    """Emit one key of a block mapping, or close the mapping.

    The end event is not checked for the first key.
    """
    if not first and isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if not self.check_simple_key():
        # Complex key: use the explicit '? key' form.
        self.write_indicator('?', True, indention=True)
        self.states.append(self.expect_block_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_block_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
| 408 | |
def expect_block_mapping_simple_value(self):
    """Emit the value that follows a simple key in a block mapping."""
    # The ':' goes directly after the key, without a separating space.
    self.write_indicator(':', False)
    next_state = self.expect_block_mapping_key
    self.states.append(next_state)
    self.expect_node(mapping=True)
| 413 | |
def expect_block_mapping_value(self):
    """Emit the value that follows an explicit ('?') key in a block mapping."""
    # The ':' indicator starts on an indented line of its own.
    self.write_indent()
    self.write_indicator(':', True, indention=True)
    next_state = self.expect_block_mapping_key
    self.states.append(next_state)
    self.expect_node(mapping=True)
| 419 | |
| 420 # Checkers. | |
| 421 | |
def check_empty_sequence(self):
    """Truthy when the current event opens a sequence that the next queued
    event closes."""
    opens_sequence = isinstance(self.event, SequenceStartEvent)
    return (opens_sequence and self.events
            and isinstance(self.events[0], SequenceEndEvent))
| 425 | |
def check_empty_mapping(self):
    """Truthy when the current event opens a mapping that the next queued
    event closes."""
    opens_mapping = isinstance(self.event, MappingStartEvent)
    return (opens_mapping and self.events
            and isinstance(self.events[0], MappingEndEvent))
| 429 | |
def check_empty_document(self):
    """Truthy when the document being started has an untagged, unanchored,
    implicit empty scalar as its next queued event."""
    if not (isinstance(self.event, DocumentStartEvent) and self.events):
        return False
    upcoming = self.events[0]
    return (isinstance(upcoming, ScalarEvent) and upcoming.anchor is None
            and upcoming.tag is None and upcoming.implicit
            and upcoming.value == '')
| 436 | |
def check_simple_key(self):
    """Decide whether the current event can be written as a simple key.

    Adds up the lengths of the prepared anchor, the prepared tag and the
    scalar text, caching those values on the emitter as a side effect. The
    key is simple when that total is below 128 and the event is an alias,
    a non-empty single-line scalar, or an empty collection.
    """
    event = self.event
    total = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        total += len(self.prepared_anchor)
    if (isinstance(event, (ScalarEvent, CollectionStartEvent))
            and event.tag is not None):
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        total += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        total += len(self.analysis.scalar)
    return (total < 128 and (isinstance(event, AliasEvent)
        or (isinstance(event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping()))
| 456 | |
| 457 # Anchor, Tag, and Scalar processors. | |
| 458 | |
def process_anchor(self, indicator):
    """Write *indicator* followed by the event's anchor, if it has one.

    A previously prepared anchor is reused. The prepared anchor is always
    cleared before returning.
    """
    name = self.event.anchor
    if name is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(name)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
    self.prepared_anchor = None
| 468 | |
def process_tag(self):
    """Write the tag of the current scalar or collection when required.

    The tag is left out when the event's implicit flags allow it for the
    chosen style, unless canonical mode is on and a tag is present. Raises
    EmitterError when a tag is required but missing. The prepared tag is
    cleared in every non-raising path.
    """
    event = self.event
    tag = event.tag
    may_omit = not self.canonical or tag is None
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        # implicit[0] applies to the plain style, implicit[1] to all
        # other styles.
        if may_omit and (event.implicit[0] if self.style == ''
                else event.implicit[1]):
            self.prepared_tag = None
            return
        if event.implicit[0] and tag is None:
            # Non-plain style without a tag: use the '!' tag.
            tag = '!'
            self.prepared_tag = None
    elif may_omit and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
| 493 | |
def choose_scalar_style(self):
    """Pick the style for the current scalar event.

    Returns '' (plain), a block style character ('|' or '>'), a single
    quote or a double quote. The requested style is honoured only when the
    scalar analysis and the current context allow it; double quotes are the
    fallback.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    info = self.analysis
    requested = self.event.style
    in_simple_key = self.simple_key_context

    if requested == '"' or self.canonical:
        return '"'

    # Plain style: no style requested and the tag may stay implicit.
    if not requested and self.event.implicit[0]:
        unfit_key = in_simple_key and (info.empty or info.multiline)
        plain_ok = (info.allow_flow_plain if self.flow_level
                else info.allow_block_plain)
        if not unfit_key and plain_ok:
            return ''

    # Block styles: only outside flow context and outside simple keys.
    if requested and requested in '|>':
        if (not self.flow_level and not in_simple_key
                and info.allow_block):
            return requested

    # Single quotes: the default quoted style when permitted.
    if not requested or requested == '\'':
        if (info.allow_single_quoted
                and not (in_simple_key and info.multiline)):
            return '\''

    return '"'
| 514 | |
def process_scalar(self):
    """Write the current scalar in its chosen style, then clear the cached
    analysis and style."""
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    # Simple keys are never split across lines.
    split = (not self.simple_key_context)
    text = self.analysis.scalar
    style = self.style
    if style == '"':
        self.write_double_quoted(text, split)
    elif style == '\'':
        self.write_single_quoted(text, split)
    elif style == '>':
        self.write_folded(text)
    elif style == '|':
        self.write_literal(text)
    else:
        self.write_plain(text, split)
    self.analysis = None
    self.style = None
| 536 | |
| 537 # Analyzers. | |
| 538 | |
def prepare_version(self, version):
    """Format a (major, minor) pair as 'major.minor'.

    Raises EmitterError unless the major version is 1.
    """
    major, minor = version
    if major == 1:
        return '%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
| 544 | |
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain only
    ASCII letters, digits, '-' and '_' in between. Raises EmitterError
    otherwise.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if not (handle[0] == '!' and handle[-1] == '!'):
        raise EmitterError("tag handle must start and end with '!': %r" % handle)
    allowed = ('0123456789'
               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '-_')
    for ch in handle[1:-1]:
        if ch not in allowed:
            raise EmitterError("invalid character %r in the tag handle: %r"
                    % (ch, handle))
    return handle
| 556 | |
def prepare_tag_prefix(self, prefix):
    """Return *prefix* with every disallowed character %-escaped.

    ASCII letters, digits and the characters -;/?!:@&=+$,_.~*'()[] are
    copied through. Any other character is replaced by the '%XX' escapes
    of its UTF-8 bytes. Raises EmitterError for an empty prefix.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    chunks = []
    for ch in prefix:
        if ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'
                or ch in '-;/?!:@&=+$,_.~*\'()[]'):
            chunks.append(ch)
        else:
            # Iterating over bytes yields ints on Python 3, so each byte is
            # formatted directly; calling ord() on it raises TypeError.
            chunks.extend('%%%02X' % byte for byte in ch.encode('utf-8'))
    return ''.join(chunks)
| 579 | |
def prepare_tag(self, tag):
    """Return the text used to write *tag* in the output.

    A tag that starts with a registered prefix is shortened to
    'handle' + suffix; otherwise the verbatim form '!<...>' is used.
    Characters outside the allowed set are replaced by the '%XX' escapes
    of their UTF-8 bytes. Raises EmitterError for an empty tag.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == '!':
        return tag
    handle, suffix = None, tag
    # Every matching prefix overwrites the previous match, so the last
    # match in sorted order wins.
    for prefix in sorted(self.tag_prefixes.keys()):
        if tag.startswith(prefix) \
                and (prefix == '!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    pieces = []
    for ch in suffix:
        # '!' is allowed in the suffix except after the '!' handle.
        if ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'
                or ch in '-;/?:@&=+$,_.~*\'()[]'
                or (ch == '!' and handle != '!')):
            pieces.append(ch)
        else:
            pieces.extend('%%%02X' % byte for byte in ch.encode('utf-8'))
    suffix_text = ''.join(pieces)
    if handle:
        return '%s%s' % (handle, suffix_text)
    return '!<%s>' % suffix_text
| 615 | |
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    The anchor must be non-empty and consist only of ASCII letters,
    digits, '-' and '_'. Raises EmitterError otherwise.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    allowed = ('0123456789'
               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '-_')
    for ch in anchor:
        if ch not in allowed:
            raise EmitterError("invalid character %r in the anchor: %r"
                    % (ch, anchor))
    return anchor
| 625 | |
def analyze_scalar(self, scalar):
    """Scan *scalar* and report which output styles can represent it.

    Returns a ScalarAnalysis carrying the text, whether it is empty,
    whether it contains line breaks, and one allow_* flag per style.
    Reads self.allow_unicode to decide whether non-ASCII printable
    characters count as special.
    """

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Important whitespace combinations.
    leading_space = False
    leading_break = False
    trailing_space = False
    trailing_break = False
    break_space = False
    space_break = False

    # Check document indicators.
    if scalar.startswith('---') or scalar.startswith('...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceded_by_whitespace = True

    # Last character or followed by a whitespace.
    followed_by_whitespace = (len(scalar) == 1 or
            scalar[1] in '\0 \t\r\n\x85\u2028\u2029')

    # The previous character is a space.
    previous_space = False

    # The previous character is a break.
    previous_break = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.
        if index == 0:
            # Leading indicators are special characters.
            if ch in '#,[]{}&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in '?:':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '-' and followed_by_whitespace:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in ',?[]{}':
                flow_indicators = True
            if ch == ':':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '#' and preceded_by_whitespace:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.
        if ch in '\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
            if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
                    or '\uE000' <= ch <= '\uFFFD'
                    or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
                # NOTE(review): unicode_characters is assigned but never
                # read anywhere in this method.
                unicode_characters = True
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Detect important whitespace combinations.
        if ch == ' ':
            if index == 0:
                leading_space = True
            if index == len(scalar)-1:
                trailing_space = True
            if previous_break:
                break_space = True
            previous_space = True
            previous_break = False
        elif ch in '\n\x85\u2028\u2029':
            if index == 0:
                leading_break = True
            if index == len(scalar)-1:
                trailing_break = True
            if previous_space:
                space_break = True
            previous_space = False
            previous_break = True
        else:
            previous_space = False
            previous_break = False

        # Prepare for the next character. `index` is advanced first, so
        # the look-ahead below describes the character handled by the
        # next iteration.
        index += 1
        preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029')
        followed_by_whitespace = (index+1 >= len(scalar) or
                scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespaces are bad for plain scalars.
    if (leading_space or leading_break
            or trailing_space or trailing_break):
        allow_flow_plain = allow_block_plain = False

    # We do not permit trailing spaces for block scalars.
    if trailing_space:
        allow_block = False

    # Spaces at the beginning of a new line are only acceptable for block
    # scalars.
    if break_space:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Spaces followed by breaks, as well as special character are only
    # allowed for double quoted scalars.
    if space_break or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # Although the plain scalar writer supports breaks, we never emit
    # multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
| 785 | |
| 786 # Writers. | |
| 787 | |
def flush_stream(self):
    """Flush the output stream when it offers a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
| 791 | |
def write_stream_start(self):
    """Write a byte order mark when the output encoding is a UTF-16 one."""
    encoding = self.encoding
    if not encoding or not encoding.startswith('utf-16'):
        return
    self.stream.write('\uFEFF'.encode(encoding))
| 796 | |
def write_stream_end(self):
    """Finish the stream; the only work needed is flushing the output."""
    self.flush_stream()
| 799 | |
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write *indicator* and update the position tracking.

    A single space is written first when *need_whitespace* is true and the
    last emitted character was not whitespace. *whitespace* becomes the new
    whitespace flag. The indention flag stays set only if it was already
    set and *indention* is true.
    """
    needs_pad = need_whitespace and not self.whitespace
    text = ' '+indicator if needs_pad else indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(text)
    self.open_ended = False
    if self.encoding:
        text = text.encode(self.encoding)
    self.stream.write(text)
| 813 | |
def write_indent(self):
    """Move the output to the current indentation column.

    A line break is written first when the current position cannot be used
    as it is; then the line is padded with spaces up to the indent.
    """
    target = self.indent or 0
    needs_break = (not self.indention or self.column > target
            or (self.column == target and not self.whitespace))
    if needs_break:
        self.write_line_break()
    missing = target - self.column
    if missing > 0:
        self.whitespace = True
        padding = ' '*missing
        self.column = target
        if self.encoding:
            padding = padding.encode(self.encoding)
        self.stream.write(padding)
| 826 | |
def write_line_break(self, data=None):
    """Write a line break and reset the position to the start of a line.

    *data* is the break to write; the emitter's preferred line break is
    used when it is None.
    """
    text = self.best_line_break if data is None else data
    self.whitespace = True
    self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        text = text.encode(self.encoding)
    self.stream.write(text)
| 837 | |
def write_version_directive(self, version_text):
    """Write a ``%YAML`` directive for ``version_text`` on its own line."""
    directive = '%%YAML %s' % version_text
    payload = directive.encode(self.encoding) if self.encoding else directive
    self.stream.write(payload)
    self.write_line_break()
| 844 | |
def write_tag_directive(self, handle_text, prefix_text):
    """Write a ``%TAG`` directive mapping a handle to a prefix."""
    directive = '%%TAG %s %s' % (handle_text, prefix_text)
    payload = directive.encode(self.encoding) if self.encoding else directive
    self.stream.write(payload)
    self.write_line_break()
| 851 | |
| 852 # Scalar streams. | |
| 853 | |
def write_single_quoted(self, text, split=True):
    """Write ``text`` as a single-quoted scalar.

    The text is scanned one position past its end so that the final
    pending run is flushed.  Runs of spaces, runs of line breaks and runs
    of other characters are handled separately; every ``'`` is written
    doubled.  With ``split`` true, a lone interior space may be replaced
    by an indented line break once the column exceeds
    ``self.best_width``.
    """
    line_breaks = '\n\x85\u2028\u2029'
    length = len(text)

    def emit(chunk):
        # Advance the column, then write (encoded when an encoding is set).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    self.write_indicator('\'', True)
    in_spaces = False
    in_breaks = False
    start = 0
    for end in range(length + 1):
        # ``None`` marks the position just past the last character.
        ch = text[end] if end < length else None
        if in_spaces:
            if ch != ' ':
                # Fold only a single space that is neither at the very
                # start nor at the very end of the text.
                if (start+1 == end and self.column > self.best_width
                        and split and start != 0 and end != length):
                    self.write_indent()
                else:
                    emit(text[start:end])
                start = end
        elif in_breaks:
            if ch is None or ch not in line_breaks:
                # A run starting with '\n' gets one extra break in front.
                if text[start] == '\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        elif ch is None or ch in ' ' + line_breaks or ch == '\'':
            if start < end:
                emit(text[start:end])
                start = end
        if ch == '\'':
            # A quote inside the scalar is escaped by doubling it.
            emit('\'\'')
            start = end + 1
        if ch is not None:
            in_spaces = (ch == ' ')
            in_breaks = (ch in line_breaks)
    self.write_indicator('\'', False)
| 907 | |
# Maps a character to the letter that follows the backslash when
# write_double_quoted escapes that character.
ESCAPE_REPLACEMENTS = {
    '\x00': '0',    # U+0000
    '\a': 'a',      # U+0007
    '\b': 'b',      # U+0008
    '\t': 't',      # U+0009
    '\n': 'n',      # U+000A
    '\v': 'v',      # U+000B
    '\f': 'f',      # U+000C
    '\r': 'r',      # U+000D
    '\x1b': 'e',    # U+001B
    '"': '"',       # U+0022
    '\\': '\\',     # U+005C
    '\x85': 'N',    # U+0085
    '\xa0': '_',    # U+00A0
    '\u2028': 'L',  # U+2028
    '\u2029': 'P',  # U+2029
}
| 925 | |
def write_double_quoted(self, text, split=True):
    """Write ``text`` as a double-quoted scalar with escapes.

    Printable ASCII is copied through, as are the ranges U+00A0-U+D7FF
    and U+E000-U+FFFD when ``self.allow_unicode`` is set.  Everything
    else, plus ``"``, ``\\``, U+0085, U+2028, U+2029 and U+FEFF, is
    written as an escape: a letter from ``self.ESCAPE_REPLACEMENTS`` when
    one exists, otherwise ``\\xXX``, ``\\uXXXX`` or ``\\UXXXXXXXX``.
    With ``split`` true, the scalar is continued on a new indented line
    (ending the current one with a backslash) once the column would pass
    ``self.best_width``.
    """
    length = len(text)

    def emit(chunk):
        # Advance the column, then write (encoded when an encoding is set).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    self.write_indicator('"', True)
    start = 0
    for end in range(length + 1):
        # ``None`` marks the position just past the last character.
        ch = text[end] if end < length else None
        if ch is None:
            special = True
        elif ch in '"\\\x85\u2028\u2029\uFEFF':
            special = True
        elif '\x20' <= ch <= '\x7E':
            special = False
        elif self.allow_unicode:
            special = not ('\xA0' <= ch <= '\uD7FF'
                           or '\uE000' <= ch <= '\uFFFD')
        else:
            special = True
        if special:
            # Flush the pending literal run before dealing with ``ch``.
            if start < end:
                emit(text[start:end])
                start = end
            if ch is not None:
                if ch in self.ESCAPE_REPLACEMENTS:
                    escaped = '\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= '\xFF':
                    escaped = '\\x%02X' % ord(ch)
                elif ch <= '\uFFFF':
                    escaped = '\\u%04X' % ord(ch)
                else:
                    escaped = '\\U%08X' % ord(ch)
                emit(escaped)
                start = end+1
        if (0 < end < length-1 and (ch == ' ' or start >= end)
                and self.column+(end-start) > self.best_width and split):
            # End this line with a backslash and continue on the next.
            pending = text[start:end]+'\\'
            if start < end:
                start = end
            emit(pending)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            if text[start] == ' ':
                # A backslash in front of a leading space on the new line.
                emit('\\')
    self.write_indicator('"', False)
| 979 | |
def determine_block_hints(self, text):
    """Return the header hints for writing ``text`` as a block scalar.

    The result is the concatenation of, in order: ``self.best_indent``
    as a string when the text starts with a space or line break; ``'-'``
    when the text does not end with a line break, or ``'+'`` when it
    consists of a single line break or ends with two of them.  Empty
    text yields an empty string.
    """
    if not text:
        return ''
    line_breaks = '\n\x85\u2028\u2029'
    parts = []
    if text[0] in ' ' + line_breaks:
        parts.append(str(self.best_indent))
    if text[-1] not in line_breaks:
        parts.append('-')
    elif len(text) == 1 or text[-2] in line_breaks:
        parts.append('+')
    return ''.join(parts)
| 990 | |
def write_folded(self, text):
    """Write ``text`` as a folded (``>``) block scalar.

    The header carries the hints from ``determine_block_hints``; a
    trailing ``'+'`` hint marks the emitter as open ended.  The text is
    then scanned one position past its end, handling runs of line
    breaks, runs of spaces and runs of other characters separately.  A
    lone space is replaced by an indented line break once the column
    exceeds ``self.best_width``.
    """
    line_breaks = '\n\x85\u2028\u2029'
    length = len(text)

    def emit(chunk):
        # Advance the column, then write (encoded when an encoding is set).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    hints = self.determine_block_hints(text)
    self.write_indicator('>'+hints, True)
    if hints.endswith('+'):
        self.open_ended = True
    self.write_line_break()
    leading_space = True
    in_spaces = False
    in_breaks = True
    start = 0
    for end in range(length + 1):
        # ``None`` marks the position just past the last character.
        ch = text[end] if end < length else None
        if in_breaks:
            if ch is None or ch not in line_breaks:
                # Between two lines that both start with a non-space
                # character, a run starting with '\n' gets one extra
                # break in front.
                if (not leading_space and ch is not None and ch != ' '
                        and text[start] == '\n'):
                    self.write_line_break()
                leading_space = (ch == ' ')
                for br in text[start:end]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif in_spaces:
            if ch != ' ':
                if start+1 == end and self.column > self.best_width:
                    self.write_indent()
                else:
                    emit(text[start:end])
                start = end
        elif ch is None or ch in ' ' + line_breaks:
            emit(text[start:end])
            if ch is None:
                self.write_line_break()
            start = end
        if ch is not None:
            in_breaks = (ch in line_breaks)
            in_spaces = (ch == ' ')
| 1044 | |
def write_literal(self, text):
    """Write ``text`` as a literal (``|``) block scalar.

    The header carries the hints from ``determine_block_hints``; a
    trailing ``'+'`` hint marks the emitter as open ended.  Each line of
    content is written after an indent, and each line break in the text
    is written through ``write_line_break``.
    """
    line_breaks = '\n\x85\u2028\u2029'
    length = len(text)
    hints = self.determine_block_hints(text)
    self.write_indicator('|'+hints, True)
    if hints.endswith('+'):
        self.open_ended = True
    self.write_line_break()
    in_breaks = True
    start = 0
    for end in range(length + 1):
        # ``None`` marks the position just past the last character.
        ch = text[end] if end < length else None
        if in_breaks:
            if ch is None or ch not in line_breaks:
                for br in text[start:end]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif ch is None or ch in line_breaks:
            # The column is not advanced for content here; the line
            # break written next resets it.
            content = text[start:end]
            if self.encoding:
                content = content.encode(self.encoding)
            self.stream.write(content)
            if ch is None:
                self.write_line_break()
            start = end
        if ch is not None:
            in_breaks = (ch in line_breaks)
| 1079 | |
def write_plain(self, text, split=True):
    """Write ``text`` as a plain (unquoted) scalar.

    In the root context the emitter is marked open ended, even for empty
    text, which otherwise writes nothing.  A separating space is written
    first when the previous output did not end in whitespace.  The text
    is then scanned one position past its end, handling runs of spaces,
    runs of line breaks and runs of other characters separately.  With
    ``split`` true, a lone space is replaced by an indented line break
    once the column exceeds ``self.best_width``.

    Text that ends with a line break is handled the same way
    ``write_single_quoted`` handles it (the breaks are written, followed
    by an indent) instead of raising ``TypeError`` on the end-of-text
    marker.
    """
    line_breaks = '\n\x85\u2028\u2029'

    def emit(chunk):
        # Advance the column, then write (encoded when an encoding is set).
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    if self.root_context:
        self.open_ended = True
    if not text:
        return
    if not self.whitespace:
        emit(' ')
    self.whitespace = False
    self.indention = False
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # ``None`` marks the position just past the last character.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch != ' ':
                if start+1 == end and self.column > self.best_width and split:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    emit(text[start:end])
                start = end
        elif breaks:
            # ``ch`` is None when the text ends with a line break; the
            # ``in`` test on a str would raise TypeError for None.
            if ch is None or ch not in line_breaks:
                # A run starting with '\n' gets one extra break in front.
                if text[start] == '\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                start = end
        else:
            if ch is None or ch in ' ' + line_breaks:
                emit(text[start:end])
                start = end
        if ch is not None:
            spaces = (ch == ' ')
            breaks = (ch in line_breaks)
        end += 1
