Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/ruamel/yaml/scanner.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 (2021-03-22) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # coding: utf-8 | |
2 | |
3 from __future__ import print_function, absolute_import, division, unicode_literals | |
4 | |
5 # Scanner produces tokens of the following types: | |
6 # STREAM-START | |
7 # STREAM-END | |
8 # DIRECTIVE(name, value) | |
9 # DOCUMENT-START | |
10 # DOCUMENT-END | |
11 # BLOCK-SEQUENCE-START | |
12 # BLOCK-MAPPING-START | |
13 # BLOCK-END | |
14 # FLOW-SEQUENCE-START | |
15 # FLOW-MAPPING-START | |
16 # FLOW-SEQUENCE-END | |
17 # FLOW-MAPPING-END | |
18 # BLOCK-ENTRY | |
19 # FLOW-ENTRY | |
20 # KEY | |
21 # VALUE | |
22 # ALIAS(value) | |
23 # ANCHOR(value) | |
24 # TAG(value) | |
25 # SCALAR(value, plain, style) | |
26 # | |
27 # RoundTripScanner | |
28 # COMMENT(value) | |
29 # | |
30 # Read comments in the Scanner code for more details. | |
31 # | |
32 | |
33 from ruamel.yaml.error import MarkedYAMLError | |
34 from ruamel.yaml.tokens import * # NOQA | |
35 from ruamel.yaml.compat import utf8, unichr, PY3, check_anchorname_char, nprint # NOQA | |
36 | |
if False:  # MYPY
    # Imported only for static type checking; the ``if False`` guard keeps
    # these (potentially cyclic) imports out of the runtime import graph.
    from typing import Any, Dict, Optional, List, Union, Text  # NOQA
    from ruamel.yaml.compat import VersionType  # NOQA
40 | |
__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError']


# Characters that terminate a line or the stream: LF, NUL, CR, NEL, LS, PS.
_THE_END = '\n\0\r\x85\u2028\u2029'
# The same terminators plus space and tab (characters that end a plain token).
_THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029'
_SPACE_TAB = ' \t'
47 | |
48 | |
class ScannerError(MarkedYAMLError):
    # Raised for any problem detected while tokenizing the input stream;
    # all context/mark formatting behaviour comes from MarkedYAMLError.
    pass
51 | |
52 | |
class SimpleKey(object):
    # Record of a position where a "simple" key (a key not introduced by the
    # '?' indicator) may still start. See the simple keys treatment in
    # Scanner below.

    def __init__(self, token_number, required, index, line, column, mark):
        # type: (Any, Any, int, int, int, Any) -> None
        self.token_number = token_number  # overall number of the candidate KEY token
        self.required = required  # True when a ':' *must* follow (block context)
        self.index = index  # absolute character index in the stream
        self.line = line  # line on which the candidate key starts
        self.column = column  # column at which the candidate key starts
        self.mark = mark  # Mark used for error reporting
64 | |
65 | |
66 class Scanner(object): | |
    def __init__(self, loader=None):
        # type: (Any) -> None
        """Initialize the scanner and register it on the loader (if any)."""
        # It is assumed that Scanner and Reader will have a common descendant.
        # Reader does the dirty work of checking for BOM and converting the
        # input data to Unicode. It also adds NUL to the end.
        #
        # Reader supports the following methods
        #   self.peek(i=0)    # peek the next i-th character
        #   self.prefix(l=1)  # peek the next l characters
        #   self.forward(l=1) # read the next l characters and move the pointer

        self.loader = loader
        # Register self as the loader's scanner, but never clobber one that
        # is already installed.
        if self.loader is not None and getattr(self.loader, '_scanner', None) is None:
            self.loader._scanner = self
        self.reset_scanner()
        self.first_time = False
        self.yaml_version = None  # type: Any
85 | |
86 @property | |
87 def flow_level(self): | |
88 # type: () -> int | |
89 return len(self.flow_context) | |
90 | |
    def reset_scanner(self):
        # type: () -> None
        # Reset all tokenizing state and emit the initial STREAM-START token.
        # Had we reached the end of the stream?
        self.done = False

        # flow_context is an expanding/shrinking list consisting of '{' and '['
        # for each unclosed flow context. An empty list means block context.
        self.flow_context = []  # type: List[Text]

        # List of processed tokens that are not yet emitted.
        self.tokens = []  # type: List[Any]

        # Add the STREAM-START token.
        self.fetch_stream_start()

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []  # type: List[int]

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #   (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies if a block collection
        # may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, mark)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}  # type: Dict[Any, Any]
144 | |
    @property
    def reader(self):
        # type: () -> Any
        # Lazily resolve and cache the Reader instance that feeds this
        # scanner. New-style loaders (those exposing ``typ``) publish it as
        # ``loader.reader``; legacy composited loaders as ``loader._reader``.
        try:
            return self._scanner_reader  # type: ignore
        except AttributeError:
            if hasattr(self.loader, 'typ'):
                self._scanner_reader = self.loader.reader
            else:
                self._scanner_reader = self.loader._reader
            return self._scanner_reader
156 | |
157 @property | |
158 def scanner_processing_version(self): # prefix until un-composited | |
159 # type: () -> Any | |
160 if hasattr(self.loader, 'typ'): | |
161 return self.loader.resolver.processing_version | |
162 return self.loader.processing_version | |
163 | |
164 # Public methods. | |
165 | |
166 def check_token(self, *choices): | |
167 # type: (Any) -> bool | |
168 # Check if the next token is one of the given types. | |
169 while self.need_more_tokens(): | |
170 self.fetch_more_tokens() | |
171 if bool(self.tokens): | |
172 if not choices: | |
173 return True | |
174 for choice in choices: | |
175 if isinstance(self.tokens[0], choice): | |
176 return True | |
177 return False | |
178 | |
179 def peek_token(self): | |
180 # type: () -> Any | |
181 # Return the next token, but do not delete if from the queue. | |
182 while self.need_more_tokens(): | |
183 self.fetch_more_tokens() | |
184 if bool(self.tokens): | |
185 return self.tokens[0] | |
186 | |
187 def get_token(self): | |
188 # type: () -> Any | |
189 # Return the next token. | |
190 while self.need_more_tokens(): | |
191 self.fetch_more_tokens() | |
192 if bool(self.tokens): | |
193 self.tokens_taken += 1 | |
194 return self.tokens.pop(0) | |
195 | |
196 # Private methods. | |
197 | |
198 def need_more_tokens(self): | |
199 # type: () -> bool | |
200 if self.done: | |
201 return False | |
202 if not self.tokens: | |
203 return True | |
204 # The current token may be a potential simple key, so we | |
205 # need to look further. | |
206 self.stale_possible_simple_keys() | |
207 if self.next_possible_simple_key() == self.tokens_taken: | |
208 return True | |
209 return False | |
210 | |
    def fetch_comment(self, comment):
        # type: (Any) -> None
        # Hook for comment-preserving subclasses (RoundTripScanner). The
        # base scanner never reaches it because its scan_to_next_token()
        # discards comments and returns None.
        raise NotImplementedError
214 | |
    def fetch_more_tokens(self):
        # type: () -> Any
        # Scan one more token (plus any implicitly required tokens such as
        # BLOCK-END) onto self.tokens, dispatching on the first character.
        # Eat whitespaces and comments until we reach the next token.
        comment = self.scan_to_next_token()
        if comment is not None:  # never happens for base scanner
            return self.fetch_comment(comment)
        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.reader.column)

        # Peek the next character.
        ch = self.reader.peek()

        # Is it the end of stream?
        if ch == '\0':
            return self.fetch_stream_end()

        # Is it a directive?
        if ch == '%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == '-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == '.' and self.check_document_end():
            return self.fetch_document_end()

        # TODO: support for BOM within a stream.
        # if ch == u'\uFEFF':
        #     return self.fetch_bom()    <-- issue BOMToken

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == '[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == '{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == ']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == '}':
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch == ',':
            return self.fetch_flow_entry()

        # Is it the block entry indicator?
        if ch == '-' and self.check_block_entry():
            return self.fetch_block_entry()

        # Is it the key indicator?
        if ch == '?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == ':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == '*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == '&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == '!':
            return self.fetch_tag()

        # Is it a literal scalar? (Block scalars only exist in block context.)
        if ch == '|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == '>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == "'":
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == '"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        raise ScannerError(
            'while scanning for the next token',
            None,
            'found character %r that cannot start any token' % utf8(ch),
            self.reader.get_mark(),
        )
324 | |
325 # Simple keys treatment. | |
326 | |
327 def next_possible_simple_key(self): | |
328 # type: () -> Any | |
329 # Return the number of the nearest possible simple key. Actually we | |
330 # don't need to loop through the whole dictionary. We may replace it | |
331 # with the following code: | |
332 # if not self.possible_simple_keys: | |
333 # return None | |
334 # return self.possible_simple_keys[ | |
335 # min(self.possible_simple_keys.keys())].token_number | |
336 min_token_number = None | |
337 for level in self.possible_simple_keys: | |
338 key = self.possible_simple_keys[level] | |
339 if min_token_number is None or key.token_number < min_token_number: | |
340 min_token_number = key.token_number | |
341 return min_token_number | |
342 | |
    def stale_possible_simple_keys(self):
        # type: () -> None
        # Remove entries that are no longer possible simple keys. According to
        # the YAML specification, simple keys
        # - should be limited to a single line,
        # - should be no longer than 1024 characters.
        # Disabling this procedure will allow simple keys of any length and
        # height (may cause problems if indentation is broken though).
        # Raises ScannerError if a *required* key expires before its ':' is
        # found.
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.reader.line or self.reader.index - key.index > 1024:
                if key.required:
                    raise ScannerError(
                        'while scanning a simple key',
                        key.mark,
                        "could not find expected ':'",
                        self.reader.get_mark(),
                    )
                del self.possible_simple_keys[level]
362 | |
    def save_possible_simple_key(self):
        # type: () -> None
        # The next token may start a simple key. We check if it's possible
        # and save its position. This function is called for
        # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

        # Check if a simple key is required at the current position: in block
        # context, a key sitting exactly at the current indent must be
        # followed by ':'.
        required = not self.flow_level and self.indent == self.reader.column

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken + len(self.tokens)
            key = SimpleKey(
                token_number,
                required,
                self.reader.index,
                self.reader.line,
                self.reader.column,
                self.reader.get_mark(),
            )
            self.possible_simple_keys[self.flow_level] = key
386 | |
    def remove_possible_simple_key(self):
        # type: () -> None
        # Remove the saved possible key position at the current flow level.
        # Raises ScannerError when the discarded key was required, because a
        # required key that never found its ':' is a syntax error.
        if self.flow_level in self.possible_simple_keys:
            key = self.possible_simple_keys[self.flow_level]

            if key.required:
                raise ScannerError(
                    'while scanning a simple key',
                    key.mark,
                    "could not find expected ':'",
                    self.reader.get_mark(),
                )

            del self.possible_simple_keys[self.flow_level]
402 | |
403 # Indentation functions. | |
404 | |
405 def unwind_indent(self, column): | |
406 # type: (Any) -> None | |
407 # In flow context, tokens should respect indentation. | |
408 # Actually the condition should be `self.indent >= column` according to | |
409 # the spec. But this condition will prohibit intuitively correct | |
410 # constructions such as | |
411 # key : { | |
412 # } | |
413 # #### | |
414 # if self.flow_level and self.indent > column: | |
415 # raise ScannerError(None, None, | |
416 # "invalid intendation or unclosed '[' or '{'", | |
417 # self.reader.get_mark()) | |
418 | |
419 # In the flow context, indentation is ignored. We make the scanner less | |
420 # restrictive then specification requires. | |
421 if bool(self.flow_level): | |
422 return | |
423 | |
424 # In block context, we may need to issue the BLOCK-END tokens. | |
425 while self.indent > column: | |
426 mark = self.reader.get_mark() | |
427 self.indent = self.indents.pop() | |
428 self.tokens.append(BlockEndToken(mark, mark)) | |
429 | |
430 def add_indent(self, column): | |
431 # type: (int) -> bool | |
432 # Check if we need to increase indentation. | |
433 if self.indent < column: | |
434 self.indents.append(self.indent) | |
435 self.indent = column | |
436 return True | |
437 return False | |
438 | |
439 # Fetchers. | |
440 | |
    def fetch_stream_start(self):
        # type: () -> None
        # We always add STREAM-START as the first token and STREAM-END as the
        # last token.
        # Read the token.
        mark = self.reader.get_mark()
        # Add STREAM-START, carrying the detected stream encoding.
        self.tokens.append(StreamStartToken(mark, mark, encoding=self.reader.encoding))
449 | |
    def fetch_stream_end(self):
        # type: () -> None
        # Set the current indentation to -1, closing all open block levels.
        self.unwind_indent(-1)
        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False
        self.possible_simple_keys = {}
        # Read the token.
        mark = self.reader.get_mark()
        # Add STREAM-END.
        self.tokens.append(StreamEndToken(mark, mark))
        # The stream is finished.
        self.done = True
464 | |
    def fetch_directive(self):
        # type: () -> None
        # Set the current indentation to -1, closing all open block levels.
        self.unwind_indent(-1)

        # Reset simple keys; no simple key can follow a directive.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.tokens.append(self.scan_directive())
476 | |
    def fetch_document_start(self):
        # type: () -> None
        # '---' at the start of a line.
        self.fetch_document_indicator(DocumentStartToken)
480 | |
    def fetch_document_end(self):
        # type: () -> None
        # '...' at the start of a line.
        self.fetch_document_indicator(DocumentEndToken)
484 | |
    def fetch_document_indicator(self, TokenClass):
        # type: (Any) -> None
        # Common path for '---' and '...' indicators.
        # Set the current indentation to -1, closing all open block levels.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there could not be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END (the indicator is 3 chars wide).
        start_mark = self.reader.get_mark()
        self.reader.forward(3)
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))
500 | |
    def fetch_flow_sequence_start(self):
        # type: () -> None
        # '[' opens a flow sequence context.
        self.fetch_flow_collection_start(FlowSequenceStartToken, to_push='[')
504 | |
    def fetch_flow_mapping_start(self):
        # type: () -> None
        # '{' opens a flow mapping context.
        self.fetch_flow_collection_start(FlowMappingStartToken, to_push='{')
508 | |
    def fetch_flow_collection_start(self, TokenClass, to_push):
        # type: (Any, Text) -> None
        # Common path for '[' and '{'.
        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()
        # Increase the flow level by recording the opening indicator.
        self.flow_context.append(to_push)
        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True
        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))
522 | |
    def fetch_flow_sequence_end(self):
        # type: () -> None
        # ']' closes a flow sequence context.
        self.fetch_flow_collection_end(FlowSequenceEndToken)
526 | |
    def fetch_flow_mapping_end(self):
        # type: () -> None
        # '}' closes a flow mapping context.
        self.fetch_flow_collection_end(FlowMappingEndToken)
530 | |
    def fetch_flow_collection_end(self, TokenClass):
        # type: (Any) -> None
        # Common path for ']' and '}'.
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Decrease the flow level.
        try:
            popped = self.flow_context.pop()  # NOQA
        except IndexError:
            # We must not be in a list or object: an unmatched closer.
            # Deliberately swallowed — error handling is deferred to the
            # parser, which produces a better message.
            pass
        # No simple keys after ']' or '}'.
        self.allow_simple_key = False
        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))
549 | |
    def fetch_flow_entry(self):
        # type: () -> None
        # ',' separator inside a flow collection.
        # Simple keys are allowed after ','.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Add FLOW-ENTRY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(FlowEntryToken(start_mark, end_mark))
561 | |
    def fetch_block_entry(self):
        # type: () -> None
        # '-' block sequence entry indicator.
        # Block context needs additional checks.
        if not self.flow_level:
            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, 'sequence entries are not allowed here', self.reader.get_mark()
                )
            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.reader.column):
                mark = self.reader.get_mark()
                self.tokens.append(BlockSequenceStartToken(mark, mark))
        # It's an error for the block entry to occur in the flow context,
        # but we let the parser detect this.
        else:
            pass
        # Simple keys are allowed after '-'.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add BLOCK-ENTRY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(BlockEntryToken(start_mark, end_mark))
589 | |
    def fetch_key(self):
        # type: () -> None
        # '?' explicit key indicator.
        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, 'mapping keys are not allowed here', self.reader.get_mark()
                )

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.reader.column):
                mark = self.reader.get_mark()
                self.tokens.append(BlockMappingStartToken(mark, mark))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(KeyToken(start_mark, end_mark))
617 | |
    def fetch_value(self):
        # type: () -> None
        # ':' value indicator. May retroactively insert KEY (and possibly
        # BLOCK-MAPPING-START) tokens at the saved simple-key position.
        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:
            # Add KEY at the position recorded when the key candidate was
            # seen; insertion index is relative to already-emitted tokens.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(
                key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark)
            )

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(
                        key.token_number - self.tokens_taken,
                        BlockMappingStartToken(key.mark, key.mark),
                    )

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be a part of a complex key.
        else:

            # Block context needs additional checks.
            # (Do we really need them? They will be caught by the parser
            # anyway.)
            if not self.flow_level:

                # We are allowed to start a complex value if and only if
                # we can start a simple key.
                if not self.allow_simple_key:
                    raise ScannerError(
                        None,
                        None,
                        'mapping values are not allowed here',
                        self.reader.get_mark(),
                    )

            # If this value starts a new block mapping, we need to add
            # BLOCK-MAPPING-START. It will be detected as an error later by
            # the parser.
            if not self.flow_level:
                if self.add_indent(self.reader.column):
                    mark = self.reader.get_mark()
                    self.tokens.append(BlockMappingStartToken(mark, mark))

        # Simple keys are allowed after ':' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add VALUE.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(ValueToken(start_mark, end_mark))
678 | |
    def fetch_alias(self):
        # type: () -> None
        # '*' alias indicator.
        # ALIAS could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after ALIAS.
        self.allow_simple_key = False
        # Scan and add ALIAS.
        self.tokens.append(self.scan_anchor(AliasToken))
687 | |
    def fetch_anchor(self):
        # type: () -> None
        # '&' anchor indicator.
        # ANCHOR could start a simple key.
        self.save_possible_simple_key()
        # No simple keys after ANCHOR.
        self.allow_simple_key = False
        # Scan and add ANCHOR.
        self.tokens.append(self.scan_anchor(AnchorToken))
696 | |
    def fetch_tag(self):
        # type: () -> None
        # '!' tag indicator.
        # TAG could start a simple key.
        self.save_possible_simple_key()
        # No simple keys after TAG.
        self.allow_simple_key = False
        # Scan and add TAG.
        self.tokens.append(self.scan_tag())
705 | |
    def fetch_literal(self):
        # type: () -> None
        # '|' literal block scalar.
        self.fetch_block_scalar(style='|')
709 | |
    def fetch_folded(self):
        # type: () -> None
        # '>' folded block scalar.
        self.fetch_block_scalar(style='>')
713 | |
    def fetch_block_scalar(self, style):
        # type: (Any) -> None
        # Common path for '|' and '>' scalars.
        # A simple key may follow a block scalar.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Scan and add SCALAR.
        self.tokens.append(self.scan_block_scalar(style))
722 | |
    def fetch_single(self):
        # type: () -> None
        # Single quoted flow scalar.
        self.fetch_flow_scalar(style="'")
726 | |
    def fetch_double(self):
        # type: () -> None
        # Double quoted flow scalar.
        self.fetch_flow_scalar(style='"')
730 | |
    def fetch_flow_scalar(self, style):
        # type: (Any) -> None
        # Common path for quoted scalars.
        # A flow scalar could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after flow scalars.
        self.allow_simple_key = False
        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar(style))
739 | |
    def fetch_plain(self):
        # type: () -> None
        # A plain scalar could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False
        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())
750 | |
751 # Checkers. | |
752 | |
753 def check_directive(self): | |
754 # type: () -> Any | |
755 # DIRECTIVE: ^ '%' ... | |
756 # The '%' indicator is already checked. | |
757 if self.reader.column == 0: | |
758 return True | |
759 return None | |
760 | |
761 def check_document_start(self): | |
762 # type: () -> Any | |
763 # DOCUMENT-START: ^ '---' (' '|'\n') | |
764 if self.reader.column == 0: | |
765 if self.reader.prefix(3) == '---' and self.reader.peek(3) in _THE_END_SPACE_TAB: | |
766 return True | |
767 return None | |
768 | |
769 def check_document_end(self): | |
770 # type: () -> Any | |
771 # DOCUMENT-END: ^ '...' (' '|'\n') | |
772 if self.reader.column == 0: | |
773 if self.reader.prefix(3) == '...' and self.reader.peek(3) in _THE_END_SPACE_TAB: | |
774 return True | |
775 return None | |
776 | |
    def check_block_entry(self):
        # type: () -> Any
        # BLOCK-ENTRY: '-' (' '|'\n') — the '-' must be followed by a
        # space/tab/line-end to count as a sequence entry indicator.
        return self.reader.peek(1) in _THE_END_SPACE_TAB
781 | |
    def check_key(self):
        # type: () -> Any
        # KEY(flow context): '?' alone is enough.
        if bool(self.flow_level):
            return True
        # KEY(block context): '?' (' '|'\n') — must be followed by
        # space/tab/line-end.
        return self.reader.peek(1) in _THE_END_SPACE_TAB
789 | |
    def check_value(self):
        # type: () -> Any
        # Decide whether ':' at the current position starts a VALUE token;
        # the rules differ between YAML 1.1 and 1.2.
        # VALUE(flow context): ':'
        if self.scanner_processing_version == (1, 1):
            if bool(self.flow_level):
                return True
        else:
            if bool(self.flow_level):
                if self.flow_context[-1] == '[':
                    # inside a flow sequence a bare ':' needs a following
                    # space/line-end to be a value indicator
                    if self.reader.peek(1) not in _THE_END_SPACE_TAB:
                        return False
                elif self.tokens and isinstance(self.tokens[-1], ValueToken):
                    # mapping flow context scanning a value token
                    if self.reader.peek(1) not in _THE_END_SPACE_TAB:
                        return False
                return True
        # VALUE(block context): ':' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB
808 | |
    def check_plain(self):
        # type: () -> Any
        # A plain scalar may start with any non-space character except:
        #   '-', '?', ':', ',', '[', ']', '{', '}',
        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
        #   '%', '@', '`'.
        #
        # It may also start with
        #   '-', '?', ':'
        # if it is followed by a non-space character.
        #
        # Note that we limit the last rule to the block context (except the
        # '-' character) because we want the flow context to be space
        # independent.
        srp = self.reader.peek
        ch = srp()
        if self.scanner_processing_version == (1, 1):
            return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`' or (
                srp(1) not in _THE_END_SPACE_TAB
                and (ch == '-' or (not self.flow_level and ch in '?:'))
            )
        # YAML 1.2
        if ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`':
            # NOTE(review): ':' is still in the exclusion set above although
            # YAML 1.2 relaxes ':' handling; the dedicated '-'/':' special
            # cases below compensate — confirm against the 1.2 spec.
            return True
        ch1 = srp(1)
        if ch == '-' and ch1 not in _THE_END_SPACE_TAB:
            return True
        if ch == ':' and bool(self.flow_level) and ch1 not in _SPACE_TAB:
            return True

        return srp(1) not in _THE_END_SPACE_TAB and (
            ch == '-' or (not self.flow_level and ch in '?:')
        )
843 | |
844 # Scanners. | |
845 | |
    def scan_to_next_token(self):
        # type: () -> Any
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        # Always returns None in the base scanner (comment-preserving
        # subclasses return the gathered comment instead).
        #
        # TODO: We need to make tab handling rules more sane. A good rule is
        #   Tabs cannot precede tokens
        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        #   KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        #   if <TAB>:
        #       self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.
        srp = self.reader.peek
        srf = self.reader.forward
        # Strip a leading BOM.
        if self.reader.index == 0 and srp() == '\uFEFF':
            srf()
        found = False
        _the_end = _THE_END
        while not found:
            # Skip spaces, then an entire '#' comment, then the line break.
            while srp() == ' ':
                srf()
            if srp() == '#':
                while srp() not in _the_end:
                    srf()
            if self.scan_line_break():
                if not self.flow_level:
                    # A line break in block context re-enables simple keys.
                    self.allow_simple_key = True
            else:
                found = True
        return None
884 | |
    def scan_directive(self):
        # type: () -> Any
        # Scan a '%NAME value' directive line into a DirectiveToken.
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        start_mark = self.reader.get_mark()
        srf()  # skip the '%' indicator
        name = self.scan_directive_name(start_mark)
        value = None
        if name == 'YAML':
            value = self.scan_yaml_directive_value(start_mark)
            end_mark = self.reader.get_mark()
        elif name == 'TAG':
            value = self.scan_tag_directive_value(start_mark)
            end_mark = self.reader.get_mark()
        else:
            # Unknown directive: skip its payload up to the line end.
            end_mark = self.reader.get_mark()
            while srp() not in _THE_END:
                srf()
        self.scan_directive_ignored_line(start_mark)
        return DirectiveToken(name, value, start_mark, end_mark)
906 | |
907 def scan_directive_name(self, start_mark): | |
908 # type: (Any) -> Any | |
909 # See the specification for details. | |
910 length = 0 | |
911 srp = self.reader.peek | |
912 ch = srp(length) | |
913 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_:.': | |
914 length += 1 | |
915 ch = srp(length) | |
916 if not length: | |
917 raise ScannerError( | |
918 'while scanning a directive', | |
919 start_mark, | |
920 'expected alphabetic or numeric character, but found %r' % utf8(ch), | |
921 self.reader.get_mark(), | |
922 ) | |
923 value = self.reader.prefix(length) | |
924 self.reader.forward(length) | |
925 ch = srp() | |
926 if ch not in '\0 \r\n\x85\u2028\u2029': | |
927 raise ScannerError( | |
928 'while scanning a directive', | |
929 start_mark, | |
930 'expected alphabetic or numeric character, but found %r' % utf8(ch), | |
931 self.reader.get_mark(), | |
932 ) | |
933 return value | |
934 | |
    def scan_yaml_directive_value(self, start_mark):
        # type: (Any) -> Any
        # Scan the "major.minor" version pair of a %YAML directive and
        # record it on the scanner as ``yaml_version``.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        major = self.scan_yaml_directive_number(start_mark)
        if srp() != '.':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected a digit or '.', but found %r" % utf8(srp()),
                self.reader.get_mark(),
            )
        srf()  # skip the '.'
        minor = self.scan_yaml_directive_number(start_mark)
        if srp() not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected a digit or ' ', but found %r" % utf8(srp()),
                self.reader.get_mark(),
            )
        self.yaml_version = (major, minor)
        return self.yaml_version
961 | |
962 def scan_yaml_directive_number(self, start_mark): | |
963 # type: (Any) -> Any | |
964 # See the specification for details. | |
965 srp = self.reader.peek | |
966 srf = self.reader.forward | |
967 ch = srp() | |
968 if not ('0' <= ch <= '9'): | |
969 raise ScannerError( | |
970 'while scanning a directive', | |
971 start_mark, | |
972 'expected a digit, but found %r' % utf8(ch), | |
973 self.reader.get_mark(), | |
974 ) | |
975 length = 0 | |
976 while '0' <= srp(length) <= '9': | |
977 length += 1 | |
978 value = int(self.reader.prefix(length)) | |
979 srf(length) | |
980 return value | |
981 | |
982 def scan_tag_directive_value(self, start_mark): | |
983 # type: (Any) -> Any | |
984 # See the specification for details. | |
985 srp = self.reader.peek | |
986 srf = self.reader.forward | |
987 while srp() == ' ': | |
988 srf() | |
989 handle = self.scan_tag_directive_handle(start_mark) | |
990 while srp() == ' ': | |
991 srf() | |
992 prefix = self.scan_tag_directive_prefix(start_mark) | |
993 return (handle, prefix) | |
994 | |
995 def scan_tag_directive_handle(self, start_mark): | |
996 # type: (Any) -> Any | |
997 # See the specification for details. | |
998 value = self.scan_tag_handle('directive', start_mark) | |
999 ch = self.reader.peek() | |
1000 if ch != ' ': | |
1001 raise ScannerError( | |
1002 'while scanning a directive', | |
1003 start_mark, | |
1004 "expected ' ', but found %r" % utf8(ch), | |
1005 self.reader.get_mark(), | |
1006 ) | |
1007 return value | |
1008 | |
1009 def scan_tag_directive_prefix(self, start_mark): | |
1010 # type: (Any) -> Any | |
1011 # See the specification for details. | |
1012 value = self.scan_tag_uri('directive', start_mark) | |
1013 ch = self.reader.peek() | |
1014 if ch not in '\0 \r\n\x85\u2028\u2029': | |
1015 raise ScannerError( | |
1016 'while scanning a directive', | |
1017 start_mark, | |
1018 "expected ' ', but found %r" % utf8(ch), | |
1019 self.reader.get_mark(), | |
1020 ) | |
1021 return value | |
1022 | |
1023 def scan_directive_ignored_line(self, start_mark): | |
1024 # type: (Any) -> None | |
1025 # See the specification for details. | |
1026 srp = self.reader.peek | |
1027 srf = self.reader.forward | |
1028 while srp() == ' ': | |
1029 srf() | |
1030 if srp() == '#': | |
1031 while srp() not in _THE_END: | |
1032 srf() | |
1033 ch = srp() | |
1034 if ch not in _THE_END: | |
1035 raise ScannerError( | |
1036 'while scanning a directive', | |
1037 start_mark, | |
1038 'expected a comment or a line break, but found %r' % utf8(ch), | |
1039 self.reader.get_mark(), | |
1040 ) | |
1041 self.scan_line_break() | |
1042 | |
    def scan_anchor(self, TokenClass):
        # type: (Any) -> Any
        # Scan an anchor (&name) or alias (*name); TokenClass selects the
        # kind of token returned (AnchorToken vs. AliasToken).
        # The specification does not restrict characters for anchors and
        # aliases. This may lead to problems, for instance, the document:
        #   [ *alias, value ]
        # can be interpreted in two ways, as
        #   [ "value" ]
        # and
        #   [ *alias , "value" ]
        # Therefore we restrict aliases to numbers and ASCII letters.
        srp = self.reader.peek
        start_mark = self.reader.get_mark()
        indicator = srp()
        if indicator == '*':
            name = 'alias'
        else:
            name = 'anchor'
        self.reader.forward()  # skip the '*' / '&' indicator
        length = 0
        ch = srp(length)
        # while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
        #         or ch in u'-_':
        while check_anchorname_char(ch):
            length += 1
            ch = srp(length)
        if not length:
            raise ScannerError(
                'while scanning an %s' % (name,),
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        value = self.reader.prefix(length)
        self.reader.forward(length)
        # ch1 = ch
        # ch = srp()  # no need to peek, ch is already set
        # assert ch1 == ch
        # the anchor name must be followed by whitespace or flow punctuation
        if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`':
            raise ScannerError(
                'while scanning an %s' % (name,),
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        end_mark = self.reader.get_mark()
        return TokenClass(value, start_mark, end_mark)
1089 | |
1090 def scan_tag(self): | |
1091 # type: () -> Any | |
1092 # See the specification for details. | |
1093 srp = self.reader.peek | |
1094 start_mark = self.reader.get_mark() | |
1095 ch = srp(1) | |
1096 if ch == '<': | |
1097 handle = None | |
1098 self.reader.forward(2) | |
1099 suffix = self.scan_tag_uri('tag', start_mark) | |
1100 if srp() != '>': | |
1101 raise ScannerError( | |
1102 'while parsing a tag', | |
1103 start_mark, | |
1104 "expected '>', but found %r" % utf8(srp()), | |
1105 self.reader.get_mark(), | |
1106 ) | |
1107 self.reader.forward() | |
1108 elif ch in _THE_END_SPACE_TAB: | |
1109 handle = None | |
1110 suffix = '!' | |
1111 self.reader.forward() | |
1112 else: | |
1113 length = 1 | |
1114 use_handle = False | |
1115 while ch not in '\0 \r\n\x85\u2028\u2029': | |
1116 if ch == '!': | |
1117 use_handle = True | |
1118 break | |
1119 length += 1 | |
1120 ch = srp(length) | |
1121 handle = '!' | |
1122 if use_handle: | |
1123 handle = self.scan_tag_handle('tag', start_mark) | |
1124 else: | |
1125 handle = '!' | |
1126 self.reader.forward() | |
1127 suffix = self.scan_tag_uri('tag', start_mark) | |
1128 ch = srp() | |
1129 if ch not in '\0 \r\n\x85\u2028\u2029': | |
1130 raise ScannerError( | |
1131 'while scanning a tag', | |
1132 start_mark, | |
1133 "expected ' ', but found %r" % utf8(ch), | |
1134 self.reader.get_mark(), | |
1135 ) | |
1136 value = (handle, suffix) | |
1137 end_mark = self.reader.get_mark() | |
1138 return TagToken(value, start_mark, end_mark) | |
1139 | |
    def scan_block_scalar(self, style, rt=False):
        # type: (Any, Optional[bool]) -> Any
        # Scan a literal ('|') or folded ('>') block scalar.  When ``rt``
        # (round-trip) is true, '\a' markers are inserted where folding
        # occurred so the representer can restore the original breaks.
        srp = self.reader.peek
        if style == '>':
            folded = True
        else:
            folded = False

        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()

        # Scan the header.
        self.reader.forward()
        chomping, increment = self.scan_block_scalar_indicators(start_mark)
        # block scalar comment e.g. : |+ # comment text
        block_scalar_comment = self.scan_block_scalar_ignored_line(start_mark)

        # Determine the indentation level and go to the first non-empty line.
        min_indent = self.indent + 1
        if increment is None:
            # no increment and top level, min_indent could be 0
            if min_indent < 1 and (
                style not in '|>'
                or (self.scanner_processing_version == (1, 1))
                and getattr(
                    self.loader, 'top_level_block_style_scalar_no_indent_error_1_1', False
                )
            ):
                min_indent = 1
            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
            indent = max(min_indent, max_indent)
        else:
            if min_indent < 1:
                min_indent = 1
            indent = min_indent + increment - 1
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
        line_break = ""

        # Scan the inner part of the block scalar.
        while self.reader.column == indent and srp() != '\0':
            chunks.extend(breaks)
            leading_non_space = srp() not in ' \t'
            length = 0
            while srp(length) not in _THE_END:
                length += 1
            chunks.append(self.reader.prefix(length))
            self.reader.forward(length)
            line_break = self.scan_line_break()
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
            if style in '|>' and min_indent == 0:
                # at the beginning of a line, if in block style see if
                # end of document/start_new_document
                if self.check_document_start() or self.check_document_end():
                    break
            if self.reader.column == indent and srp() != '\0':

                # Unfortunately, folding rules are ambiguous.
                #
                # This is the folding according to the specification:

                if rt and folded and line_break == '\n':
                    chunks.append('\a')
                if folded and line_break == '\n' and leading_non_space and srp() not in ' \t':
                    if not breaks:
                        chunks.append(' ')
                else:
                    chunks.append(line_break)

                # This is Clark Evans's interpretation (also in the spec
                # examples):
                #
                # if folded and line_break == u'\n':
                #     if not breaks:
                #         if srp() not in ' \t':
                #             chunks.append(u' ')
                #         else:
                #             chunks.append(line_break)
                # else:
                #     chunks.append(line_break)
            else:
                break

        # Process trailing line breaks. The 'chomping' setting determines
        # whether they are included in the value.
        trailing = []  # type: List[Any]
        if chomping in [None, True]:
            chunks.append(line_break)
        if chomping is True:
            chunks.extend(breaks)
        elif chomping in [None, False]:
            trailing.extend(breaks)

        # We are done.
        token = ScalarToken("".join(chunks), False, start_mark, end_mark, style)
        if block_scalar_comment is not None:
            token.add_pre_comments([block_scalar_comment])
        if len(trailing) > 0:
            # Eat whitespaces and comments until we reach the next token.
            comment = self.scan_to_next_token()
            while comment:
                trailing.append(' ' * comment[1].column + comment[0])
                comment = self.scan_to_next_token()

            # Keep track of the trailing whitespace and following comments
            # as a comment token, if isn't all included in the actual value.
            comment_end_mark = self.reader.get_mark()
            comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
            token.add_post_comment(comment)
        return token
1251 | |
1252 def scan_block_scalar_indicators(self, start_mark): | |
1253 # type: (Any) -> Any | |
1254 # See the specification for details. | |
1255 srp = self.reader.peek | |
1256 chomping = None | |
1257 increment = None | |
1258 ch = srp() | |
1259 if ch in '+-': | |
1260 if ch == '+': | |
1261 chomping = True | |
1262 else: | |
1263 chomping = False | |
1264 self.reader.forward() | |
1265 ch = srp() | |
1266 if ch in '0123456789': | |
1267 increment = int(ch) | |
1268 if increment == 0: | |
1269 raise ScannerError( | |
1270 'while scanning a block scalar', | |
1271 start_mark, | |
1272 'expected indentation indicator in the range 1-9, ' 'but found 0', | |
1273 self.reader.get_mark(), | |
1274 ) | |
1275 self.reader.forward() | |
1276 elif ch in '0123456789': | |
1277 increment = int(ch) | |
1278 if increment == 0: | |
1279 raise ScannerError( | |
1280 'while scanning a block scalar', | |
1281 start_mark, | |
1282 'expected indentation indicator in the range 1-9, ' 'but found 0', | |
1283 self.reader.get_mark(), | |
1284 ) | |
1285 self.reader.forward() | |
1286 ch = srp() | |
1287 if ch in '+-': | |
1288 if ch == '+': | |
1289 chomping = True | |
1290 else: | |
1291 chomping = False | |
1292 self.reader.forward() | |
1293 ch = srp() | |
1294 if ch not in '\0 \r\n\x85\u2028\u2029': | |
1295 raise ScannerError( | |
1296 'while scanning a block scalar', | |
1297 start_mark, | |
1298 'expected chomping or indentation indicators, but found %r' % utf8(ch), | |
1299 self.reader.get_mark(), | |
1300 ) | |
1301 return chomping, increment | |
1302 | |
1303 def scan_block_scalar_ignored_line(self, start_mark): | |
1304 # type: (Any) -> Any | |
1305 # See the specification for details. | |
1306 srp = self.reader.peek | |
1307 srf = self.reader.forward | |
1308 prefix = '' | |
1309 comment = None | |
1310 while srp() == ' ': | |
1311 prefix += srp() | |
1312 srf() | |
1313 if srp() == '#': | |
1314 comment = prefix | |
1315 while srp() not in _THE_END: | |
1316 comment += srp() | |
1317 srf() | |
1318 ch = srp() | |
1319 if ch not in _THE_END: | |
1320 raise ScannerError( | |
1321 'while scanning a block scalar', | |
1322 start_mark, | |
1323 'expected a comment or a line break, but found %r' % utf8(ch), | |
1324 self.reader.get_mark(), | |
1325 ) | |
1326 self.scan_line_break() | |
1327 return comment | |
1328 | |
1329 def scan_block_scalar_indentation(self): | |
1330 # type: () -> Any | |
1331 # See the specification for details. | |
1332 srp = self.reader.peek | |
1333 srf = self.reader.forward | |
1334 chunks = [] | |
1335 max_indent = 0 | |
1336 end_mark = self.reader.get_mark() | |
1337 while srp() in ' \r\n\x85\u2028\u2029': | |
1338 if srp() != ' ': | |
1339 chunks.append(self.scan_line_break()) | |
1340 end_mark = self.reader.get_mark() | |
1341 else: | |
1342 srf() | |
1343 if self.reader.column > max_indent: | |
1344 max_indent = self.reader.column | |
1345 return chunks, max_indent, end_mark | |
1346 | |
1347 def scan_block_scalar_breaks(self, indent): | |
1348 # type: (int) -> Any | |
1349 # See the specification for details. | |
1350 chunks = [] | |
1351 srp = self.reader.peek | |
1352 srf = self.reader.forward | |
1353 end_mark = self.reader.get_mark() | |
1354 while self.reader.column < indent and srp() == ' ': | |
1355 srf() | |
1356 while srp() in '\r\n\x85\u2028\u2029': | |
1357 chunks.append(self.scan_line_break()) | |
1358 end_mark = self.reader.get_mark() | |
1359 while self.reader.column < indent and srp() == ' ': | |
1360 srf() | |
1361 return chunks, end_mark | |
1362 | |
1363 def scan_flow_scalar(self, style): | |
1364 # type: (Any) -> Any | |
1365 # See the specification for details. | |
1366 # Note that we loose indentation rules for quoted scalars. Quoted | |
1367 # scalars don't need to adhere indentation because " and ' clearly | |
1368 # mark the beginning and the end of them. Therefore we are less | |
1369 # restrictive then the specification requires. We only need to check | |
1370 # that document separators are not included in scalars. | |
1371 if style == '"': | |
1372 double = True | |
1373 else: | |
1374 double = False | |
1375 srp = self.reader.peek | |
1376 chunks = [] # type: List[Any] | |
1377 start_mark = self.reader.get_mark() | |
1378 quote = srp() | |
1379 self.reader.forward() | |
1380 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) | |
1381 while srp() != quote: | |
1382 chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) | |
1383 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) | |
1384 self.reader.forward() | |
1385 end_mark = self.reader.get_mark() | |
1386 return ScalarToken("".join(chunks), False, start_mark, end_mark, style) | |
1387 | |
    # Single-character escapes in double-quoted scalars: the character
    # following the backslash maps to its replacement text.
    ESCAPE_REPLACEMENTS = {
        '0': '\0',
        'a': '\x07',
        'b': '\x08',
        't': '\x09',
        '\t': '\x09',
        'n': '\x0A',
        'v': '\x0B',
        'f': '\x0C',
        'r': '\x0D',
        'e': '\x1B',
        ' ': '\x20',
        '"': '"',
        '/': '/',  # as per http://www.json.org/
        '\\': '\\',
        'N': '\x85',
        '_': '\xA0',
        'L': '\u2028',
        'P': '\u2029',
    }

    # Numeric escapes: introducer character -> number of hex digits expected.
    ESCAPE_CODES = {'x': 2, 'u': 4, 'U': 8}
1410 | |
    def scan_flow_scalar_non_spaces(self, double, start_mark):
        # type: (Any, Any) -> Any
        # Scan the non-whitespace portions of a quoted scalar: plain runs,
        # '' escapes (single-quoted) and backslash escapes (double-quoted).
        chunks = []  # type: List[Any]
        srp = self.reader.peek
        srf = self.reader.forward
        while True:
            # grab the longest run of ordinary characters in one slice
            length = 0
            while srp(length) not in ' \n\'"\\\0\t\r\x85\u2028\u2029':
                length += 1
            if length != 0:
                chunks.append(self.reader.prefix(length))
                srf(length)
            ch = srp()
            if not double and ch == "'" and srp(1) == "'":
                # '' inside a single-quoted scalar is an escaped quote
                chunks.append("'")
                srf(2)
            elif (double and ch == "'") or (not double and ch in '"\\'):
                # quote of the "other" style (or backslash in single-quoted)
                # is a literal character
                chunks.append(ch)
                srf()
            elif double and ch == '\\':
                srf()
                ch = srp()
                if ch in self.ESCAPE_REPLACEMENTS:
                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                    srf()
                elif ch in self.ESCAPE_CODES:
                    # numeric escape: \xXX, \uXXXX or \UXXXXXXXX
                    length = self.ESCAPE_CODES[ch]
                    srf()
                    for k in range(length):
                        if srp(k) not in '0123456789ABCDEFabcdef':
                            raise ScannerError(
                                'while scanning a double-quoted scalar',
                                start_mark,
                                'expected escape sequence of %d hexdecimal '
                                'numbers, but found %r' % (length, utf8(srp(k))),
                                self.reader.get_mark(),
                            )
                    code = int(self.reader.prefix(length), 16)
                    chunks.append(unichr(code))
                    srf(length)
                elif ch in '\n\r\x85\u2028\u2029':
                    # escaped line break: the break itself is folded away
                    self.scan_line_break()
                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                else:
                    raise ScannerError(
                        'while scanning a double-quoted scalar',
                        start_mark,
                        'found unknown escape character %r' % utf8(ch),
                        self.reader.get_mark(),
                    )
            else:
                # whitespace, quote end or stream end: let the caller decide
                return chunks
1464 | |
    def scan_flow_scalar_spaces(self, double, start_mark):
        # type: (Any, Any) -> Any
        # Scan a run of spaces/tabs (possibly followed by line breaks)
        # inside a quoted scalar, applying the flow folding rules.
        srp = self.reader.peek
        chunks = []
        length = 0
        while srp(length) in ' \t':
            length += 1
        whitespaces = self.reader.prefix(length)
        self.reader.forward(length)
        ch = srp()
        if ch == '\0':
            raise ScannerError(
                'while scanning a quoted scalar',
                start_mark,
                'found unexpected end of stream',
                self.reader.get_mark(),
            )
        elif ch in '\r\n\x85\u2028\u2029':
            line_break = self.scan_line_break()
            breaks = self.scan_flow_scalar_breaks(double, start_mark)
            if line_break != '\n':
                chunks.append(line_break)
            elif not breaks:
                # a single '\n' folds into one space
                chunks.append(' ')
            chunks.extend(breaks)
        else:
            # no line break: the whitespace is kept literally
            chunks.append(whitespaces)
        return chunks
1494 | |
1495 def scan_flow_scalar_breaks(self, double, start_mark): | |
1496 # type: (Any, Any) -> Any | |
1497 # See the specification for details. | |
1498 chunks = [] # type: List[Any] | |
1499 srp = self.reader.peek | |
1500 srf = self.reader.forward | |
1501 while True: | |
1502 # Instead of checking indentation, we check for document | |
1503 # separators. | |
1504 prefix = self.reader.prefix(3) | |
1505 if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB: | |
1506 raise ScannerError( | |
1507 'while scanning a quoted scalar', | |
1508 start_mark, | |
1509 'found unexpected document separator', | |
1510 self.reader.get_mark(), | |
1511 ) | |
1512 while srp() in ' \t': | |
1513 srf() | |
1514 if srp() in '\r\n\x85\u2028\u2029': | |
1515 chunks.append(self.scan_line_break()) | |
1516 else: | |
1517 return chunks | |
1518 | |
    def scan_plain(self):
        # type: () -> Any
        # Scan an unquoted (plain) scalar.
        # We add an additional restriction for the flow context:
        #   plain scalars in the flow context cannot contain ',', ': ' and '?'.
        # We also keep track of the `allow_simple_key` flag here.
        # Indentation rules are loosed for the flow context.
        srp = self.reader.peek
        srf = self.reader.forward
        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()
        end_mark = start_mark
        indent = self.indent + 1
        # We allow zero indentation for scalars, but then we need to check for
        # document separators at the beginning of the line.
        # if indent == 0:
        #     indent = 1
        spaces = []  # type: List[Any]
        while True:
            length = 0
            if srp() == '#':
                # a comment ends the scalar
                break
            while True:
                ch = srp(length)
                if ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB:
                    pass  # ':' not followed by space is part of the scalar
                elif ch == '?' and self.scanner_processing_version != (1, 1):
                    pass  # '?' is allowed in plain scalars from YAML 1.2 on
                elif (
                    ch in _THE_END_SPACE_TAB
                    or (
                        not self.flow_level
                        and ch == ':'
                        and srp(length + 1) in _THE_END_SPACE_TAB
                    )
                    or (self.flow_level and ch in ',:?[]{}')
                ):
                    break
                length += 1
            # It's not clear what we should do with ':' in the flow context.
            if (
                self.flow_level
                and ch == ':'
                and srp(length + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'
            ):
                srf(length)
                raise ScannerError(
                    'while scanning a plain scalar',
                    start_mark,
                    "found unexpected ':'",
                    self.reader.get_mark(),
                    'Please check '
                    'http://pyyaml.org/wiki/YAMLColonInFlowContext '
                    'for details.',
                )
            if length == 0:
                break
            self.allow_simple_key = False
            chunks.extend(spaces)
            chunks.append(self.reader.prefix(length))
            srf(length)
            end_mark = self.reader.get_mark()
            spaces = self.scan_plain_spaces(indent, start_mark)
            # scan_plain_spaces returns None on a document separator
            if (
                not spaces
                or srp() == '#'
                or (not self.flow_level and self.reader.column < indent)
            ):
                break

        token = ScalarToken("".join(chunks), True, start_mark, end_mark)
        if spaces and spaces[0] == '\n':
            # Create a comment token to preserve the trailing line breaks.
            comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
            token.add_post_comment(comment)
        return token
1595 | |
    def scan_plain_spaces(self, indent, start_mark):
        # type: (Any, Any) -> Any
        # Scan the whitespace/line breaks between the words of a plain
        # scalar, applying folding.  Returns None when a document separator
        # is found, otherwise the list of chunks to append.
        # The specification is really confusing about tabs in plain scalars.
        # We just forbid them completely. Do not use tabs in YAML!
        srp = self.reader.peek
        srf = self.reader.forward
        chunks = []
        length = 0
        while srp(length) in ' ':
            length += 1
        whitespaces = self.reader.prefix(length)
        self.reader.forward(length)
        ch = srp()
        if ch in '\r\n\x85\u2028\u2029':
            line_break = self.scan_line_break()
            # a line break re-enables simple keys
            self.allow_simple_key = True
            prefix = self.reader.prefix(3)
            if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB:
                return
            breaks = []
            while srp() in ' \r\n\x85\u2028\u2029':
                if srp() == ' ':
                    srf()
                else:
                    breaks.append(self.scan_line_break())
                    prefix = self.reader.prefix(3)
                    if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB:
                        return
            if line_break != '\n':
                chunks.append(line_break)
            elif not breaks:
                # a lone '\n' folds into a single space
                chunks.append(' ')
            chunks.extend(breaks)
        elif whitespaces:
            chunks.append(whitespaces)
        return chunks
1633 | |
1634 def scan_tag_handle(self, name, start_mark): | |
1635 # type: (Any, Any) -> Any | |
1636 # See the specification for details. | |
1637 # For some strange reasons, the specification does not allow '_' in | |
1638 # tag handles. I have allowed it anyway. | |
1639 srp = self.reader.peek | |
1640 ch = srp() | |
1641 if ch != '!': | |
1642 raise ScannerError( | |
1643 'while scanning a %s' % (name,), | |
1644 start_mark, | |
1645 "expected '!', but found %r" % utf8(ch), | |
1646 self.reader.get_mark(), | |
1647 ) | |
1648 length = 1 | |
1649 ch = srp(length) | |
1650 if ch != ' ': | |
1651 while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_': | |
1652 length += 1 | |
1653 ch = srp(length) | |
1654 if ch != '!': | |
1655 self.reader.forward(length) | |
1656 raise ScannerError( | |
1657 'while scanning a %s' % (name,), | |
1658 start_mark, | |
1659 "expected '!', but found %r" % utf8(ch), | |
1660 self.reader.get_mark(), | |
1661 ) | |
1662 length += 1 | |
1663 value = self.reader.prefix(length) | |
1664 self.reader.forward(length) | |
1665 return value | |
1666 | |
    def scan_tag_uri(self, name, start_mark):
        # type: (Any, Any) -> Any
        # Scan a tag URI (the suffix of a tag, or the prefix of a %TAG
        # directive).  Note: we do not check if the URI is well-formed.
        srp = self.reader.peek
        chunks = []
        length = 0
        ch = srp(length)
        while (
            '0' <= ch <= '9'
            or 'A' <= ch <= 'Z'
            or 'a' <= ch <= 'z'
            or ch in "-;/?:@&=+$,_.!~*'()[]%"
            or ((self.scanner_processing_version > (1, 1)) and ch == '#')
        ):
            if ch == '%':
                # flush the literal part scanned so far, then decode %xx
                chunks.append(self.reader.prefix(length))
                self.reader.forward(length)
                length = 0
                chunks.append(self.scan_uri_escapes(name, start_mark))
            else:
                length += 1
            ch = srp(length)
        if length != 0:
            chunks.append(self.reader.prefix(length))
            self.reader.forward(length)
            length = 0
        if not chunks:
            raise ScannerError(
                'while parsing a %s' % (name,),
                start_mark,
                'expected URI, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        return "".join(chunks)
1702 | |
    def scan_uri_escapes(self, name, start_mark):
        # type: (Any, Any) -> Any
        # Decode a run of %xx escape sequences as UTF-8 text.
        srp = self.reader.peek
        srf = self.reader.forward
        code_bytes = []  # type: List[Any]
        mark = self.reader.get_mark()
        while srp() == '%':
            srf()
            for k in range(2):
                if srp(k) not in '0123456789ABCDEFabcdef':
                    raise ScannerError(
                        'while scanning a %s' % (name,),
                        start_mark,
                        'expected URI escape sequence of 2 hexdecimal numbers,'
                        ' but found %r' % utf8(srp(k)),
                        self.reader.get_mark(),
                    )
            if PY3:
                # Python 3: collect ints for bytes()
                code_bytes.append(int(self.reader.prefix(2), 16))
            else:
                # Python 2: collect raw byte characters
                code_bytes.append(chr(int(self.reader.prefix(2), 16)))
            srf(2)
        try:
            if PY3:
                value = bytes(code_bytes).decode('utf-8')
            else:
                value = unicode(b"".join(code_bytes), 'utf-8')
        except UnicodeDecodeError as exc:
            raise ScannerError('while scanning a %s' % (name,), start_mark, str(exc), mark)
        return value
1734 | |
1735 def scan_line_break(self): | |
1736 # type: () -> Any | |
1737 # Transforms: | |
1738 # '\r\n' : '\n' | |
1739 # '\r' : '\n' | |
1740 # '\n' : '\n' | |
1741 # '\x85' : '\n' | |
1742 # '\u2028' : '\u2028' | |
1743 # '\u2029 : '\u2029' | |
1744 # default : '' | |
1745 ch = self.reader.peek() | |
1746 if ch in '\r\n\x85': | |
1747 if self.reader.prefix(2) == '\r\n': | |
1748 self.reader.forward(2) | |
1749 else: | |
1750 self.reader.forward() | |
1751 return '\n' | |
1752 elif ch in '\u2028\u2029': | |
1753 self.reader.forward() | |
1754 return ch | |
1755 return "" | |
1756 | |
1757 | |
1758 class RoundTripScanner(Scanner): | |
1759 def check_token(self, *choices): | |
1760 # type: (Any) -> bool | |
1761 # Check if the next token is one of the given types. | |
1762 while self.need_more_tokens(): | |
1763 self.fetch_more_tokens() | |
1764 self._gather_comments() | |
1765 if bool(self.tokens): | |
1766 if not choices: | |
1767 return True | |
1768 for choice in choices: | |
1769 if isinstance(self.tokens[0], choice): | |
1770 return True | |
1771 return False | |
1772 | |
1773 def peek_token(self): | |
1774 # type: () -> Any | |
1775 # Return the next token, but do not delete if from the queue. | |
1776 while self.need_more_tokens(): | |
1777 self.fetch_more_tokens() | |
1778 self._gather_comments() | |
1779 if bool(self.tokens): | |
1780 return self.tokens[0] | |
1781 return None | |
1782 | |
    def _gather_comments(self):
        # type: () -> Any
        """combine multiple comment lines"""
        # Pop leading CommentTokens off the queue and attach them as
        # pre-comments to the first real token.
        comments = []  # type: List[Any]
        if not self.tokens:
            return comments
        if isinstance(self.tokens[0], CommentToken):
            comment = self.tokens.pop(0)
            self.tokens_taken += 1
            comments.append(comment)
        while self.need_more_tokens():
            self.fetch_more_tokens()
            if not self.tokens:
                return comments
            if isinstance(self.tokens[0], CommentToken):
                self.tokens_taken += 1
                comment = self.tokens.pop(0)
                comments.append(comment)
        if len(comments) >= 1:
            self.tokens[0].add_pre_comments(comments)
        # pull in post comment on e.g. ':'
        if not self.done and len(self.tokens) < 2:
            self.fetch_more_tokens()
1807 | |
    def get_token(self):
        # type: () -> Any
        # Return the next token, attaching trailing CommentTokens as
        # post-comments where that is safe to do.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        self._gather_comments()
        if bool(self.tokens):
            # only add post comment to single line tokens:
            # scalar, value token. FlowXEndToken, otherwise
            # hidden streamtokens could get them (leave them and they will be
            # pre comments for the next map/seq
            if (
                len(self.tokens) > 1
                and isinstance(
                    self.tokens[0],
                    (ScalarToken, ValueToken, FlowSequenceEndToken, FlowMappingEndToken),
                )
                and isinstance(self.tokens[1], CommentToken)
                and self.tokens[0].end_mark.line == self.tokens[1].start_mark.line
            ):
                # same-line comment: fold following comment lines into it
                self.tokens_taken += 1
                c = self.tokens.pop(1)
                self.fetch_more_tokens()
                while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken):
                    self.tokens_taken += 1
                    c1 = self.tokens.pop(1)
                    c.value = c.value + (' ' * c1.start_mark.column) + c1.value
                    self.fetch_more_tokens()
                self.tokens[0].add_post_comment(c)
            elif (
                len(self.tokens) > 1
                and isinstance(self.tokens[0], ScalarToken)
                and isinstance(self.tokens[1], CommentToken)
                and self.tokens[0].end_mark.line != self.tokens[1].start_mark.line
            ):
                # comment on a later line: re-create its blank lines/indent
                self.tokens_taken += 1
                c = self.tokens.pop(1)
                c.value = (
                    '\n' * (c.start_mark.line - self.tokens[0].end_mark.line)
                    + (' ' * c.start_mark.column)
                    + c.value
                )
                self.tokens[0].add_post_comment(c)
                self.fetch_more_tokens()
                while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken):
                    self.tokens_taken += 1
                    c1 = self.tokens.pop(1)
                    c.value = c.value + (' ' * c1.start_mark.column) + c1.value
                    self.fetch_more_tokens()
            self.tokens_taken += 1
            return self.tokens.pop(0)
        return None
1861 | |
1862 def fetch_comment(self, comment): | |
1863 # type: (Any) -> None | |
1864 value, start_mark, end_mark = comment | |
1865 while value and value[-1] == ' ': | |
1866 # empty line within indented key context | |
1867 # no need to update end-mark, that is not used | |
1868 value = value[:-1] | |
1869 self.tokens.append(CommentToken(value, start_mark, end_mark)) | |
1870 | |
1871 # scanner | |
1872 | |
    def scan_to_next_token(self):
        # type: () -> Any
        """Skip whitespace up to the next token; return comment info if found.

        Returns a ``(comment_text, start_mark, end_mark)`` tuple when a
        comment (or run of empty top-level lines) precedes the next token,
        otherwise ``None``.  The RoundTripScanner uses the returned tuple to
        preserve comments; the base Scanner version simply discards them.
        """
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        #
        # TODO: We need to make tab handling rules more sane. A good rule is
        # Tabs cannot precede tokens
        # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        # KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        # if <TAB>:
        # self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.

        srp = self.reader.peek
        srf = self.reader.forward
        if self.reader.index == 0 and srp() == '\uFEFF':
            srf()
        found = False
        while not found:
            # skip spaces before the next token / comment
            while srp() == ' ':
                srf()
            ch = srp()
            if ch == '#':
                # collect the comment text up to the end of the line
                start_mark = self.reader.get_mark()
                comment = ch
                srf()
                while ch not in _THE_END:
                    ch = srp()
                    if ch == '\0':  # don't gobble the end-of-stream character
                        # but add an explicit newline as "YAML processors should terminate
                        # the stream with an explicit line break
                        # https://yaml.org/spec/1.2/spec.html#id2780069
                        comment += '\n'
                        break
                    comment += ch
                    srf()
                # gather any blank lines following the comment too
                ch = self.scan_line_break()
                while len(ch) > 0:
                    comment += ch
                    ch = self.scan_line_break()
                end_mark = self.reader.get_mark()
                if not self.flow_level:
                    self.allow_simple_key = True
                return comment, start_mark, end_mark
            if bool(self.scan_line_break()):
                start_mark = self.reader.get_mark()
                if not self.flow_level:
                    # a line break in block context allows a simple key next
                    self.allow_simple_key = True
                ch = srp()
                if ch == '\n':  # empty toplevel lines
                    start_mark = self.reader.get_mark()
                    comment = ""
                    while ch:
                        ch = self.scan_line_break(empty_line=True)
                        comment += ch
                    if srp() == '#':
                        # empty line followed by indented real comment
                        comment = comment.rsplit('\n', 1)[0] + '\n'
                    end_mark = self.reader.get_mark()
                    return comment, start_mark, end_mark
            else:
                found = True
        return None
1945 | |
1946 def scan_line_break(self, empty_line=False): | |
1947 # type: (bool) -> Text | |
1948 # Transforms: | |
1949 # '\r\n' : '\n' | |
1950 # '\r' : '\n' | |
1951 # '\n' : '\n' | |
1952 # '\x85' : '\n' | |
1953 # '\u2028' : '\u2028' | |
1954 # '\u2029 : '\u2029' | |
1955 # default : '' | |
1956 ch = self.reader.peek() # type: Text | |
1957 if ch in '\r\n\x85': | |
1958 if self.reader.prefix(2) == '\r\n': | |
1959 self.reader.forward(2) | |
1960 else: | |
1961 self.reader.forward() | |
1962 return '\n' | |
1963 elif ch in '\u2028\u2029': | |
1964 self.reader.forward() | |
1965 return ch | |
1966 elif empty_line and ch in '\t ': | |
1967 self.reader.forward() | |
1968 return ch | |
1969 return "" | |
1970 | |
    def scan_block_scalar(self, style, rt=True):
        # type: (Any, Optional[bool]) -> Any
        # Delegate to the base Scanner implementation, but default rt=True so
        # block scalars are scanned in round-trip mode (layout preserved).
        return Scanner.scan_block_scalar(self, style, rt=rt)
1974 | |
1975 | |
1976 # try: | |
1977 # import psyco | |
1978 # psyco.bind(Scanner) | |
1979 # except ImportError: | |
1980 # pass |