comparison env/lib/python3.9/site-packages/jinja2/lexer.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
1 # -*- coding: utf-8 -*-
2 """Implements a Jinja / Python combination lexer. The ``Lexer`` class
3 is used to do some preprocessing. It filters out invalid operators like
4 the bitshift operators we don't allow in templates. It separates
5 template code and python code in expressions.
6 """
7 import re
8 from ast import literal_eval
9 from collections import deque
10 from operator import itemgetter
11
12 from ._compat import implements_iterator
13 from ._compat import intern
14 from ._compat import iteritems
15 from ._compat import text_type
16 from .exceptions import TemplateSyntaxError
17 from .utils import LRUCache
18
19 # cache for the lexers. Exists in order to be able to have multiple
20 # environments with the same lexer
21 _lexer_cache = LRUCache(50)
22
23 # static regular expressions
24 whitespace_re = re.compile(r"\s+", re.U)
25 newline_re = re.compile(r"(\r\n|\r|\n)")
26 string_re = re.compile(
27 r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
28 )
29 integer_re = re.compile(r"(\d+_)*\d+")
30 float_re = re.compile(
31 r"""
32 (?<!\.) # doesn't start with a .
33 (\d+_)*\d+ # digits, possibly _ separated
34 (
35 (\.(\d+_)*\d+)? # optional fractional part
36 e[+\-]?(\d+_)*\d+ # exponent part
37 |
38 \.(\d+_)*\d+ # required fractional part
39 )
40 """,
41 re.IGNORECASE | re.VERBOSE,
42 )
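# --- editor's illustrative sketch, not part of the upstream module ---
# A quick check of what the number patterns above accept, using only the
# integer_re/float_re objects defined in this file.
assert integer_re.match("1_000").group() == "1_000"  # underscore-separated integer
assert float_re.match("2.5")                         # required fractional part
assert float_re.match("1_000e-3")                    # exponent-only form
assert float_re.match("42.") is None                 # a bare trailing dot is not a float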
43
44 try:
45 # check if this Python supports Unicode identifiers
46 compile("föö", "<unknown>", "eval")
47 except SyntaxError:
48 # Python 2, no Unicode support, use ASCII identifiers
49 name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
50 check_ident = False
51 else:
52 # Unicode support, import generated re pattern and set flag to use
53 # str.isidentifier to validate during lexing.
54 from ._identifier import pattern as name_re
55
56 check_ident = True
57
58 # intern the tokens and keep references to them
59 TOKEN_ADD = intern("add")
60 TOKEN_ASSIGN = intern("assign")
61 TOKEN_COLON = intern("colon")
62 TOKEN_COMMA = intern("comma")
63 TOKEN_DIV = intern("div")
64 TOKEN_DOT = intern("dot")
65 TOKEN_EQ = intern("eq")
66 TOKEN_FLOORDIV = intern("floordiv")
67 TOKEN_GT = intern("gt")
68 TOKEN_GTEQ = intern("gteq")
69 TOKEN_LBRACE = intern("lbrace")
70 TOKEN_LBRACKET = intern("lbracket")
71 TOKEN_LPAREN = intern("lparen")
72 TOKEN_LT = intern("lt")
73 TOKEN_LTEQ = intern("lteq")
74 TOKEN_MOD = intern("mod")
75 TOKEN_MUL = intern("mul")
76 TOKEN_NE = intern("ne")
77 TOKEN_PIPE = intern("pipe")
78 TOKEN_POW = intern("pow")
79 TOKEN_RBRACE = intern("rbrace")
80 TOKEN_RBRACKET = intern("rbracket")
81 TOKEN_RPAREN = intern("rparen")
82 TOKEN_SEMICOLON = intern("semicolon")
83 TOKEN_SUB = intern("sub")
84 TOKEN_TILDE = intern("tilde")
85 TOKEN_WHITESPACE = intern("whitespace")
86 TOKEN_FLOAT = intern("float")
87 TOKEN_INTEGER = intern("integer")
88 TOKEN_NAME = intern("name")
89 TOKEN_STRING = intern("string")
90 TOKEN_OPERATOR = intern("operator")
91 TOKEN_BLOCK_BEGIN = intern("block_begin")
92 TOKEN_BLOCK_END = intern("block_end")
93 TOKEN_VARIABLE_BEGIN = intern("variable_begin")
94 TOKEN_VARIABLE_END = intern("variable_end")
95 TOKEN_RAW_BEGIN = intern("raw_begin")
96 TOKEN_RAW_END = intern("raw_end")
97 TOKEN_COMMENT_BEGIN = intern("comment_begin")
98 TOKEN_COMMENT_END = intern("comment_end")
99 TOKEN_COMMENT = intern("comment")
100 TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
101 TOKEN_LINESTATEMENT_END = intern("linestatement_end")
102 TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
103 TOKEN_LINECOMMENT_END = intern("linecomment_end")
104 TOKEN_LINECOMMENT = intern("linecomment")
105 TOKEN_DATA = intern("data")
106 TOKEN_INITIAL = intern("initial")
107 TOKEN_EOF = intern("eof")
108
109 # bind operators to token types
110 operators = {
111 "+": TOKEN_ADD,
112 "-": TOKEN_SUB,
113 "/": TOKEN_DIV,
114 "//": TOKEN_FLOORDIV,
115 "*": TOKEN_MUL,
116 "%": TOKEN_MOD,
117 "**": TOKEN_POW,
118 "~": TOKEN_TILDE,
119 "[": TOKEN_LBRACKET,
120 "]": TOKEN_RBRACKET,
121 "(": TOKEN_LPAREN,
122 ")": TOKEN_RPAREN,
123 "{": TOKEN_LBRACE,
124 "}": TOKEN_RBRACE,
125 "==": TOKEN_EQ,
126 "!=": TOKEN_NE,
127 ">": TOKEN_GT,
128 ">=": TOKEN_GTEQ,
129 "<": TOKEN_LT,
130 "<=": TOKEN_LTEQ,
131 "=": TOKEN_ASSIGN,
132 ".": TOKEN_DOT,
133 ":": TOKEN_COLON,
134 "|": TOKEN_PIPE,
135 ",": TOKEN_COMMA,
136 ";": TOKEN_SEMICOLON,
137 }
138
139 reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
140 assert len(operators) == len(reverse_operators), "operators dropped"
141 operator_re = re.compile(
142 "(%s)" % "|".join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))
143 )
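# --- editor's illustrative sketch, not part of the upstream module ---
# Because the operators are sorted longest-first before being joined into one
# alternation, two-character operators win over their one-character prefixes.
assert operator_re.match("**").group() == "**"  # not just "*"
assert operator_re.match(">=").group() == ">="  # not just ">"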
144
145 ignored_tokens = frozenset(
146 [
147 TOKEN_COMMENT_BEGIN,
148 TOKEN_COMMENT,
149 TOKEN_COMMENT_END,
150 TOKEN_WHITESPACE,
151 TOKEN_LINECOMMENT_BEGIN,
152 TOKEN_LINECOMMENT_END,
153 TOKEN_LINECOMMENT,
154 ]
155 )
156 ignore_if_empty = frozenset(
157 [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
158 )
159
160
161 def _describe_token_type(token_type):
162 if token_type in reverse_operators:
163 return reverse_operators[token_type]
164 return {
165 TOKEN_COMMENT_BEGIN: "begin of comment",
166 TOKEN_COMMENT_END: "end of comment",
167 TOKEN_COMMENT: "comment",
168 TOKEN_LINECOMMENT: "comment",
169 TOKEN_BLOCK_BEGIN: "begin of statement block",
170 TOKEN_BLOCK_END: "end of statement block",
171 TOKEN_VARIABLE_BEGIN: "begin of print statement",
172 TOKEN_VARIABLE_END: "end of print statement",
173 TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
174 TOKEN_LINESTATEMENT_END: "end of line statement",
175 TOKEN_DATA: "template data / text",
176 TOKEN_EOF: "end of template",
177 }.get(token_type, token_type)
178
179
180 def describe_token(token):
181 """Returns a description of the token."""
182 if token.type == TOKEN_NAME:
183 return token.value
184 return _describe_token_type(token.type)
185
186
187 def describe_token_expr(expr):
188 """Like `describe_token` but for token expressions."""
189 if ":" in expr:
190 type, value = expr.split(":", 1)
191 if type == TOKEN_NAME:
192 return value
193 else:
194 type = expr
195 return _describe_token_type(type)
196
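# --- editor's illustrative sketch, not part of the upstream module ---
# A token expression is either a bare token type or "type:value"; name
# expressions are described by their value, everything else by its type.
assert describe_token_expr("name:endfor") == "endfor"
assert describe_token_expr("block_end") == "end of statement block"
assert describe_token_expr("integer") == "integer"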
197
198 def count_newlines(value):
199 """Count the number of newline characters in the string. This is
200 useful for extensions that filter a stream.
201 """
202 return len(newline_re.findall(value))
203
204
205 def compile_rules(environment):
206 """Compiles all the rules from the environment into a list of rules."""
207 e = re.escape
208 rules = [
209 (
210 len(environment.comment_start_string),
211 TOKEN_COMMENT_BEGIN,
212 e(environment.comment_start_string),
213 ),
214 (
215 len(environment.block_start_string),
216 TOKEN_BLOCK_BEGIN,
217 e(environment.block_start_string),
218 ),
219 (
220 len(environment.variable_start_string),
221 TOKEN_VARIABLE_BEGIN,
222 e(environment.variable_start_string),
223 ),
224 ]
225
226 if environment.line_statement_prefix is not None:
227 rules.append(
228 (
229 len(environment.line_statement_prefix),
230 TOKEN_LINESTATEMENT_BEGIN,
231 r"^[ \t\v]*" + e(environment.line_statement_prefix),
232 )
233 )
234 if environment.line_comment_prefix is not None:
235 rules.append(
236 (
237 len(environment.line_comment_prefix),
238 TOKEN_LINECOMMENT_BEGIN,
239 r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
240 )
241 )
242
243 return [x[1:] for x in sorted(rules, reverse=True)]
244
245
246 class Failure(object):
247 """Class that raises a `TemplateSyntaxError` if called.
248 Used by the `Lexer` to specify known errors.
249 """
250
251 def __init__(self, message, cls=TemplateSyntaxError):
252 self.message = message
253 self.error_class = cls
254
255 def __call__(self, lineno, filename):
256 raise self.error_class(self.message, lineno, filename)
257
258
259 class Token(tuple):
260 """Token class."""
261
262 __slots__ = ()
263 lineno, type, value = (property(itemgetter(x)) for x in range(3))
264
265 def __new__(cls, lineno, type, value):
266 return tuple.__new__(cls, (lineno, intern(str(type)), value))
267
268 def __str__(self):
269 if self.type in reverse_operators:
270 return reverse_operators[self.type]
271 elif self.type == "name":
272 return self.value
273 return self.type
274
275 def test(self, expr):
276 """Test a token against a token expression. This can either be a
277 token type or ``'token_type:token_value'``. This can only test
278 against string values and types.
279 """
280 # here we do a regular string equality check, as test_any is usually
281 # passed an iterable of non-interned strings.
282 if self.type == expr:
283 return True
284 elif ":" in expr:
285 return expr.split(":", 1) == [self.type, self.value]
286 return False
287
288 def test_any(self, *iterable):
289 """Test against multiple token expressions."""
290 for expr in iterable:
291 if self.test(expr):
292 return True
293 return False
294
295 def __repr__(self):
296 return "Token(%r, %r, %r)" % (self.lineno, self.type, self.value)
297
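# --- editor's illustrative sketch, not part of the upstream module ---
# A Token is a plain (lineno, type, value) tuple with named accessors; test()
# accepts either a token type or a "type:value" expression.  The _tok name is
# a throwaway used only for this demonstration.
_tok = Token(1, TOKEN_NAME, "user")
assert (_tok.lineno, _tok.type, _tok.value) == (1, "name", "user")
assert _tok.test("name") and _tok.test("name:user")
assert not _tok.test("name:other")
assert _tok.test_any("integer", "name:user")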
298
299 @implements_iterator
300 class TokenStreamIterator(object):
301 """The iterator for tokenstreams. Iterate over the stream
302 until the eof token is reached.
303 """
304
305 def __init__(self, stream):
306 self.stream = stream
307
308 def __iter__(self):
309 return self
310
311 def __next__(self):
312 token = self.stream.current
313 if token.type is TOKEN_EOF:
314 self.stream.close()
315 raise StopIteration()
316 next(self.stream)
317 return token
318
319
320 @implements_iterator
321 class TokenStream(object):
322 """A token stream is an iterable that yields :class:`Token`\\s. The
323 parser however does not iterate over it but calls :meth:`next` to go
324 one token ahead. The current active token is stored as :attr:`current`.
325 """
326
327 def __init__(self, generator, name, filename):
328 self._iter = iter(generator)
329 self._pushed = deque()
330 self.name = name
331 self.filename = filename
332 self.closed = False
333 self.current = Token(1, TOKEN_INITIAL, "")
334 next(self)
335
336 def __iter__(self):
337 return TokenStreamIterator(self)
338
339 def __bool__(self):
340 return bool(self._pushed) or self.current.type is not TOKEN_EOF
341
342 __nonzero__ = __bool__ # py2
343
344 @property
345 def eos(self):
346 """Are we at the end of the stream?"""
347 return not self
348
349 def push(self, token):
350 """Push a token back to the stream."""
351 self._pushed.append(token)
352
353 def look(self):
354 """Look at the next token."""
355 old_token = next(self)
356 result = self.current
357 self.push(result)
358 self.current = old_token
359 return result
360
361 def skip(self, n=1):
362 """Got n tokens ahead."""
363 for _ in range(n):
364 next(self)
365
366 def next_if(self, expr):
367 """Perform the token test and return the token if it matched.
368 Otherwise the return value is `None`.
369 """
370 if self.current.test(expr):
371 return next(self)
372
373 def skip_if(self, expr):
374 """Like :meth:`next_if` but only returns `True` or `False`."""
375 return self.next_if(expr) is not None
376
377 def __next__(self):
378 """Go one token ahead and return the old one.
379
380 Use the built-in :func:`next` instead of calling this directly.
381 """
382 rv = self.current
383 if self._pushed:
384 self.current = self._pushed.popleft()
385 elif self.current.type is not TOKEN_EOF:
386 try:
387 self.current = next(self._iter)
388 except StopIteration:
389 self.close()
390 return rv
391
392 def close(self):
393 """Close the stream."""
394 self.current = Token(self.current.lineno, TOKEN_EOF, "")
395 self._iter = None
396 self.closed = True
397
398 def expect(self, expr):
399 """Expect a given token type and return it. This accepts the same
400 argument as :meth:`jinja2.lexer.Token.test`.
401 """
402 if not self.current.test(expr):
403 expr = describe_token_expr(expr)
404 if self.current.type is TOKEN_EOF:
405 raise TemplateSyntaxError(
406 "unexpected end of template, expected %r." % expr,
407 self.current.lineno,
408 self.name,
409 self.filename,
410 )
411 raise TemplateSyntaxError(
412 "expected token %r, got %r" % (expr, describe_token(self.current)),
413 self.current.lineno,
414 self.name,
415 self.filename,
416 )
417 try:
418 return self.current
419 finally:
420 next(self)
421
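# --- editor's illustrative sketch, not part of the upstream module ---
# Driving a TokenStream by hand with a handful of tokens; the parser relies on
# exactly these methods (current, look, skip_if, expect).  The _toks/_stream
# names are throwaways used only for this demonstration.
_toks = [
    Token(1, TOKEN_NAME, "user"),
    Token(1, TOKEN_DOT, "."),
    Token(1, TOKEN_NAME, "name"),
    Token(1, TOKEN_EOF, ""),
]
_stream = TokenStream(iter(_toks), "<example>", None)
assert _stream.current.test("name:user")
assert _stream.look().test("dot")           # peek at the next token without consuming it
assert _stream.skip_if("name:user")         # consume the current token
assert _stream.expect("dot").value == "."   # return the expected token, then advance
assert _stream.current.test("name:name")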
422
423 def get_lexer(environment):
424 """Return a lexer which is probably cached."""
425 key = (
426 environment.block_start_string,
427 environment.block_end_string,
428 environment.variable_start_string,
429 environment.variable_end_string,
430 environment.comment_start_string,
431 environment.comment_end_string,
432 environment.line_statement_prefix,
433 environment.line_comment_prefix,
434 environment.trim_blocks,
435 environment.lstrip_blocks,
436 environment.newline_sequence,
437 environment.keep_trailing_newline,
438 )
439 lexer = _lexer_cache.get(key)
440 if lexer is None:
441 lexer = Lexer(environment)
442 _lexer_cache[key] = lexer
443 return lexer
444
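# --- editor's illustrative sketch, not part of the upstream module ---
# Environments whose lexer-relevant settings are identical share one cached
# Lexer instance.  This assumes the public jinja2.Environment API and must run
# outside this module (importing Environment here would be circular):
#
#     from jinja2 import Environment
#     assert Environment().lexer is Environment().lexer
#     assert Environment(block_start_string="<%").lexer is not Environment().lexer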
445
446 class OptionalLStrip(tuple):
447 """A special tuple for marking a point in the state that can have
448 lstrip applied.
449 """
450
451 __slots__ = ()
452
453 # Even though it looks like a no-op, creating instances fails
454 # without this.
455 def __new__(cls, *members, **kwargs):
456 return super(OptionalLStrip, cls).__new__(cls, members)
457
458
459 class Lexer(object):
460 """Class that implements a lexer for a given environment. Automatically
461 created by the environment class, usually you don't have to do that.
462
463 Note that the lexer is not automatically bound to an environment.
464 Multiple environments can share the same lexer.
465 """
466
467 def __init__(self, environment):
468 # shortcuts
469 e = re.escape
470
471 def c(x):
472 return re.compile(x, re.M | re.S)
473
474 # lexing rules for tags
475 tag_rules = [
476 (whitespace_re, TOKEN_WHITESPACE, None),
477 (float_re, TOKEN_FLOAT, None),
478 (integer_re, TOKEN_INTEGER, None),
479 (name_re, TOKEN_NAME, None),
480 (string_re, TOKEN_STRING, None),
481 (operator_re, TOKEN_OPERATOR, None),
482 ]
483
484 # assemble the root lexing rule. because "|" alternation picks the first
485 # matching branch rather than the longest, we have to sort by length so
486 # that the lexer keeps working as expected with parsing rules like <% for
487 # blocks and <%= for variables (if someone wants asp-like syntax).
488 # variables are just part of the rules if variable processing
489 # is required.
490 root_tag_rules = compile_rules(environment)
491
492 # block suffix if trimming is enabled
493 block_suffix_re = environment.trim_blocks and "\\n?" or ""
494
495 # If lstrip is enabled, it should not be applied if there is any
496 # non-whitespace between the newline and block.
497 self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None
498
499 self.newline_sequence = environment.newline_sequence
500 self.keep_trailing_newline = environment.keep_trailing_newline
501
502 # global lexing rules
503 self.rules = {
504 "root": [
505 # directives
506 (
507 c(
508 "(.*?)(?:%s)"
509 % "|".join(
510 [
511 r"(?P<raw_begin>%s(\-|\+|)\s*raw\s*(?:\-%s\s*|%s))"
512 % (
513 e(environment.block_start_string),
514 e(environment.block_end_string),
515 e(environment.block_end_string),
516 )
517 ]
518 + [
519 r"(?P<%s>%s(\-|\+|))" % (n, r)
520 for n, r in root_tag_rules
521 ]
522 )
523 ),
524 OptionalLStrip(TOKEN_DATA, "#bygroup"),
525 "#bygroup",
526 ),
527 # data
528 (c(".+"), TOKEN_DATA, None),
529 ],
530 # comments
531 TOKEN_COMMENT_BEGIN: [
532 (
533 c(
534 r"(.*?)((?:\-%s\s*|%s)%s)"
535 % (
536 e(environment.comment_end_string),
537 e(environment.comment_end_string),
538 block_suffix_re,
539 )
540 ),
541 (TOKEN_COMMENT, TOKEN_COMMENT_END),
542 "#pop",
543 ),
544 (c("(.)"), (Failure("Missing end of comment tag"),), None),
545 ],
546 # blocks
547 TOKEN_BLOCK_BEGIN: [
548 (
549 c(
550 r"(?:\-%s\s*|%s)%s"
551 % (
552 e(environment.block_end_string),
553 e(environment.block_end_string),
554 block_suffix_re,
555 )
556 ),
557 TOKEN_BLOCK_END,
558 "#pop",
559 ),
560 ]
561 + tag_rules,
562 # variables
563 TOKEN_VARIABLE_BEGIN: [
564 (
565 c(
566 r"\-%s\s*|%s"
567 % (
568 e(environment.variable_end_string),
569 e(environment.variable_end_string),
570 )
571 ),
572 TOKEN_VARIABLE_END,
573 "#pop",
574 )
575 ]
576 + tag_rules,
577 # raw block
578 TOKEN_RAW_BEGIN: [
579 (
580 c(
581 r"(.*?)((?:%s(\-|\+|))\s*endraw\s*(?:\-%s\s*|%s%s))"
582 % (
583 e(environment.block_start_string),
584 e(environment.block_end_string),
585 e(environment.block_end_string),
586 block_suffix_re,
587 )
588 ),
589 OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),
590 "#pop",
591 ),
592 (c("(.)"), (Failure("Missing end of raw directive"),), None),
593 ],
594 # line statements
595 TOKEN_LINESTATEMENT_BEGIN: [
596 (c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
597 ]
598 + tag_rules,
599 # line comments
600 TOKEN_LINECOMMENT_BEGIN: [
601 (
602 c(r"(.*?)()(?=\n|$)"),
603 (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
604 "#pop",
605 )
606 ],
607 }
608
609 def _normalize_newlines(self, value):
610 """Called for strings and template data to normalize it to unicode."""
611 return newline_re.sub(self.newline_sequence, value)
612
613 def tokenize(self, source, name=None, filename=None, state=None):
614 """Calls tokeniter + tokenize and wraps it in a token stream."""
615 stream = self.tokeniter(source, name, filename, state)
616 return TokenStream(self.wrap(stream, name, filename), name, filename)
617
618 def wrap(self, stream, name=None, filename=None):
619 """This is called with the stream as returned by `tokenize` and wraps
620 every token in a :class:`Token` and converts the value.
621 """
622 for lineno, token, value in stream:
623 if token in ignored_tokens:
624 continue
625 elif token == TOKEN_LINESTATEMENT_BEGIN:
626 token = TOKEN_BLOCK_BEGIN
627 elif token == TOKEN_LINESTATEMENT_END:
628 token = TOKEN_BLOCK_END
629 # we are not interested in those tokens in the parser
630 elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
631 continue
632 elif token == TOKEN_DATA:
633 value = self._normalize_newlines(value)
634 elif token == "keyword":
635 token = value
636 elif token == TOKEN_NAME:
637 value = str(value)
638 if check_ident and not value.isidentifier():
639 raise TemplateSyntaxError(
640 "Invalid character in identifier", lineno, name, filename
641 )
642 elif token == TOKEN_STRING:
643 # try to unescape string
644 try:
645 value = (
646 self._normalize_newlines(value[1:-1])
647 .encode("ascii", "backslashreplace")
648 .decode("unicode-escape")
649 )
650 except Exception as e:
651 msg = str(e).split(":")[-1].strip()
652 raise TemplateSyntaxError(msg, lineno, name, filename)
653 elif token == TOKEN_INTEGER:
654 value = int(value.replace("_", ""))
655 elif token == TOKEN_FLOAT:
656 # remove all "_" first to support more Python versions
657 value = literal_eval(value.replace("_", ""))
658 elif token == TOKEN_OPERATOR:
659 token = operators[value]
660 yield Token(lineno, token, value)
661
662 def tokeniter(self, source, name, filename=None, state=None):
663 """This method tokenizes the text and returns the tokens in a
664 generator. Use this method if you just want to tokenize a template.
665 """
666 source = text_type(source)
667 lines = source.splitlines()
668 if self.keep_trailing_newline and source:
669 for newline in ("\r\n", "\r", "\n"):
670 if source.endswith(newline):
671 lines.append("")
672 break
673 source = "\n".join(lines)
674 pos = 0
675 lineno = 1
676 stack = ["root"]
677 if state is not None and state != "root":
678 assert state in ("variable", "block"), "invalid state"
679 stack.append(state + "_begin")
680 statetokens = self.rules[stack[-1]]
681 source_length = len(source)
682 balancing_stack = []
683 lstrip_unless_re = self.lstrip_unless_re
684 newlines_stripped = 0
685 line_starting = True
686
687 while 1:
688 # tokenizer loop
689 for regex, tokens, new_state in statetokens:
690 m = regex.match(source, pos)
691 # if no match we try again with the next rule
692 if m is None:
693 continue
694
695 # we only match blocks and variables if braces / parentheses
696 # are balanced. continue parsing with the lower rule which
697 # is the operator rule. do this only if the end tags look
698 # like operators
699 if balancing_stack and tokens in (
700 TOKEN_VARIABLE_END,
701 TOKEN_BLOCK_END,
702 TOKEN_LINESTATEMENT_END,
703 ):
704 continue
705
706 # tuples support more options
707 if isinstance(tokens, tuple):
708 groups = m.groups()
709
710 if isinstance(tokens, OptionalLStrip):
711 # Rule supports lstrip. Match will look like
712 # text, block type, whitespace control, type, control, ...
713 text = groups[0]
714
715 # Skipping the text and first type, every other group is the
716 # whitespace control for each type. One of the groups will be
717 # -, +, or empty string instead of None.
718 strip_sign = next(g for g in groups[2::2] if g is not None)
719
720 if strip_sign == "-":
721 # Strip all whitespace between the text and the tag.
722 stripped = text.rstrip()
723 newlines_stripped = text[len(stripped) :].count("\n")
724 groups = (stripped,) + groups[1:]
725 elif (
726 # Not marked for preserving whitespace.
727 strip_sign != "+"
728 # lstrip is enabled.
729 and lstrip_unless_re is not None
730 # Not a variable expression.
731 and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
732 ):
733 # The start of text between the last newline and the tag.
734 l_pos = text.rfind("\n") + 1
735 if l_pos > 0 or line_starting:
736 # If there's only whitespace between the newline and the
737 # tag, strip it.
738 if not lstrip_unless_re.search(text, l_pos):
739 groups = (text[:l_pos],) + groups[1:]
740
741 for idx, token in enumerate(tokens):
742 # failure group
743 if token.__class__ is Failure:
744 raise token(lineno, filename)
745 # bygroup is a bit more complex, in that case we
746 # yield for the current token the first named
747 # group that matched
748 elif token == "#bygroup":
749 for key, value in iteritems(m.groupdict()):
750 if value is not None:
751 yield lineno, key, value
752 lineno += value.count("\n")
753 break
754 else:
755 raise RuntimeError(
756 "%r wanted to resolve "
757 "the token dynamically"
758 " but no group matched" % regex
759 )
760 # normal group
761 else:
762 data = groups[idx]
763 if data or token not in ignore_if_empty:
764 yield lineno, token, data
765 lineno += data.count("\n") + newlines_stripped
766 newlines_stripped = 0
767
768 # if the token type is a plain string, the match is just yielded as-is.
769 else:
770 data = m.group()
771 # update brace/parentheses balance
772 if tokens == TOKEN_OPERATOR:
773 if data == "{":
774 balancing_stack.append("}")
775 elif data == "(":
776 balancing_stack.append(")")
777 elif data == "[":
778 balancing_stack.append("]")
779 elif data in ("}", ")", "]"):
780 if not balancing_stack:
781 raise TemplateSyntaxError(
782 "unexpected '%s'" % data, lineno, name, filename
783 )
784 expected_op = balancing_stack.pop()
785 if expected_op != data:
786 raise TemplateSyntaxError(
787 "unexpected '%s', "
788 "expected '%s'" % (data, expected_op),
789 lineno,
790 name,
791 filename,
792 )
793 # yield items
794 if data or tokens not in ignore_if_empty:
795 yield lineno, tokens, data
796 lineno += data.count("\n")
797
798 line_starting = m.group()[-1:] == "\n"
799
800 # fetch the new position into a new variable so that we can check
801 # if there is an internal parsing error which would result
802 # in an infinite loop
803 pos2 = m.end()
804
805 # handle state changes
806 if new_state is not None:
807 # remove the uppermost state
808 if new_state == "#pop":
809 stack.pop()
810 # resolve the new state by group checking
811 elif new_state == "#bygroup":
812 for key, value in iteritems(m.groupdict()):
813 if value is not None:
814 stack.append(key)
815 break
816 else:
817 raise RuntimeError(
818 "%r wanted to resolve the "
819 "new state dynamically but"
820 " no group matched" % regex
821 )
822 # direct state name given
823 else:
824 stack.append(new_state)
825 statetokens = self.rules[stack[-1]]
826 # we are still at the same position and there was no stack change;
827 # this means a loop without a break condition, so avoid that and
828 # raise an error
829 elif pos2 == pos:
830 raise RuntimeError(
831 "%r yielded empty string without stack change" % regex
832 )
833 # publish the new position and start again
834 pos = pos2
835 break
836 # if the loop terminated without a break we haven't found a single match;
837 # either we are at the end of the file or we have a problem
838 else:
839 # end of text
840 if pos >= source_length:
841 return
842 # something went wrong
843 raise TemplateSyntaxError(
844 "unexpected char %r at %d" % (source[pos], pos),
845 lineno,
846 name,
847 filename,
848 )
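# --- editor's illustrative sketch, not part of the upstream module ---
# The usual entry point is jinja2.Environment, which obtains a cached Lexer via
# get_lexer() and exposes the raw (lineno, token_type, value) triples from
# tokeniter() through Environment.lex(); run this outside this module:
#
#     from jinja2 import Environment
#     for lineno, token, value in Environment().lex("Hello {{ name }}!"):
#         print(lineno, token, value)
#
# This walks the template and yields, in order: the literal "Hello " as data,
# "{{" as variable_begin, whitespace, the name "name", whitespace, "}}" as
# variable_end, and the trailing "!" as data.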