diff env/lib/python3.9/site-packages/docutils/utils/code_analyzer.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/docutils/utils/code_analyzer.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,142 @@
+#!/usr/bin/python
+# coding: utf-8
+
+"""Lexical analysis of formal languages (i.e. code) using Pygments."""
+
+# :Author: Georg Brandl; Felix Wiemann; Günter Milde
+# :Date: $Date: 2019-08-26 18:46:50 +0200 (Mo, 26. Aug 2019) $
+# :Copyright: This module has been placed in the public domain.
+
+from docutils import ApplicationError
+try:
+    from pkg_resources import DistributionNotFound as ResourceError
+except (ImportError, RuntimeError):
+    class ResourceError(ApplicationError):
+        pass # stub
+try:
+    import pygments
+    from pygments.lexers import get_lexer_by_name
+    from pygments.formatters.html import _get_ttype_class
+    with_pygments = True
+except ImportError:
+    with_pygments = False
+
+# Filter the following token types from the list of class arguments:
+unstyled_tokens = ['token', # Token (base token type)
+                   'text',  # Token.Text
+                   '']      # short name for Token and Text
+# (Add, e.g., Token.Punctuation with ``unstyled_tokens += ['punctuation']``.)
+
+class LexerError(ApplicationError):
+    pass
+
+class Lexer(object):
+    """Parse `code` lines and yield "classified" tokens.
+
+    Arguments
+
+      code       -- string of source code to parse,
+      language   -- formal language the code is written in,
+      tokennames -- either 'long', 'short', or 'none' (see below).
+
+    Merge subsequent tokens of the same token-type.
+
+    Iterating over an instance yields the tokens as ``(tokentype, value)``
+    tuples. The value of `tokennames` configures the naming of the tokentype:
+
+      'long':  downcased full token type name,
+      'short': short name defined by pygments.token.STANDARD_TYPES
+               (= class argument used in pygments html output),
+      'none':  skip lexical analysis.
+    """
+
+    def __init__(self, code, language, tokennames='short'):
+        """
+        Set up a lexical analyzer for `code` in `language`.
+        """
+        self.code = code
+        self.language = language
+        self.tokennames = tokennames
+        self.lexer = None
+        # get lexical analyzer for `language`:
+        if language in ('', 'text') or tokennames == 'none':
+            return
+        if not with_pygments:
+            raise LexerError('Cannot analyze code. '
+                             'Pygments package not found.')
+        try:
+            self.lexer = get_lexer_by_name(self.language)
+        except (pygments.util.ClassNotFound, ResourceError):
+            raise LexerError('Cannot analyze code. '
+                             'No Pygments lexer found for "%s".' % language)
+        # Since version 1.2 (released Jan 01, 2010) Pygments has a
+        # TokenMergeFilter. ``self.merge(tokens)`` in __iter__ could
+        # be replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
+        # However, `merge` below also strips a final newline added by pygments.
+        #
+        # self.lexer.add_filter('tokenmerge')
+
+    def merge(self, tokens):
+        """Merge subsequent tokens of same token-type.
+
+           Also strip the final newline (added by pygments).
+        """
+        tokens = iter(tokens)
+        try:
+            (lasttype, lastval) = next(tokens)
+        except StopIteration:
+            return  # nothing to merge: empty token stream
+        for ttype, value in tokens:
+            if ttype is lasttype:
+                lastval += value
+            else:
+                yield (lasttype, lastval)
+                (lasttype, lastval) = (ttype, value)
+        if lastval.endswith('\n'):
+            lastval = lastval[:-1]
+        if lastval:
+            yield (lasttype, lastval)
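+
+    # Illustration (editorial sketch, not part of the module): given a
+    # hypothetical stream
+    #   [(Token.Text, 'a'), (Token.Text, 'b'), (Token.Name, 'c\n')]
+    # ``merge`` yields (Token.Text, 'ab') and (Token.Name, 'c'):
+    # same-type runs are joined and the trailing newline is dropped.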
+
+    def __iter__(self):
+        """Parse self.code and yield "classified" tokens.
+        """
+        if self.lexer is None:
+            yield ([], self.code)
+            return
+        tokens = pygments.lex(self.code, self.lexer)
+        for tokentype, value in self.merge(tokens):
+            if self.tokennames == 'long': # long CSS class args
+                classes = str(tokentype).lower().split('.')
+            else: # short CSS class args
+                classes = [_get_ttype_class(tokentype)]
+            classes = [cls for cls in classes if cls not in unstyled_tokens]
+            yield (classes, value)
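+
+# Usage sketch (editorial addition, assuming Pygments is installed):
+#
+#     lexer = Lexer('print("hi")', 'python', tokennames='short')
+#     for classes, value in lexer:
+#         print(classes, repr(value))
+#
+# With ``language=''`` or ``tokennames='none'``, the source is returned
+# unanalyzed as a single ``([], code)`` token.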
+
+
+class NumberLines(object):
+    """Insert linenumber-tokens at the start of every code line.
+
+    Arguments
+
+       tokens    -- iterable of ``(classes, value)`` tuples
+       startline -- first line number
+       endline   -- last line number
+
+    Iterating over an instance yields the tokens with a
+    ``(['ln'], '<the line number>')`` token added for every code line.
+    Multi-line tokens are split."""
+
+    def __init__(self, tokens, startline, endline):
+        self.tokens = tokens
+        self.startline = startline
+        # pad line numbers, e.g. endline == 100 -> fmt_str = '%3d '
+        self.fmt_str = '%%%dd ' % len(str(endline))
+
+    def __iter__(self):
+        lineno = self.startline
+        yield (['ln'], self.fmt_str % lineno)
+        for ttype, value in self.tokens:
+            lines = value.split('\n')
+            for line in lines[:-1]:
+                yield (ttype, line + '\n')
+                lineno += 1
+                yield (['ln'], self.fmt_str % lineno)
+            yield (ttype, lines[-1])
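+
+# Combined usage sketch (editorial addition): wrap Lexer output in
+# NumberLines to prefix each code line with a right-aligned number.
+# Here ``code`` is a placeholder for a 15-line source string:
+#
+#     tokens = NumberLines(Lexer(code, 'python'), startline=1, endline=15)
+#     for classes, value in tokens:
+#         ...  # (['ln'], ' 1 ') precedes the tokens of the first line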