Mercurial repository shellac / guppy_basecaller: comparison of env/lib/python3.7/site-packages/docutils/utils/code_analyzer.py @ 0:26e78fe6e8c4 (draft)

commit message: "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"

| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | (none) |
| children | (none) |
comparison: -1:000000000000 → 0:26e78fe6e8c4 (the file is new in this changeset; its full content follows)
```python
#!/usr/bin/python
# coding: utf-8

"""Lexical analysis of formal languages (i.e. code) using Pygments."""

# :Author: Georg Brandl; Felix Wiemann; Günter Milde
# :Date: $Date: 2019-08-26 18:46:50 +0200 (Mo, 26. Aug 2019) $
# :Copyright: This module has been placed in the public domain.

from docutils import ApplicationError
try:
    from pkg_resources import DistributionNotFound as ResourceError
except (ImportError, RuntimeError):
    class ResourceError(ApplicationError):
        pass  # stub
try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
    with_pygments = True
except ImportError:
    with_pygments = False

# Filter the following token types from the list of class arguments:
unstyled_tokens = ['token',  # Token (base token type)
                   'text',   # Token.Text
                   '']       # short name for Token and Text
# (Add, e.g., Token.Punctuation with ``unstyled_tokens += ['punctuation']``.)

class LexerError(ApplicationError):
    pass

class Lexer(object):
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code       -- string of source code to parse,
      language   -- formal language the code is written in,
      tokennames -- either 'long', 'short', or 'none' (see below).

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(tokentype, value)``
    tuples. The value of `tokennames` configures the naming of the
    tokentype:

      'long':  downcased full token type name,
      'short': short name defined by pygments.token.STANDARD_TYPES
               (= class argument used in pygments html output),
      'none':  skip lexical analysis.
    """

    def __init__(self, code, language, tokennames='short'):
        """Set up a lexical analyzer for `code` in `language`."""
        self.code = code
        self.language = language
        self.tokennames = tokennames
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text') or tokennames == 'none':
            return
        if not with_pygments:
            raise LexerError('Cannot analyze code. '
                             'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except (pygments.util.ClassNotFound, ResourceError):
            raise LexerError('Cannot analyze code. '
                             'No Pygments lexer found for "%s".' % language)
        # Since version 1.2 (released Jan 01, 2010) Pygments has a
        # TokenMergeFilter. ``self.merge(tokens)`` in __iter__ could
        # be replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
        # However, `merge` below also strips a final newline added by pygments.
        #
        # self.lexer.add_filter('tokenmerge')

    def merge(self, tokens):
        """Merge subsequent tokens of same token-type.

        Also strip the final newline (added by pygments).
        """
        tokens = iter(tokens)
        (lasttype, lastval) = next(tokens)
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                (lasttype, lastval) = (ttype, value)
        if lastval.endswith('\n'):
            lastval = lastval[:-1]
        if lastval:
            yield (lasttype, lastval)

    def __iter__(self):
        """Parse self.code and yield "classified" tokens."""
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long':   # long CSS class args
                classes = str(tokentype).lower().split('.')
            else:                           # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value)
```
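For orientation, here is a minimal usage sketch for `Lexer` on its own. It assumes Pygments is installed; the sample snippet and the printing are illustrative, not part of the module:

```python
# Minimal sketch (assumes Pygments is installed; the snippet is arbitrary).
from docutils.utils.code_analyzer import Lexer, LexerError

code = 'x = 1\nprint(x)\n'
try:
    # tokennames='short' yields the short CSS class names Pygments' HTML
    # formatter uses, e.g. ['k'] for a keyword; unstyled token types come
    # out as an empty class list.
    for classes, value in Lexer(code, 'python', tokennames='short'):
        print(classes, repr(value))
except LexerError as err:
    # Raised when Pygments is missing or no lexer matches the language.
    print(err)
```

The file then defines `NumberLines`, which decorates such a token stream with line-number tokens: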
```python
class NumberLines(object):
    """Insert linenumber-tokens at the start of every code line.

    Arguments

      tokens    -- iterable of ``(classes, value)`` tuples
      startline -- first line number
      endline   -- last line number

    Iterating over an instance yields the tokens with a
    ``(['ln'], '<the line number>')`` token added for every code line.
    Multi-line tokens are split."""

    def __init__(self, tokens, startline, endline):
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = '%%%dd ' % len(str(endline))

    def __iter__(self):
        lineno = self.startline
        yield (['ln'], self.fmt_str % lineno)
        for ttype, value in self.tokens:
            lines = value.split('\n')
            for line in lines[:-1]:
                yield (ttype, line + '\n')
                lineno += 1
                yield (['ln'], self.fmt_str % lineno)
            yield (ttype, lines[-1])
```
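Finally, a sketch of the two classes chained together, roughly how docutils' `code` directive wires them up; again assuming Pygments is available, with an arbitrary sample snippet:

```python
# Pipeline sketch: lex a snippet, then prefix each line with a number token.
from docutils.utils.code_analyzer import Lexer, NumberLines

code = 'a = 1\nb = a + 1\n'
startline = 1
endline = startline + code.count('\n')  # only used to size the padding

for classes, value in NumberLines(Lexer(code, 'python'), startline, endline):
    # Each code line is preceded by a (['ln'], 'NN ') token; tokens that
    # span several lines arrive split at the newlines.
    print(classes, repr(value))
```

Note that the trailing `yield (ttype, lines[-1])` can emit an empty value when a token ends exactly at a newline; consumers are expected to tolerate empty strings.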
