comparison env/lib/python3.9/site-packages/coloredlogs/converter/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # Program to convert text with ANSI escape sequences to HTML.
2 #
3 # Author: Peter Odding <peter@peterodding.com>
4 # Last Change: February 14, 2020
5 # URL: https://coloredlogs.readthedocs.io
6
7 """Convert text with ANSI escape sequences to HTML."""
8
9 # Standard library modules.
10 import codecs
11 import os
12 import pipes
13 import re
14 import subprocess
15 import tempfile
16
17 # External dependencies.
18 from humanfriendly.terminal import (
19 ANSI_CSI,
20 ANSI_TEXT_STYLES,
21 clean_terminal_output,
22 output,
23 )
24
25 # Modules included in our package.
26 from coloredlogs.converter.colors import (
27 BRIGHT_COLOR_PALETTE,
28 EIGHT_COLOR_PALETTE,
29 EXTENDED_COLOR_PALETTE,
30 )
31
# Compiled regular expression that matches leading spaces (indentation).
INDENT_PATTERN = re.compile('^ +', re.MULTILINE)

# Compiled regular expression that matches a tag followed by a space at the start of a line.
TAG_INDENT_PATTERN = re.compile('^(<[^>]+>) ', re.MULTILINE)

# Compiled regular expression that matches strings we want to convert. Used to
# separate all special strings and literal output in a single pass (this allows
# us to properly encode the output without resorting to nasty hacks).
#
# NOTE: This is a raw string, so every regex escape takes a single backslash.
# A doubled backslash (as in 'www\\.') would match a literal backslash followed
# by an arbitrary character, which means 'www.example.com' would never be
# recognized as a URL.
TOKEN_PATTERN = re.compile(r'''
    # Wrap the pattern in a capture group so that re.split() includes the
    # substrings that match the pattern in the resulting list of strings.
    (
        # Match URLs with supported schemes and domain names.
        (?: https?:// | www\. )
        # Scan until the end of the URL by matching non-whitespace characters
        # that are also not escape characters.
        [^\s\x1b]+
        # Alternatively ...
        |
        # Match (what looks like) ANSI escape sequences.
        \x1b \[ .*? m
    )
''', re.UNICODE | re.VERBOSE)
56
57
def capture(command, encoding='UTF-8'):
    """
    Capture the output of an external command as if it runs in an interactive terminal.

    :param command: The command name and its arguments (a list of strings).
    :param encoding: The encoding to use to decode the output (a string).
    :returns: The output of the command (a Unicode string, with lines
              joined by newline characters).

    This function runs an external command under ``script`` (emulating an
    interactive terminal) to capture the output of the command as if it was
    running in an interactive terminal (including ANSI escape sequences).
    """
    with open(os.devnull, 'wb') as dev_null:
        # We start by invoking the `script' program in a form that is supported
        # by the Linux implementation [1] but fails command line validation on
        # the MacOS (BSD) implementation [2]: The command is specified using
        # the -c option and the typescript file is /dev/null.
        #
        # [1] http://man7.org/linux/man-pages/man1/script.1.html
        # [2] https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man1/script.1.html
        command_line = ['script', '-qc', ' '.join(map(pipes.quote, command)), '/dev/null']
        script = subprocess.Popen(command_line, stdout=subprocess.PIPE, stderr=dev_null)
        stdout, _ = script.communicate()
        if script.returncode == 0:
            # If `script' succeeded we assume that it understood our command line
            # invocation which means it's the Linux implementation (in this case
            # we can use standard output instead of a temporary file).
            captured = stdout.decode(encoding)
        else:
            # If `script' failed we assume that it didn't understand our command
            # line invocation which means it's the MacOS (BSD) implementation
            # (in this case we need a temporary file because the command line
            # interface requires it).
            fd, temporary_file = tempfile.mkstemp(prefix='coloredlogs-', suffix='-capture.txt')
            # mkstemp() hands us an open file descriptor that we are
            # responsible for; we only need the filename, so close the
            # descriptor right away instead of leaking it.
            os.close(fd)
            try:
                command_line = ['script', '-q', temporary_file] + list(command)
                subprocess.Popen(command_line, stdout=dev_null, stderr=dev_null).wait()
                with open(temporary_file, 'rb') as handle:
                    raw_output = handle.read()
            finally:
                os.unlink(temporary_file)
            # On MacOS when standard input is /dev/null I've observed
            # the captured output starting with the characters '^D':
            #
            #   $ script -q capture.txt echo example </dev/null
            #   example
            #   $ xxd capture.txt
            #   00000000: 5e44 0808 6578 616d 706c 650d 0a         ^D..example..
            #
            # I'm not sure why this is here, although I suppose it has to do
            # with ^D in caret notation signifying end-of-file [1]. What I do
            # know is that this is an implementation detail that callers of the
            # capture() function shouldn't be bothered with, so we strip it.
            #
            # [1] https://en.wikipedia.org/wiki/End-of-file
            if raw_output.startswith(b'^D'):
                raw_output = raw_output[2:]
            captured = raw_output.decode(encoding)
    # Clean up backspace and carriage return characters and the 'erase line'
    # ANSI escape sequence and return the output as a Unicode string.
    return u'\n'.join(clean_terminal_output(captured))
119
120
def _parse_sgr_parameters(parameters):
    """
    Parse the parameter part of an ANSI "select graphic rendition" sequence.

    :param parameters: The text between ``ESC[`` and the final ``m`` (a string).
    :returns: A list of integers, or :data:`None` when any parameter is not
              a plain decimal number (the sequence should then be ignored).

    An empty parameter counts as ``0`` (reset), so ``ESC[m`` yields ``[0]``.
    """
    codes = []
    for field in parameters.split(';'):
        if not field:
            codes.append(0)
        elif field.isdigit():
            try:
                codes.append(int(field))
            except ValueError:
                # str.isdigit() accepts some Unicode digits that int() rejects.
                return None
        else:
            return None
    return codes


def convert(text, code=True, tabsize=4):
    """
    Convert text with ANSI escape sequences to HTML.

    :param text: The text with ANSI escape sequences (a string).
    :param code: Whether to wrap the returned HTML fragment in a
                 ``<code>...</code>`` element (a boolean, defaults
                 to :data:`True`).
    :param tabsize: Refer to :func:`str.expandtabs()` for details.
    :returns: The text converted to HTML (a string).

    URLs are turned into hyperlinks, supported ANSI styles and colors are
    turned into ``<span style="...">`` elements and all other text is HTML
    encoded. Escape sequences whose parameters are not numeric are dropped
    from the output instead of raising an exception.
    """
    fragments = []
    in_span = False
    compatible_text_styles = {
        # The following ANSI text styles have an obvious mapping to CSS.
        ANSI_TEXT_STYLES['bold']: {'font-weight': 'bold'},
        ANSI_TEXT_STYLES['strike_through']: {'text-decoration': 'line-through'},
        ANSI_TEXT_STYLES['underline']: {'text-decoration': 'underline'},
    }
    for token in TOKEN_PATTERN.split(text):
        if token.startswith(('http://', 'https://', 'www.')):
            url = token if '://' in token else ('http://' + token)
            token = u'<a href="%s" style="color:inherit">%s</a>' % (html_encode(url), html_encode(token))
        elif token.startswith(ANSI_CSI):
            ansi_codes = _parse_sgr_parameters(token[len(ANSI_CSI):-1])
            if ansi_codes is None:
                # Not a sequence we understand: never leak the raw escape
                # sequence into the HTML, just skip it.
                continue
            # First we check for a reset code to close the previous <span>
            # element. As explained on Wikipedia [1] an absence of codes
            # implies a reset code as well: "No parameters at all in ESC[m acts
            # like a 0 reset code" (the parser above maps that case to 0).
            # [1] https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences
            if in_span and 0 in ansi_codes:
                fragments.append('</span>')
                in_span = False
            # Now we're ready to generate the next <span> element (if any) in
            # the knowledge that we're emitting opening <span> and closing
            # </span> tags in the correct order.
            styles = {}
            # These flags apply to every color in the sequence, so they are
            # computed before the loop below starts consuming the codes.
            is_faint = (ANSI_TEXT_STYLES['faint'] in ansi_codes)
            is_inverse = (ANSI_TEXT_STYLES['inverse'] in ansi_codes)
            while ansi_codes:
                number = ansi_codes.pop(0)
                # Try to match a compatible text style.
                if number in compatible_text_styles:
                    styles.update(compatible_text_styles[number])
                    continue
                # Try to extract a text and/or background color.
                text_color = None
                background_color = None
                if 30 <= number <= 37:
                    # 30-37 sets the text color from the eight color palette.
                    text_color = EIGHT_COLOR_PALETTE[number - 30]
                elif 40 <= number <= 47:
                    # 40-47 sets the background color from the eight color palette.
                    background_color = EIGHT_COLOR_PALETTE[number - 40]
                elif 90 <= number <= 97:
                    # 90-97 sets the text color from the high-intensity eight color palette.
                    text_color = BRIGHT_COLOR_PALETTE[number - 90]
                elif 100 <= number <= 107:
                    # 100-107 sets the background color from the high-intensity eight color palette.
                    background_color = BRIGHT_COLOR_PALETTE[number - 100]
                elif number in (38, 39, 48) and len(ansi_codes) >= 2 and ansi_codes[0] == 5:
                    # 38;5;N is a text color in the 256 color mode palette and
                    # 48;5;N is a background color in the 256 color mode
                    # palette. The non-standard 39;5;N form is still treated
                    # as a background color because this function has always
                    # done so.
                    # Consume the 5 following the introducer.
                    ansi_codes.pop(0)
                    # Consume the 256 color mode color index.
                    color_index = ansi_codes.pop(0)
                    # Set the variable to the corresponding HTML/CSS color
                    # (indexes outside of the palette are silently ignored).
                    try:
                        extended_color = EXTENDED_COLOR_PALETTE[color_index]
                    except IndexError:
                        extended_color = None
                    if number == 38:
                        text_color = extended_color
                    else:
                        background_color = extended_color
                # Apply the 'faint' or 'inverse' text style
                # by manipulating the selected color(s).
                if text_color and is_inverse:
                    # Use the text color as the background color and pick a
                    # text color that will be visible on the resulting
                    # background color.
                    background_color = text_color
                    text_color = select_text_color(*parse_hex_color(text_color))
                if text_color and is_faint:
                    # Because I wasn't sure how to implement faint colors
                    # based on normal colors I looked at how gnome-terminal
                    # (my terminal of choice) handles this and it appears
                    # to just pick a somewhat darker color.
                    text_color = '#%02X%02X%02X' % tuple(
                        max(0, n - 40) for n in parse_hex_color(text_color)
                    )
                if text_color:
                    styles['color'] = text_color
                if background_color:
                    styles['background-color'] = background_color
            if styles:
                token = '<span style="%s">' % ';'.join(k + ':' + v for k, v in sorted(styles.items()))
                in_span = True
            else:
                token = ''
        else:
            token = html_encode(token)
        fragments.append(token)
    html = ''.join(fragments)
    html = encode_whitespace(html, tabsize)
    if code:
        html = '<code>%s</code>' % html
    return html
231
232
def encode_whitespace(text, tabsize=4):
    """
    Encode whitespace so that web browsers properly render it.

    :param text: The plain text (a string).
    :param tabsize: Refer to :func:`str.expandtabs()` for details.
    :returns: The text converted to HTML (a string).

    The whitespace is encoded in such a way that web browsers render the
    same whitespace regardless of whether 'preformatted' styling is used (by
    wrapping the text in a ``<pre>...</pre>`` element).

    .. note:: While the string manipulation performed by this function is
              specifically intended not to corrupt the HTML generated by
              :func:`convert()` it definitely does have the potential to
              corrupt HTML from other sources. You have been warned :-).
    """
    # Normalize Windows line endings (CR+LF) to UNIX line endings (LF), turn
    # every line ending into an HTML line break and expand tabs to spaces.
    html = text.replace('\r\n', '\n').replace('\n', '<br>\n').expandtabs(tabsize)
    # Leading spaces (at the start of the string and/or directly after a line
    # ending) become non-breaking spaces, otherwise HTML rendering engines
    # will simply ignore them.
    html = INDENT_PATTERN.sub(encode_whitespace_cb, html)
    # Corner case missed by the previous step: a line that starts with an
    # HTML tag while the first visible text is a space. Web browsers seem to
    # ignore these spaces, so they are converted as well.
    html = TAG_INDENT_PATTERN.sub(r'\1&nbsp;', html)
    # Runs of multiple spaces become non-breaking spaces so that rendering
    # engines don't collapse them into a single space. Single spaces are
    # deliberately left alone:
    #  1. Replacing them would break the HTML emitted by convert() (think of
    #     the spaces that separate element names from attribute names).
    #  2. If every single space is replaced by a non-breaking space,
    #     web browsers perform awkwardly unintuitive word wrapping.
    #  3. The HTML output would be bloated for no good reason.
    return re.sub(' {2,}', encode_whitespace_cb, html)
275
276
def encode_whitespace_cb(match):
    """
    Replace a run of spaces with the same number of non-breaking spaces.

    :param match: A regular expression match object.
    :returns: The replacement string.

    This function is used by :func:`encode_whitespace()` as a callback for
    replacement using a regular expression pattern.
    """
    matched_text = match.group(0)
    # One '&nbsp;' entity for every matched character.
    return ''.join('&nbsp;' for _ in matched_text)
288
289
def html_encode(text):
    """
    Replace characters that are special in HTML with character entities.

    :param text: The plain text (a string).
    :returns: The text converted to HTML (a string).
    """
    # The ampersand has to be handled first, otherwise the ampersands
    # introduced by the other replacements would be encoded a second time.
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
    )
    for character, entity in replacements:
        text = text.replace(character, entity)
    return text
302
303
def parse_hex_color(value):
    """
    Split a CSS color in hexadecimal notation into its R, G, B components.

    :param value: A CSS color in hexadecimal notation (a string like
                  '#000000' or the short form '#000', the leading '#'
                  is optional).
    :return: A tuple with three integers (with values between 0 and 255)
             corresponding to the R, G and B components of the color.
    :raises: :exc:`~exceptions.ValueError` on values that can't be parsed.
    """
    digits = value[1:] if value.startswith('#') else value
    if len(digits) == 3:
        # Short notation: every digit is doubled ('F' means 'FF').
        components = [digit * 2 for digit in digits]
    elif len(digits) == 6:
        # Long notation: two digits per component.
        components = [digits[start:start + 2] for start in (0, 2, 4)]
    else:
        raise ValueError()
    return tuple(int(component, 16) for component in components)
329
330
def select_text_color(r, g, b):
    """
    Pick a readable text color for the inverse text style.

    :param r: The amount of red (an integer between 0 and 255).
    :param g: The amount of green (an integer between 0 and 255).
    :param b: The amount of blue (an integer between 0 and 255).
    :returns: A CSS color in hexadecimal notation (a string).

    In inverse mode the color that is normally used for the text is instead
    used for the background, however this can render the text unreadable.
    This function makes an effort to select a text color that stays legible
    on the given background: black on light backgrounds, white on dark ones.
    Based on http://stackoverflow.com/a/3943023/112731.
    """
    # Weighted sum of the components (an estimate of perceived brightness).
    brightness = r * 0.299 + g * 0.587 + b * 0.114
    if brightness > 186:
        return '#000'
    return '#FFF'
346
347
class ColoredCronMailer(object):

    """
    Easy to use integration between :mod:`coloredlogs` and the UNIX ``cron`` daemon.

    By using :class:`ColoredCronMailer` as a context manager in the command
    line interface of your Python program you make it trivially easy for users
    of your program to opt in to HTML output under ``cron``: The only thing the
    user needs to do is set ``CONTENT_TYPE="text/html"`` in their crontab!

    Under the hood this requires quite a bit of magic and I must admit that I
    developed this code simply because I was curious whether it could even be
    done :-). It requires my :mod:`capturer` package which you can install
    using ``pip install 'coloredlogs[cron]'``. The ``[cron]`` extra will pull
    in the :mod:`capturer` 2.4 or newer which is required to capture the output
    while silencing it - otherwise you'd get duplicate output in the emails
    sent by ``cron``.
    """

    def __init__(self):
        """Initialize output capturing when running under ``cron`` with the correct configuration."""
        # Capturing is only enabled when the environment asks for HTML.
        self.is_enabled = 'text/html' in os.environ.get('CONTENT_TYPE', 'text/plain')
        # Set to True by silence() to discard the captured output.
        self.is_silent = False
        if self.is_enabled:
            # We import capturer here so that the coloredlogs[cron] extra
            # isn't required to use the other functions in this module.
            from capturer import CaptureOutput
            self.capturer = CaptureOutput(merged=True, relay=False)

    def __enter__(self):
        """
        Start capturing output (when applicable).

        :returns: The :class:`ColoredCronMailer` object itself.
        """
        if self.is_enabled:
            self.capturer.__enter__()
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """
        Stop capturing output and convert the output to HTML (when applicable).

        :param exc_type: The type of the exception raised in the ``with``
                         block (or :data:`None`).
        :param exc_value: The exception raised in the ``with`` block (or
                          :data:`None`).
        :param traceback: The traceback of the exception (or :data:`None`).

        The three arguments are passed on to the capturer's own
        ``__exit__()`` method.
        """
        if self.is_enabled:
            try:
                if not self.is_silent:
                    # Only call output() when we captured something useful.
                    text = self.capturer.get_text()
                    if text and not text.isspace():
                        output(convert(text))
            finally:
                # Always stop capturing, even when fetching or converting the
                # captured text failed, so that capturing never stays active
                # after the context manager has been left.
                self.capturer.__exit__(exc_type, exc_value, traceback)

    def silence(self):
        """
        Tell :func:`__exit__()` to swallow all output (things will be silent).

        This can be useful when a Python program is written in such a way that
        it has already produced output by the time it becomes apparent that
        nothing useful can be done (say in a cron job that runs every few
        minutes :-p). By calling :func:`silence()` the output can be swallowed
        retroactively, avoiding useless emails from ``cron``.
        """
        self.is_silent = True