Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/coloredlogs/converter/__init__.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # Program to convert text with ANSI escape sequences to HTML. | |
2 # | |
3 # Author: Peter Odding <peter@peterodding.com> | |
4 # Last Change: February 14, 2020 | |
5 # URL: https://coloredlogs.readthedocs.io | |
6 | |
7 """Convert text with ANSI escape sequences to HTML.""" | |
8 | |
9 # Standard library modules. | |
10 import codecs | |
11 import os | |
12 import pipes | |
13 import re | |
14 import subprocess | |
15 import tempfile | |
16 | |
17 # External dependencies. | |
18 from humanfriendly.terminal import ( | |
19 ANSI_CSI, | |
20 ANSI_TEXT_STYLES, | |
21 clean_terminal_output, | |
22 output, | |
23 ) | |
24 | |
25 # Modules included in our package. | |
26 from coloredlogs.converter.colors import ( | |
27 BRIGHT_COLOR_PALETTE, | |
28 EIGHT_COLOR_PALETTE, | |
29 EXTENDED_COLOR_PALETTE, | |
30 ) | |
31 | |
# Compiled regular expression that matches leading spaces (indentation).
INDENT_PATTERN = re.compile('^ +', re.MULTILINE)

# Compiled regular expression that matches a tag followed by a space at the start of a line.
TAG_INDENT_PATTERN = re.compile('^(<[^>]+>) ', re.MULTILINE)

# Compiled regular expression that matches strings we want to convert. Used to
# separate all special strings and literal output in a single pass (this allows
# us to properly encode the output without resorting to nasty hacks).
#
# Note that this is a raw string, so a literal dot is written as a single
# backslash followed by a dot. Doubling the backslash would instead match a
# literal backslash followed by an arbitrary character, which means URLs that
# start with 'www.' would never be recognized.
TOKEN_PATTERN = re.compile(r'''
    # Wrap the pattern in a capture group so that re.split() includes the
    # substrings that match the pattern in the resulting list of strings.
    (
        # Match URLs with supported schemes and domain names.
        (?: https?:// | www\. )
        # Scan until the end of the URL by matching non-whitespace characters
        # that are also not escape characters.
        [^\s\x1b]+
        # Alternatively ...
        |
        # Match (what looks like) ANSI escape sequences.
        \x1b \[ .*? m
    )
''', re.UNICODE | re.VERBOSE)
56 | |
57 | |
def capture(command, encoding='UTF-8'):
    """
    Capture the output of an external command as if it runs in an interactive terminal.

    :param command: The command name and its arguments (a list of strings).
    :param encoding: The encoding to use to decode the output (a string).
    :returns: The output of the command (a Unicode string).

    This function runs an external command under ``script`` (emulating an
    interactive terminal) to capture the output of the command as if it was
    running in an interactive terminal (including ANSI escape sequences).
    """
    with open(os.devnull, 'wb') as dev_null:
        # We start by invoking the `script' program in a form that is supported
        # by the Linux implementation [1] but fails command line validation on
        # the MacOS (BSD) implementation [2]: The command is specified using
        # the -c option and the typescript file is /dev/null.
        #
        # [1] http://man7.org/linux/man-pages/man1/script.1.html
        # [2] https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man1/script.1.html
        command_line = ['script', '-qc', ' '.join(map(pipes.quote, command)), '/dev/null']
        script = subprocess.Popen(command_line, stdout=subprocess.PIPE, stderr=dev_null)
        # Standard error goes to /dev/null so only standard output is of interest.
        stdout, _ = script.communicate()
        if script.returncode == 0:
            # If `script' succeeded we assume that it understood our command line
            # invocation which means it's the Linux implementation (in this case
            # we can use standard output instead of a temporary file).
            captured = stdout.decode(encoding)
        else:
            # If `script' failed we assume that it didn't understand our command
            # line invocation which means it's the MacOS (BSD) implementation
            # (in this case we need a temporary file because the command line
            # interface requires it).
            fd, temporary_file = tempfile.mkstemp(prefix='coloredlogs-', suffix='-capture.txt')
            try:
                # mkstemp() returns an already opened file descriptor. We only
                # need the filename, so the descriptor is closed right away
                # to avoid leaking it on every call.
                os.close(fd)
                command_line = ['script', '-q', temporary_file] + list(command)
                subprocess.Popen(command_line, stdout=dev_null, stderr=dev_null).wait()
                with codecs.open(temporary_file, 'rb') as handle:
                    raw_output = handle.read()
            finally:
                os.unlink(temporary_file)
            # On MacOS when standard input is /dev/null I've observed
            # the captured output starting with the characters '^D':
            #
            #   $ script -q capture.txt echo example </dev/null
            #   example
            #   $ xxd capture.txt
            #   00000000: 5e44 0808 6578 616d 706c 650d 0a         ^D..example..
            #
            # I'm not sure why this is here, although I suppose it has to do
            # with ^D in caret notation signifying end-of-file [1]. What I do
            # know is that this is an implementation detail that callers of the
            # capture() function shouldn't be bothered with, so we strip it.
            #
            # [1] https://en.wikipedia.org/wiki/End-of-file
            if raw_output.startswith(b'^D'):
                raw_output = raw_output[2:]
            captured = raw_output.decode(encoding)
    # Clean up backspace and carriage return characters and the 'erase line'
    # ANSI escape sequence and return the output as a Unicode string.
    return u'\n'.join(clean_terminal_output(captured))
119 | |
120 | |
def convert(text, code=True, tabsize=4):
    """
    Convert text with ANSI escape sequences to HTML.

    :param text: The text with ANSI escape sequences (a string).
    :param code: Whether to wrap the returned HTML fragment in a
                 ``<code>...</code>`` element (a boolean, defaults
                 to :data:`True`).
    :param tabsize: Refer to :func:`str.expandtabs()` for details.
    :returns: The text converted to HTML (a string).

    URLs are converted to hyperlinks and "select graphic rendition" escape
    sequences (``ESC [ <numbers separated by ;> m``) are converted to
    ``<span>`` elements. Every ``<span>`` that is opened is closed again,
    either by a reset code or at the end of the text. A token that starts like
    an escape sequence but doesn't consist of numeric parameters is emitted as
    (encoded) literal text instead of being interpreted.
    """
    output = []
    # The number of <span> elements that are currently open. More than one
    # can be open because consecutive escape sequences stack their styles.
    open_spans = 0
    compatible_text_styles = {
        # The following ANSI text styles have an obvious mapping to CSS.
        ANSI_TEXT_STYLES['bold']: {'font-weight': 'bold'},
        ANSI_TEXT_STYLES['strike_through']: {'text-decoration': 'line-through'},
        ANSI_TEXT_STYLES['underline']: {'text-decoration': 'underline'},
    }
    for token in TOKEN_PATTERN.split(text):
        # Try to parse the token as a well formed escape sequence. The result
        # stays None when the token is something else, so that comparisons
        # between numbers and strings can never happen further down.
        ansi_codes = None
        if token.startswith(ANSI_CSI) and token.endswith('m'):
            parameters = token[len(ANSI_CSI):-1]
            # Stripping all digits and separators leaves an empty string
            # only when the parameters contain nothing else.
            if not parameters.strip('0123456789;'):
                # As explained on Wikipedia [1] an absence of codes implies a
                # reset code: "No parameters at all in ESC[m acts like a 0
                # reset code", so empty parameters are interpreted as zero.
                # [1] https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences
                ansi_codes = [int(p) if p else 0 for p in parameters.split(';')]
        if token.startswith(('http://', 'https://', 'www.')):
            url = token if '://' in token else ('http://' + token)
            token = u'<a href="%s" style="color:inherit">%s</a>' % (html_encode(url), html_encode(token))
        elif ansi_codes is not None:
            # First we check for a reset code to close the previous <span>
            # element(s).
            if open_spans and 0 in ansi_codes:
                output.append('</span>' * open_spans)
                open_spans = 0
            # Now we're ready to generate the next <span> element (if any) in
            # the knowledge that we're emitting opening <span> and closing
            # </span> tags in the correct order.
            styles = {}
            is_faint = (ANSI_TEXT_STYLES['faint'] in ansi_codes)
            is_inverse = (ANSI_TEXT_STYLES['inverse'] in ansi_codes)
            while ansi_codes:
                number = ansi_codes.pop(0)
                # Try to match a compatible text style.
                if number in compatible_text_styles:
                    styles.update(compatible_text_styles[number])
                    continue
                # Try to extract a text and/or background color.
                text_color = None
                background_color = None
                if 30 <= number <= 37:
                    # 30-37 sets the text color from the eight color palette.
                    text_color = EIGHT_COLOR_PALETTE[number - 30]
                elif 40 <= number <= 47:
                    # 40-47 sets the background color from the eight color palette.
                    background_color = EIGHT_COLOR_PALETTE[number - 40]
                elif 90 <= number <= 97:
                    # 90-97 sets the text color from the high-intensity eight color palette.
                    text_color = BRIGHT_COLOR_PALETTE[number - 90]
                elif 100 <= number <= 107:
                    # 100-107 sets the background color from the high-intensity eight color palette.
                    background_color = BRIGHT_COLOR_PALETTE[number - 100]
                elif number in (38, 39, 48) and len(ansi_codes) >= 2 and ansi_codes[0] == 5:
                    # 38;5;N is a text color in the 256 color mode palette,
                    # 48;5;N is a background color in the 256 color mode
                    # palette. 39;5;N is also accepted as a background color
                    # because this function has always interpreted it that way.
                    try:
                        # Consume the 5 following 38, 39 or 48.
                        ansi_codes.pop(0)
                        # Consume the 256 color mode color index.
                        color_index = ansi_codes.pop(0)
                        # Set the variable to the corresponding HTML/CSS color.
                        if number == 38:
                            text_color = EXTENDED_COLOR_PALETTE[color_index]
                        else:
                            background_color = EXTENDED_COLOR_PALETTE[color_index]
                    except (ValueError, IndexError):
                        # A color index outside of the palette is ignored.
                        pass
                # Apply the 'faint' or 'inverse' text style
                # by manipulating the selected color(s).
                if text_color and is_inverse:
                    # Use the text color as the background color and pick a
                    # text color that will be visible on the resulting
                    # background color.
                    background_color = text_color
                    text_color = select_text_color(*parse_hex_color(text_color))
                if text_color and is_faint:
                    # Because I wasn't sure how to implement faint colors
                    # based on normal colors I looked at how gnome-terminal
                    # (my terminal of choice) handles this and it appears
                    # to just pick a somewhat darker color.
                    text_color = '#%02X%02X%02X' % tuple(
                        max(0, n - 40) for n in parse_hex_color(text_color)
                    )
                if text_color:
                    styles['color'] = text_color
                if background_color:
                    styles['background-color'] = background_color
            if styles:
                token = '<span style="%s">' % ';'.join(k + ':' + v for k, v in sorted(styles.items()))
                open_spans += 1
            else:
                token = ''
        else:
            token = html_encode(token)
        output.append(token)
    # Close the elements that were never explicitly reset, otherwise the
    # generated HTML fragment would contain unbalanced tags.
    if open_spans:
        output.append('</span>' * open_spans)
    html = ''.join(output)
    html = encode_whitespace(html, tabsize)
    if code:
        html = '<code>%s</code>' % html
    return html
231 | |
232 | |
def encode_whitespace(text, tabsize=4):
    """
    Encode whitespace so that web browsers properly render it.

    :param text: The plain text (a string).
    :param tabsize: Refer to :func:`str.expandtabs()` for details.
    :returns: The text converted to HTML (a string).

    The purpose of this function is to encode whitespace in such a way that web
    browsers render the same whitespace regardless of whether 'preformatted'
    styling is used (by wrapping the text in a ``<pre>...</pre>`` element).

    .. note:: While the string manipulation performed by this function is
              specifically intended not to corrupt the HTML generated by
              :func:`convert()` it definitely does have the potential to
              corrupt HTML from other sources. You have been warned :-).
    """
    # Convert Windows line endings (CR+LF) to UNIX line endings (LF).
    text = text.replace('\r\n', '\n')
    # Convert UNIX line endings (LF) to HTML line endings (<br>).
    text = text.replace('\n', '<br>\n')
    # Convert tabs to spaces.
    text = text.expandtabs(tabsize)
    # Convert leading spaces (that is to say spaces at the start of the string
    # and/or directly after a line ending) into non-breaking spaces, otherwise
    # HTML rendering engines will simply ignore these spaces.
    text = re.sub(INDENT_PATTERN, encode_whitespace_cb, text)
    # The conversion of leading spaces we just did misses a corner case where a
    # line starts with an HTML tag but the first visible text is a space. Web
    # browsers seem to ignore these spaces, so we need to convert them. The
    # replacement has to be the non-breaking space entity: replacing the space
    # with another plain space would not change anything.
    text = re.sub(TAG_INDENT_PATTERN, r'\1&nbsp;', text)
    # Convert runs of multiple spaces into non-breaking spaces to avoid HTML
    # rendering engines from visually collapsing runs of spaces into a single
    # space. We specifically don't replace single spaces for several reasons:
    # 1. We'd break the HTML emitted by convert() by replacing spaces
    #    inside HTML elements (for example the spaces that separate
    #    element names from attribute names).
    # 2. If every single space is replaced by a non-breaking space,
    #    web browsers perform awkwardly unintuitive word wrapping.
    # 3. The HTML output would be bloated for no good reason.
    text = re.sub(' {2,}', encode_whitespace_cb, text)
    return text
275 | |
276 | |
def encode_whitespace_cb(match):
    """
    Replace runs of multiple spaces with non-breaking spaces.

    :param match: A regular expression match object.
    :returns: The replacement string: one ``&nbsp;`` entity for every
              character that was matched.

    This function is used by :func:`encode_whitespace()` as a callback for
    replacement using a regular expression pattern.
    """
    # The HTML entity is required here, plain spaces would be collapsed (or
    # ignored) by web browsers which is exactly what we're trying to avoid.
    return '&nbsp;' * len(match.group(0))
288 | |
289 | |
def html_encode(text):
    """
    Encode characters with a special meaning as HTML.

    :param text: The plain text (a string).
    :returns: The text converted to HTML (a string).

    The characters ``&``, ``<``, ``>`` and ``"`` are replaced with the
    corresponding HTML entities, which makes the result safe to embed in
    element content as well as in double quoted attribute values.
    """
    # The ampersand has to be encoded first, otherwise the ampersands
    # in the entities generated below would be encoded a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    text = text.replace('"', '&quot;')
    return text
302 | |
303 | |
def parse_hex_color(value):
    """
    Split a hexadecimal CSS color into its red, green and blue components.

    :param value: A CSS color in hexadecimal notation (a string like
                  ``#000000`` or ``#000``, the leading ``#`` is optional).
    :return: A tuple with three integers (with values between 0 and 255)
             corresponding to the R, G and B components of the color.
    :raises: :exc:`~exceptions.ValueError` on values that can't be parsed.
    """
    digits = value[1:] if value.startswith('#') else value
    if len(digits) == 3:
        # Shorthand notation: every digit is doubled ('f' means 'ff').
        components = [digit * 2 for digit in digits]
    elif len(digits) == 6:
        # Full notation: every component consists of two digits.
        components = [digits[start:start + 2] for start in (0, 2, 4)]
    else:
        raise ValueError()
    return tuple(int(component, 16) for component in components)
329 | |
330 | |
def select_text_color(r, g, b):
    """
    Pick a readable text color for the inverse text style.

    :param r: The amount of red (an integer between 0 and 255).
    :param g: The amount of green (an integer between 0 and 255).
    :param b: The amount of blue (an integer between 0 and 255).
    :returns: A CSS color in hexadecimal notation (a string), either
              ``#000`` or ``#FFF``.

    In inverse mode the color that is normally used for the text is used for
    the background instead, which can render the text unreadable. This
    function makes an effort to select a text color that remains visible on
    the given background color: black on light backgrounds and white on dark
    backgrounds. Based on http://stackoverflow.com/a/3943023/112731.
    """
    # Weighted sum of the components, compared against a fixed threshold.
    brightness = r * 0.299 + g * 0.587 + b * 0.114
    if brightness > 186:
        return '#000'
    return '#FFF'
346 | |
347 | |
class ColoredCronMailer(object):

    """
    Easy to use integration between :mod:`coloredlogs` and the UNIX ``cron`` daemon.

    By using :class:`ColoredCronMailer` as a context manager in the command
    line interface of your Python program you make it trivially easy for users
    of your program to opt in to HTML output under ``cron``: The only thing the
    user needs to do is set ``CONTENT_TYPE="text/html"`` in their crontab!

    Under the hood this requires quite a bit of magic and I must admit that I
    developed this code simply because I was curious whether it could even be
    done :-). It requires my :mod:`capturer` package which you can install
    using ``pip install 'coloredlogs[cron]'``. The ``[cron]`` extra will pull
    in the :mod:`capturer` 2.4 or newer which is required to capture the output
    while silencing it - otherwise you'd get duplicate output in the emails
    sent by ``cron``.
    """

    def __init__(self):
        """Initialize output capturing when running under ``cron`` with the correct configuration."""
        # Capturing is only enabled when the environment asks for HTML.
        self.is_enabled = 'text/html' in os.environ.get('CONTENT_TYPE', 'text/plain')
        # Set to True by silence() to discard the captured output.
        self.is_silent = False
        if self.is_enabled:
            # We import capturer here so that the coloredlogs[cron] extra
            # isn't required to use the other functions in this module.
            from capturer import CaptureOutput
            self.capturer = CaptureOutput(merged=True, relay=False)

    def __enter__(self):
        """
        Start capturing output (when applicable).

        :returns: The :class:`ColoredCronMailer` object itself.
        """
        if self.is_enabled:
            self.capturer.__enter__()
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """
        Stop capturing output and convert the output to HTML (when applicable).

        :param exc_type: The type of the exception that ended the ``with``
                         block (if any).
        :param exc_value: The exception that ended the ``with`` block (if any).
        :param traceback: The traceback of the exception (if any).
        """
        if self.is_enabled:
            try:
                if not self.is_silent:
                    # Only call output() when we captured something useful.
                    text = self.capturer.get_text()
                    if text and not text.isspace():
                        output(convert(text))
            finally:
                # Always stop capturing, even when the conversion failed,
                # otherwise the capturing would never be stopped.
                self.capturer.__exit__(exc_type, exc_value, traceback)

    def silence(self):
        """
        Tell :func:`__exit__()` to swallow all output (things will be silent).

        This can be useful when a Python program is written in such a way that
        it has already produced output by the time it becomes apparent that
        nothing useful can be done (say in a cron job that runs every few
        minutes :-p). By calling :func:`silence()` the output can be swallowed
        retroactively, avoiding useless emails from ``cron``.
        """
        self.is_silent = True