Mercurial > repos > guerler > springsuite
annotate planemo/lib/python3.7/site-packages/humanfriendly/terminal/html.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 1 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 1 # Human friendly input/output in Python. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 2 # | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 3 # Author: Peter Odding <peter@peterodding.com> | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 4 # Last Change: February 29, 2020 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 5 # URL: https://humanfriendly.readthedocs.io | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 6 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 7 """Convert HTML with simple text formatting to text with ANSI escape sequences.""" | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 8 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 9 # Standard library modules. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 10 import re | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 11 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 12 # Modules included in our package. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 13 from humanfriendly.compat import HTMLParser, StringIO, name2codepoint, unichr | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 14 from humanfriendly.text import compact_empty_lines | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 15 from humanfriendly.terminal import ANSI_COLOR_CODES, ANSI_RESET, ansi_style | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 16 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 17 # Public identifiers that require documentation. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 18 __all__ = ('HTMLConverter', 'html_to_ansi') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 19 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 20 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 21 def html_to_ansi(data, callback=None): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 22 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 23 Convert HTML with simple text formatting to text with ANSI escape sequences. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 24 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 25 :param data: The HTML to convert (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 26 :param callback: Optional callback to pass to :class:`HTMLConverter`. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 27 :returns: Text with ANSI escape sequences (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 28 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 29 Please refer to the documentation of the :class:`HTMLConverter` class for | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 30 details about the conversion process (like which tags are supported) and an | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 31 example with a screenshot. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 32 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 33 converter = HTMLConverter(callback=callback) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 34 return converter(data) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 35 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 36 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 37 class HTMLConverter(HTMLParser): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 38 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 39 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 40 Convert HTML with simple text formatting to text with ANSI escape sequences. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 41 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 42 The following text styles are supported: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 43 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 44 - Bold: ``<b>``, ``<strong>`` and ``<span style="font-weight: bold;">`` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 45 - Italic: ``<i>``, ``<em>`` and ``<span style="font-style: italic;">`` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 46 - Strike-through: ``<del>``, ``<s>`` and ``<span style="text-decoration: line-through;">`` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 47 - Underline: ``<ins>``, ``<u>`` and ``<span style="text-decoration: underline">`` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 48 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 49 Colors can be specified as follows: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 50 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 51 - Foreground color: ``<span style="color: #RRGGBB;">`` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 52 - Background color: ``<span style="background-color: #RRGGBB;">`` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 53 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 54 Here's a small demonstration: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 55 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 56 .. code-block:: python | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 57 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 58 from humanfriendly.text import dedent | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 59 from humanfriendly.terminal import html_to_ansi | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 60 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 61 print(html_to_ansi(dedent(''' | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 62 <b>Hello world!</b> | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 63 <i>Is this thing on?</i> | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 64 I guess I can <u>underline</u> or <s>strike-through</s> text? | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 65 And what about <span style="color: red">color</span>? | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 66 '''))) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 67 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 68 rainbow_colors = [ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 69 '#FF0000', '#E2571E', '#FF7F00', '#FFFF00', '#00FF00', | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 70 '#96BF33', '#0000FF', '#4B0082', '#8B00FF', '#FFFFFF', | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 71 ] | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 72 html_rainbow = "".join('<span style="color: %s">o</span>' % c for c in rainbow_colors) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 73 print(html_to_ansi("Let's try a rainbow: %s" % html_rainbow)) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 74 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 75 Here's what the results look like: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 76 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 77 .. image:: images/html-to-ansi.png | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 78 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 79 Some more details: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 80 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 81 - Nested tags are supported, within reasonable limits. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 82 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 83 - Text in ``<code>`` and ``<pre>`` tags will be highlighted in a | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 84 different color from the main text (currently this is yellow). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 85 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 86 - ``<a href="URL">TEXT</a>`` is converted to the format "TEXT (URL)" where | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 87 the uppercase symbols are highlighted in light blue with an underline. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 88 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 89 - ``<div>``, ``<p>`` and ``<pre>`` tags are considered block level tags | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 90 and are wrapped in vertical whitespace to prevent their content from | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 91 "running into" surrounding text. This may cause runs of multiple empty | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 92 lines to be emitted. As a *workaround* the :func:`__call__()` method | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 93 will automatically call :func:`.compact_empty_lines()` on the generated | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 94 output before returning it to the caller. Of course this won't work | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 95 when `output` is set to something like :data:`sys.stdout`. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 96 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 97 - ``<br>`` is converted to a single plain text line break. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 98 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 99 Implementation notes: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 100 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 101 - A list of dictionaries with style information is used as a stack where | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 102 new styling can be pushed and a pop will restore the previous styling. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 103 When new styling is pushed, it is merged with (but overrides) the current | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 104 styling. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 105 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 106 - If you're going to be converting a lot of HTML it might be useful from | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 107 a performance standpoint to re-use an existing :class:`HTMLConverter` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 108 object for unrelated HTML fragments, in this case take a look at the | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 109 :func:`__call__()` method (it makes this use case very easy). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 110 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 111 .. versionadded:: 4.15 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 112 :class:`humanfriendly.terminal.HTMLConverter` was added to the | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 113 `humanfriendly` package during the initial development of my new | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 114 `chat-archive <https://chat-archive.readthedocs.io/>`_ project, whose | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 115 command line interface makes for a great demonstration of the | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 116 flexibility that this feature provides (hint: check out how the search | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 117 keyword highlighting combines with the regular highlighting). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 118 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 119 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 120 BLOCK_TAGS = ('div', 'p', 'pre') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 121 """The names of tags that are padded with vertical whitespace.""" | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 122 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 123 def __init__(self, *args, **kw): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 124 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 125 Initialize an :class:`HTMLConverter` object. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 126 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 127 :param callback: Optional keyword argument to specify a function that | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 128 will be called to process text fragments before they | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 129 are emitted on the output stream. Note that link text | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 130 and preformatted text fragments are not processed by | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 131 this callback. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 132 :param output: Optional keyword argument to redirect the output to the | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 133 given file-like object. If this is not given a new | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 134 :class:`~python3:io.StringIO` object is created. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 135 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 136 # Hide our optional keyword arguments from the superclass. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 137 self.callback = kw.pop("callback", None) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 138 self.output = kw.pop("output", None) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 139 # Initialize the superclass. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 140 HTMLParser.__init__(self, *args, **kw) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 141 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 142 def __call__(self, data): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 143 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 144 Reset the parser, convert some HTML and get the text with ANSI escape sequences. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 145 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 146 :param data: The HTML to convert to text (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 147 :returns: The converted text (only in case `output` is | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 148 a :class:`~python3:io.StringIO` object). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 149 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 150 self.reset() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 151 self.feed(data) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 152 self.close() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 153 if isinstance(self.output, StringIO): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 154 return compact_empty_lines(self.output.getvalue()) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 155 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 156 @property | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 157 def current_style(self): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 158 """Get the current style from the top of the stack (a dictionary).""" | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 159 return self.stack[-1] if self.stack else {} | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 160 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 161 def close(self): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 162 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 163 Close previously opened ANSI escape sequences. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 164 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 165 This method overrides the same method in the superclass to ensure that | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 166 an :data:`.ANSI_RESET` code is emitted when parsing reaches the end of | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 167 the input but a style is still active. This is intended to prevent | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 168 malformed HTML from messing up terminal output. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 169 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 170 if any(self.stack): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 171 self.output.write(ANSI_RESET) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 172 self.stack = [] | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 173 HTMLParser.close(self) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 174 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 175 def emit_style(self, style=None): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 176 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 177 Emit an ANSI escape sequence for the given or current style to the output stream. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 178 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 179 :param style: A dictionary with arguments for :func:`.ansi_style()` or | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 180 :data:`None`, in which case the style at the top of the | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 181 stack is emitted. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 182 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 183 # Clear the current text styles. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 184 self.output.write(ANSI_RESET) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 185 # Apply a new text style? | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 186 style = self.current_style if style is None else style | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 187 if style: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 188 self.output.write(ansi_style(**style)) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 189 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 190 def handle_charref(self, value): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 191 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 192 Process a decimal or hexadecimal numeric character reference. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 193 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 194 :param value: The decimal or hexadecimal value (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 195 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 196 self.output.write(unichr(int(value[1:], 16) if value.startswith('x') else int(value))) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 197 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 198 def handle_data(self, data): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 199 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 200 Process textual data. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 201 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 202 :param data: The decoded text (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 203 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 204 if self.link_url: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 205 # Link text is captured literally so that we can reliably check | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 206 # whether the text and the URL of the link are the same string. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 207 self.link_text = data | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 208 elif self.callback and self.preformatted_text_level == 0: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 209 # Text that is not part of a link and not preformatted text is | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 210 # passed to the user defined callback to allow for arbitrary | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 211 # pre-processing. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 212 data = self.callback(data) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 213 # All text is emitted unmodified on the output stream. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 214 self.output.write(data) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 215 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 216 def handle_endtag(self, tag): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 217 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 218 Process the end of an HTML tag. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 219 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 220 :param tag: The name of the tag (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 221 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 222 if tag in ('a', 'b', 'code', 'del', 'em', 'i', 'ins', 'pre', 's', 'strong', 'span', 'u'): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 223 old_style = self.current_style | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 224 # The following conditional isn't necessary for well formed | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 225 # HTML but prevents raising exceptions on malformed HTML. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 226 if self.stack: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 227 self.stack.pop(-1) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 228 new_style = self.current_style | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 229 if tag == 'a': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 230 if self.urls_match(self.link_text, self.link_url): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 231 # Don't render the URL when it's part of the link text. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 232 self.emit_style(new_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 233 else: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 234 self.emit_style(new_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 235 self.output.write(' (') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 236 self.emit_style(old_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 237 self.output.write(self.render_url(self.link_url)) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 238 self.emit_style(new_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 239 self.output.write(')') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 240 else: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 241 self.emit_style(new_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 242 if tag in ('code', 'pre'): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 243 self.preformatted_text_level -= 1 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 244 if tag in self.BLOCK_TAGS: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 245 # Emit an empty line after block level tags. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 246 self.output.write('\n\n') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 247 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 248 def handle_entityref(self, name): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 249 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 250 Process a named character reference. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 251 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 252 :param name: The name of the character reference (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 253 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 254 self.output.write(unichr(name2codepoint[name])) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 255 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 256 def handle_starttag(self, tag, attrs): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 257 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 258 Process the start of an HTML tag. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 259 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 260 :param tag: The name of the tag (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 261 :param attrs: A list of tuples with two strings each. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 262 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 263 if tag in self.BLOCK_TAGS: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 264 # Emit an empty line before block level tags. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 265 self.output.write('\n\n') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 266 if tag == 'a': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 267 self.push_styles(color='blue', bright=True, underline=True) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 268 # Store the URL that the link points to for later use, so that we | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 269 # can render the link text before the URL (with the reasoning that | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 270 # this is the most intuitive way to present a link in a plain text | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 271 # interface). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 272 self.link_url = next((v for n, v in attrs if n == 'href'), '') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 273 elif tag == 'b' or tag == 'strong': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 274 self.push_styles(bold=True) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 275 elif tag == 'br': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 276 self.output.write('\n') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 277 elif tag == 'code' or tag == 'pre': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 278 self.push_styles(color='yellow') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 279 self.preformatted_text_level += 1 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 280 elif tag == 'del' or tag == 's': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 281 self.push_styles(strike_through=True) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 282 elif tag == 'em' or tag == 'i': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 283 self.push_styles(italic=True) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 284 elif tag == 'ins' or tag == 'u': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 285 self.push_styles(underline=True) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 286 elif tag == 'span': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 287 styles = {} | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 288 css = next((v for n, v in attrs if n == 'style'), "") | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 289 for rule in css.split(';'): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 290 name, _, value = rule.partition(':') | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 291 name = name.strip() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 292 value = value.strip() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 293 if name == 'background-color': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 294 styles['background'] = self.parse_color(value) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 295 elif name == 'color': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 296 styles['color'] = self.parse_color(value) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 297 elif name == 'font-style' and value == 'italic': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 298 styles['italic'] = True | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 299 elif name == 'font-weight' and value == 'bold': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 300 styles['bold'] = True | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 301 elif name == 'text-decoration' and value == 'line-through': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 302 styles['strike_through'] = True | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 303 elif name == 'text-decoration' and value == 'underline': | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 304 styles['underline'] = True | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 305 self.push_styles(**styles) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 306 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 307 def normalize_url(self, url): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 308 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 309 Normalize a URL to enable string equality comparison. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 310 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 311 :param url: The URL to normalize (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 312 :returns: The normalized URL (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 313 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 314 return re.sub('^mailto:', '', url) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 315 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 316 def parse_color(self, value): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 317 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 318 Convert a CSS color to something that :func:`.ansi_style()` understands. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 319 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 320 :param value: A string like ``rgb(1,2,3)``, ``#AABBCC`` or ``yellow``. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 321 :returns: A color value supported by :func:`.ansi_style()` or :data:`None`. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 322 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 323 # Parse an 'rgb(N,N,N)' expression. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 324 if value.startswith('rgb'): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 325 tokens = re.findall(r'\d+', value) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 326 if len(tokens) == 3: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 327 return tuple(map(int, tokens)) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 328 # Parse an '#XXXXXX' expression. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 329 elif value.startswith('#'): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 330 value = value[1:] | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 331 length = len(value) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 332 if length == 6: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 333 # Six hex digits (proper notation). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 334 return ( | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 335 int(value[:2], 16), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 336 int(value[2:4], 16), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 337 int(value[4:6], 16), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 338 ) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 339 elif length == 3: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 340 # Three hex digits (shorthand). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 341 return ( | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 342 int(value[0], 16), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 343 int(value[1], 16), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 344 int(value[2], 16), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 345 ) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 346 # Try to recognize a named color. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 347 value = value.lower() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 348 if value in ANSI_COLOR_CODES: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 349 return value | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 350 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 351 def push_styles(self, **changes): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 352 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 353 Push new style information onto the stack. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 354 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 355 :param changes: Any keyword arguments are passed on to :func:`.ansi_style()`. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 356 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 357 This method is a helper for :func:`handle_starttag()` | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 358 that does the following: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 359 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 360 1. Make a copy of the current styles (from the top of the stack), | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 361 2. Apply the given `changes` to the copy of the current styles, | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 362 3. Add the new styles to the stack, | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 363 4. Emit the appropriate ANSI escape sequence to the output stream. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 364 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 365 prototype = self.current_style | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 366 if prototype: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 367 new_style = dict(prototype) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 368 new_style.update(changes) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 369 else: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 370 new_style = changes | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 371 self.stack.append(new_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 372 self.emit_style(new_style) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 373 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 374 def render_url(self, url): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 375 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 376 Prepare a URL for rendering on the terminal. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 377 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 378 :param url: The URL to simplify (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 379 :returns: The simplified URL (a string). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 380 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 381 This method pre-processes a URL before rendering on the terminal. The | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 382 following modifications are made: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 383 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 384 - The ``mailto:`` prefix is stripped. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 385 - Spaces are converted to ``%20``. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 386 - A trailing parenthesis is converted to ``%29``. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 387 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 388 url = re.sub('^mailto:', '', url) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 389 url = re.sub(' ', '%20', url) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 390 url = re.sub(r'\)$', '%29', url) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 391 return url | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 392 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 393 def reset(self): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 394 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 395 Reset the state of the HTML parser and ANSI converter. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 396 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 397 When `output` is a :class:`~python3:io.StringIO` object a new | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 398 instance will be created (and the old one garbage collected). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 399 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 400 # Reset the state of the superclass. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 401 HTMLParser.reset(self) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 402 # Reset our instance variables. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 403 self.link_text = None | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 404 self.link_url = None | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 405 self.preformatted_text_level = 0 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 406 if self.output is None or isinstance(self.output, StringIO): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 407 # If the caller specified something like output=sys.stdout then it | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 408 # doesn't make much sense to negate that choice here in reset(). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 409 self.output = StringIO() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 410 self.stack = [] | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 411 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 412 def urls_match(self, a, b): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 413 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 414 Compare two URLs for equality using :func:`normalize_url()`. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 415 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 416 :param a: A string containing a URL. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 417 :param b: A string containing a URL. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 418 :returns: :data:`True` if the URLs are the same, :data:`False` otherwise. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 419 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 420 This method is used by :func:`handle_endtag()` to omit the URL of a | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 421 hyperlink (``<a href="...">``) when the link text is that same URL. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 422 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 423 return self.normalize_url(a) == self.normalize_url(b) | 
