Mercurial > repos > guerler > springsuite
annotate planemo/lib/python3.7/site-packages/bleach/html5lib_shim.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 | 
| parents | d30785e31577 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
1 # flake8: noqa | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
2 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
3 Shim module between Bleach and html5lib. This makes it easier to upgrade the | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
4 html5lib library without having to change a lot of code. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
5 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
6 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
7 from __future__ import unicode_literals | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
8 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
9 import re | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
10 import string | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
11 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
12 import six | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
13 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
14 from bleach._vendor.html5lib import ( | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
15 HTMLParser, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
16 getTreeWalker, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
17 ) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
18 from bleach._vendor.html5lib import constants | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
19 from bleach._vendor.html5lib.constants import ( | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
20 namespaces, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
21 prefixes, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
22 ) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
23 from bleach._vendor.html5lib.constants import _ReparseException as ReparseException | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
24 from bleach._vendor.html5lib.filters.base import Filter | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
25 from bleach._vendor.html5lib.filters.sanitizer import allowed_protocols | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
26 from bleach._vendor.html5lib.filters.sanitizer import Filter as SanitizerFilter | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
27 from bleach._vendor.html5lib._inputstream import HTMLInputStream | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
28 from bleach._vendor.html5lib.serializer import HTMLSerializer | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
29 from bleach._vendor.html5lib._tokenizer import HTMLTokenizer | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
30 from bleach._vendor.html5lib._trie import Trie | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
31 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
32 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
33 #: Map of entity name to expanded entity | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
34 ENTITIES = constants.entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
35 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
36 #: Trie of html entity string -> character representation | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
37 ENTITIES_TRIE = Trie(ENTITIES) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
38 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
39 #: Token type constants--these never change | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
40 TAG_TOKEN_TYPES = { | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
41 constants.tokenTypes['StartTag'], | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
42 constants.tokenTypes['EndTag'], | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
43 constants.tokenTypes['EmptyTag'] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
44 } | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
45 CHARACTERS_TYPE = constants.tokenTypes['Characters'] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
46 PARSEERROR_TYPE = constants.tokenTypes['ParseError'] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
47 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
48 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
49 #: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
50 #: https://html.spec.whatwg.org/multipage/indices.html#elements-3 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
51 HTML_TAGS = [ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
52 'a', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
53 'abbr', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
54 'address', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
55 'area', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
56 'article', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
57 'aside', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
58 'audio', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
59 'b', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
60 'base', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
61 'bdi', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
62 'bdo', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
63 'blockquote', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
64 'body', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
65 'br', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
66 'button', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
67 'canvas', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
68 'caption', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
69 'cite', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
70 'code', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
71 'col', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
72 'colgroup', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
73 'data', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
74 'datalist', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
75 'dd', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
76 'del', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
77 'details', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
78 'dfn', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
79 'dialog', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
80 'div', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
81 'dl', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
82 'dt', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
83 'em', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
84 'embed', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
85 'fieldset', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
86 'figcaption', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
87 'figure', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
88 'footer', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
89 'form', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
90 'h1', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
91 'h2', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
92 'h3', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
93 'h4', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
94 'h5', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
95 'h6', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
96 'head', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
97 'header', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
98 'hgroup', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
99 'hr', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
100 'html', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
101 'i', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
102 'iframe', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
103 'img', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
104 'input', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
105 'ins', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
106 'kbd', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
107 'keygen', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
108 'label', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
109 'legend', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
110 'li', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
111 'link', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
112 'map', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
113 'mark', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
114 'menu', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
115 'meta', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
116 'meter', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
117 'nav', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
118 'noscript', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
119 'object', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
120 'ol', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
121 'optgroup', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
122 'option', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
123 'output', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
124 'p', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
125 'param', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
126 'picture', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
127 'pre', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
128 'progress', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
129 'q', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
130 'rp', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
131 'rt', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
132 'ruby', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
133 's', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
134 'samp', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
135 'script', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
136 'section', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
137 'select', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
138 'slot', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
139 'small', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
140 'source', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
141 'span', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
142 'strong', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
143 'style', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
144 'sub', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
145 'summary', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
146 'sup', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
147 'table', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
148 'tbody', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
149 'td', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
150 'template', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
151 'textarea', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
152 'tfoot', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
153 'th', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
154 'thead', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
155 'time', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
156 'title', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
157 'tr', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
158 'track', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
159 'u', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
160 'ul', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
161 'var', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
162 'video', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
163 'wbr', | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
164 ] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
165 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
166 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
167 class InputStreamWithMemory(object): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
168 """Wraps an HTMLInputStream to remember characters since last < | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
169 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
170 This wraps existing HTMLInputStream classes to keep track of the stream | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
171 since the last < which marked an open tag state. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
172 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
173 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
174 def __init__(self, inner_stream): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
175 self._inner_stream = inner_stream | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
176 self.reset = self._inner_stream.reset | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
177 self.position = self._inner_stream.position | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
178 self._buffer = [] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
179 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
180 @property | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
181 def errors(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
182 return self._inner_stream.errors | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
183 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
184 @property | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
185 def charEncoding(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
186 return self._inner_stream.charEncoding | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
187 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
188 @property | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
189 def changeEncoding(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
190 return self._inner_stream.changeEncoding | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
191 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
192 def char(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
193 c = self._inner_stream.char() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
194 # char() can return None if EOF, so ignore that | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
195 if c: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
196 self._buffer.append(c) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
197 return c | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
198 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
199 def charsUntil(self, characters, opposite=False): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
200 chars = self._inner_stream.charsUntil(characters, opposite=opposite) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
201 self._buffer.extend(list(chars)) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
202 return chars | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
203 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
204 def unget(self, char): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
205 if self._buffer: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
206 self._buffer.pop(-1) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
207 return self._inner_stream.unget(char) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
208 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
209 def get_tag(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
210 """Returns the stream history since last '<' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
211 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
212 Since the buffer starts at the last '<' as as seen by tagOpenState(), | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
213 we know that everything from that point to when this method is called | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
214 is the "tag" that is being tokenized. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
215 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
216 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
217 return six.text_type('').join(self._buffer) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
218 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
219 def start_tag(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
220 """Resets stream history to just '<' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
221 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
222 This gets called by tagOpenState() which marks a '<' that denotes an | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
223 open tag. Any time we see that, we reset the buffer. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
224 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
225 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
226 self._buffer = ['<'] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
227 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
228 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
229 class BleachHTMLTokenizer(HTMLTokenizer): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
230 """Tokenizer that doesn't consume character entities""" | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
231 def __init__(self, consume_entities=False, **kwargs): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
232 super(BleachHTMLTokenizer, self).__init__(**kwargs) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
233 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
234 self.consume_entities = consume_entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
235 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
236 # Wrap the stream with one that remembers the history | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
237 self.stream = InputStreamWithMemory(self.stream) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
238 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
239 def __iter__(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
240 last_error_token = None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
241 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
242 for token in super(BleachHTMLTokenizer, self).__iter__(): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
243 if last_error_token is not None: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
244 if ((last_error_token['data'] == 'invalid-character-in-attribute-name' and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
245 token['type'] in TAG_TOKEN_TYPES and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
246 token.get('data'))): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
247 # Remove attribute names that have ', " or < in them | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
248 # because those characters are invalid for attribute names. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
249 token['data'] = [ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
250 item for item in token['data'] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
251 if ('"' not in item[0] and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
252 "'" not in item[0] and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
253 '<' not in item[0]) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
254 ] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
255 last_error_token = None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
256 yield token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
257 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
258 elif ((last_error_token['data'] == 'expected-closing-tag-but-got-char' and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
259 self.parser.tags is not None and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
260 token['data'].lower().strip() not in self.parser.tags)): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
261 # We've got either a malformed tag or a pseudo-tag or | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
262 # something that html5lib wants to turn into a malformed | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
263 # comment which Bleach clean() will drop so we interfere | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
264 # with the token stream to handle it more correctly. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
265 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
266 # If this is an allowed tag, it's malformed and we just let | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
267 # the html5lib parser deal with it--we don't enter into this | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
268 # block. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
269 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
270 # If this is not an allowed tag, then we convert it to | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
271 # characters and it'll get escaped in the sanitizer. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
272 token['data'] = self.stream.get_tag() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
273 token['type'] = CHARACTERS_TYPE | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
274 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
275 last_error_token = None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
276 yield token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
277 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
278 elif token['type'] == PARSEERROR_TYPE: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
279 # If the token is a parse error, then let the last_error_token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
280 # go, and make token the new last_error_token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
281 yield last_error_token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
282 last_error_token = token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
283 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
284 else: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
285 yield last_error_token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
286 yield token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
287 last_error_token = None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
288 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
289 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
290 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
291 # If the token is a ParseError, we hold on to it so we can get the | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
292 # next token and potentially fix it. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
293 if token['type'] == PARSEERROR_TYPE: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
294 last_error_token = token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
295 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
296 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
297 yield token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
298 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
299 if last_error_token: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
300 yield last_error_token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
301 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
302 def consumeEntity(self, allowedChar=None, fromAttribute=False): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
303 # If this tokenizer is set to consume entities, then we can let the | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
304 # superclass do its thing. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
305 if self.consume_entities: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
306 return super(BleachHTMLTokenizer, self).consumeEntity(allowedChar, fromAttribute) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
307 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
308 # If this tokenizer is set to not consume entities, then we don't want | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
309 # to consume and convert them, so this overrides the html5lib tokenizer's | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
310 # consumeEntity so that it's now a no-op. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
311 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
312 # However, when that gets called, it's consumed an &, so we put that back in | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
313 # the stream. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
314 if fromAttribute: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
315 self.currentToken['data'][-1][1] += '&' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
316 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
317 else: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
318 self.tokenQueue.append({"type": CHARACTERS_TYPE, "data": '&'}) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
319 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
320 def tagOpenState(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
321 # This state marks a < that is either a StartTag, EndTag, EmptyTag, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
322 # or ParseError. In all cases, we want to drop any stream history | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
323 # we've collected so far and we do that by calling start_tag() on | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
324 # the input stream wrapper. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
325 self.stream.start_tag() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
326 return super(BleachHTMLTokenizer, self).tagOpenState() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
327 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
328 def emitCurrentToken(self): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
329 token = self.currentToken | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
330 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
331 if ((self.parser.tags is not None and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
332 token['type'] in TAG_TOKEN_TYPES and | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
333 token['name'].lower() not in self.parser.tags)): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
334 # If this is a start/end/empty tag for a tag that's not in our | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
335 # allowed list, then it gets stripped or escaped. In both of these | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
336 # cases it gets converted to a Characters token. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
337 if self.parser.strip: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
338 # If we're stripping the token, we just throw in an empty | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
339 # string token. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
340 new_data = '' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
341 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
342 else: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
343 # If we're escaping the token, we want to escape the exact | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
344 # original string. Since tokenizing also normalizes data | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
345 # and this is a tag-like thing, we've lost some information. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
346 # So we go back through the stream to get the original | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
347 # string and use that. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
348 new_data = self.stream.get_tag() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
349 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
350 new_token = { | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
351 'type': CHARACTERS_TYPE, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
352 'data': new_data | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
353 } | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
354 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
355 self.currentToken = new_token | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
356 self.tokenQueue.append(new_token) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
357 self.state = self.dataState | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
358 return | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
359 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
360 super(BleachHTMLTokenizer, self).emitCurrentToken() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
361 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
362 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
363 class BleachHTMLParser(HTMLParser): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
364 """Parser that uses BleachHTMLTokenizer""" | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
365 def __init__(self, tags, strip, consume_entities, **kwargs): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
366 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
367 :arg tags: list of allowed tags--everything else is either stripped or | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
368 escaped; if None, then this doesn't look at tags at all | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
369 :arg strip: whether to strip disallowed tags (True) or escape them (False); | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
370 if tags=None, then this doesn't have any effect | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
371 :arg consume_entities: whether to consume entities (default behavior) or | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
372 leave them as is when tokenizing (BleachHTMLTokenizer-added behavior) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
373 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
374 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
375 self.tags = [tag.lower() for tag in tags] if tags is not None else None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
376 self.strip = strip | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
377 self.consume_entities = consume_entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
378 super(BleachHTMLParser, self).__init__(**kwargs) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
379 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
380 def _parse(self, stream, innerHTML=False, container='div', scripting=True, **kwargs): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
381 # set scripting=True to parse <noscript> as though JS is enabled to | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
382 # match the expected context in browsers | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
383 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
384 # https://html.spec.whatwg.org/multipage/scripting.html#the-noscript-element | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
385 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
386 # Override HTMLParser so we can swap out the tokenizer for our own. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
387 self.innerHTMLMode = innerHTML | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
388 self.container = container | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
389 self.scripting = scripting | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
390 self.tokenizer = BleachHTMLTokenizer( | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
391 stream=stream, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
392 consume_entities=self.consume_entities, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
393 parser=self, | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
394 **kwargs | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
395 ) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
396 self.reset() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
397 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
398 try: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
399 self.mainLoop() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
400 except ReparseException: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
401 self.reset() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
402 self.mainLoop() | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
403 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
404 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
405 def convert_entity(value): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
406 """Convert an entity (minus the & and ; part) into what it represents | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
407 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
408 This handles numeric, hex, and text entities. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
409 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
410 :arg value: the string (minus the ``&`` and ``;`` part) to convert | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
411 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
412 :returns: unicode character or None if it's an ambiguous ampersand that | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
413 doesn't match a character entity | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
414 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
415 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
416 if value[0] == '#': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
417 if value[1] in ('x', 'X'): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
418 return six.unichr(int(value[2:], 16)) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
419 return six.unichr(int(value[1:], 10)) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
420 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
421 return ENTITIES.get(value, None) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
422 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
423 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
424 def convert_entities(text): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
425 """Converts all found entities in the text | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
426 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
427 :arg text: the text to convert entities in | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
428 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
429 :returns: unicode text with converted entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
430 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
431 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
432 if '&' not in text: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
433 return text | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
434 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
435 new_text = [] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
436 for part in next_possible_entity(text): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
437 if not part: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
438 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
439 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
440 if part.startswith('&'): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
441 entity = match_entity(part) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
442 if entity is not None: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
443 converted = convert_entity(entity) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
444 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
445 # If it's not an ambiguous ampersand, then replace with the | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
446 # unicode character. Otherwise, we leave the entity in. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
447 if converted is not None: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
448 new_text.append(converted) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
449 remainder = part[len(entity) + 2:] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
450 if part: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
451 new_text.append(remainder) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
452 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
453 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
454 new_text.append(part) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
455 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
456 return ''.join(new_text) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
457 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
458 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
459 def match_entity(stream): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
460 """Returns first entity in stream or None if no entity exists | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
461 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
462 Note: For Bleach purposes, entities must start with a "&" and end with | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
463 a ";". This ignoresambiguous character entities that have no ";" at the | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
464 end. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
465 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
466 :arg stream: the character stream | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
467 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
468 :returns: ``None`` or the entity string without "&" or ";" | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
469 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
470 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
471 # Nix the & at the beginning | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
472 if stream[0] != '&': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
473 raise ValueError('Stream should begin with "&"') | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
474 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
475 stream = stream[1:] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
476 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
477 stream = list(stream) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
478 possible_entity = '' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
479 end_characters = '<&=;' + string.whitespace | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
480 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
481 # Handle number entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
482 if stream and stream[0] == '#': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
483 possible_entity = '#' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
484 stream.pop(0) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
485 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
486 if stream and stream[0] in ('x', 'X'): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
487 allowed = '0123456789abcdefABCDEF' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
488 possible_entity += stream.pop(0) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
489 else: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
490 allowed = '0123456789' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
491 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
492 # FIXME(willkg): Do we want to make sure these are valid number | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
493 # entities? This doesn't do that currently. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
494 while stream and stream[0] not in end_characters: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
495 c = stream.pop(0) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
496 if c not in allowed: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
497 break | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
498 possible_entity += c | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
499 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
500 if possible_entity and stream and stream[0] == ';': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
501 return possible_entity | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
502 return None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
503 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
504 # Handle character entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
505 while stream and stream[0] not in end_characters: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
506 c = stream.pop(0) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
507 if not ENTITIES_TRIE.has_keys_with_prefix(possible_entity): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
508 break | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
509 possible_entity += c | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
510 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
511 if possible_entity and stream and stream[0] == ';': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
512 return possible_entity | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
513 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
514 return None | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
515 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
516 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
517 AMP_SPLIT_RE = re.compile('(&)') | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
518 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
519 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
520 def next_possible_entity(text): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
521 """Takes a text and generates a list of possible entities | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
522 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
523 :arg text: the text to look at | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
524 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
525 :returns: generator where each part (except the first) starts with an | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
526 "&" | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
527 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
528 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
529 for i, part in enumerate(AMP_SPLIT_RE.split(text)): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
530 if i == 0: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
531 yield part | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
532 elif i % 2 == 0: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
533 yield '&' + part | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
534 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
535 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
536 class BleachHTMLSerializer(HTMLSerializer): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
537 """HTMLSerializer that undoes & -> & in attributes and sets | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
538 escape_rcdata to True | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
539 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
540 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
541 # per the HTMLSerializer.__init__ docstring: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
542 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
543 # Whether to escape characters that need to be | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
544 # escaped within normal elements within rcdata elements such as | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
545 # style. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
546 # | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
547 escape_rcdata = True | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
548 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
549 def escape_base_amp(self, stoken): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
550 """Escapes just bare & in HTML attribute values""" | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
551 # First, undo escaping of &. We need to do this because html5lib's | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
552 # HTMLSerializer expected the tokenizer to consume all the character | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
553 # entities and convert them to their respective characters, but the | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
554 # BleachHTMLTokenizer doesn't do that. For example, this fixes | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
555 # &entity; back to &entity; . | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
556 stoken = stoken.replace('&', '&') | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
557 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
558 # However, we do want all bare & that are not marking character | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
559 # entities to be changed to &, so let's do that carefully here. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
560 for part in next_possible_entity(stoken): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
561 if not part: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
562 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
563 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
564 if part.startswith('&'): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
565 entity = match_entity(part) | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
566 # Only leave entities in that are not ambiguous. If they're | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
567 # ambiguous, then we escape the ampersand. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
568 if entity is not None and convert_entity(entity) is not None: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
569 yield '&' + entity + ';' | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
570 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
571 # Length of the entity plus 2--one for & at the beginning | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
572 # and one for ; at the end | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
573 part = part[len(entity) + 2:] | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
574 if part: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
575 yield part | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
576 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
577 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
578 yield part.replace('&', '&') | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
579 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
580 def serialize(self, treewalker, encoding=None): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
581 """Wrap HTMLSerializer.serialize and conver & to & in attribute values | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
582 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
583 Note that this converts & to & in attribute values where the & isn't | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
584 already part of an unambiguous character entity. | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
585 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
586 """ | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
587 in_tag = False | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
588 after_equals = False | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
589 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
590 for stoken in super(BleachHTMLSerializer, self).serialize(treewalker, encoding): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
591 if in_tag: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
592 if stoken == '>': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
593 in_tag = False | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
594 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
595 elif after_equals: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
596 if stoken != '"': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
597 for part in self.escape_base_amp(stoken): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
598 yield part | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
599 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
600 after_equals = False | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
601 continue | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
602 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
603 elif stoken == '=': | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
604 after_equals = True | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
605 | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
606 yield stoken | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
607 else: | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
608 if stoken.startswith('<'): | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
609 in_tag = True | 
| 
 
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
610 yield stoken | 
