comparison env/lib/python3.9/site-packages/bleach/_vendor/html5lib/treewalkers/etree.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from __future__ import absolute_import, division, unicode_literals
2
3 from collections import OrderedDict
4 import re
5
6 from six import string_types
7
8 from . import base
9 from .._utils import moduleFactoryFactory
10
# Splits a "{namespace}localname" string into its two parts:
# group 1 is the text between the braces, group 2 is everything after them.
# Strings that do not start with "{...}" do not match.
tag_regexp = re.compile(r"{([^}]*)}(.*)")
12
13
def getETreeBuilder(ElementTreeImplementation):
    """Build the tree walker for one ElementTree implementation.

    Returns ``locals()``, i.e. a dict holding every name bound in this
    function's scope (the implementation, the comment tag marker and the
    ``TreeWalker`` class).
    """
    # NOTE: every name bound directly in this function ends up in the
    # returned ``locals()`` dict, so no extra function-level names are
    # introduced here.
    ElementTree = ElementTreeImplementation
    # Create a throwaway comment node just to learn which ``tag`` value this
    # implementation gives to comments; it is compared against node tags below.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Non-recursive walker over an ElementTree structure.

        Apart from the starting node, which is passed as a plain object,
        every "node" handled by this class is a 4-tuple:

        1. the current element
        2. the index of that element within its parent
        3. a list used as a stack of ancestor elements
        4. a flag, ``"text"``, ``"tail"`` or ``None``; when it is not
           ``None`` the node stands for the text or tail string of the
           element in (1) rather than for the element itself
        """

        def getNodeDetails(self, node):
            """Return the details tuple describing ``node``.

            The first item of the result is one of ``base.TEXT``,
            ``base.DOCUMENT``, ``base.DOCTYPE``, ``base.COMMENT`` or
            ``base.ELEMENT``; the remaining items depend on that type.
            """
            if isinstance(node, tuple):  # otherwise it is the starting node
                element, _key, _parents, flag = node
                if flag in ("text", "tail"):
                    # The flag doubles as the attribute name to read.
                    return base.TEXT, getattr(element, flag)
                node = element

            if not hasattr(node, "tag"):
                # No ``tag`` attribute: ask the object for its root element.
                node = node.getroot()

            node_tag = node.tag
            if node_tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            if node_tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if node_tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            # Anything left is assumed to be an ordinary element.
            assert isinstance(node_tag, string_types), type(node_tag)
            qualified = tag_regexp.match(node_tag)
            namespace, tag = qualified.groups() if qualified else (None, node_tag)

            # Attributes are keyed by (namespace, local name).
            attrs = OrderedDict()
            for name, value in list(node.attrib.items()):
                qualified = tag_regexp.match(name)
                if qualified:
                    attr_key = (qualified.group(1), qualified.group(2))
                else:
                    attr_key = (None, name)
                attrs[attr_key] = value

            # Last item is truthy when the element has children or text.
            return (base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Return the first child node of ``node``, or ``None``.

            The element's text, when non-empty, comes before its first
            child element. Text and tail nodes have no children.
            """
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                # Starting node: no index, fresh ancestor stack, no flag.
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                return None
            if element.text:
                return element, key, parents, "text"
            if len(element):
                # Descend: the element becomes the innermost ancestor.
                parents.append(element)
                return element[0], 0, parents, None
            return None

        def getNextSibling(self, node):
            """Return the node that follows ``node``, or ``None``.

            A non-tuple node (the starting node) has no sibling.
            """
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # After an element's text comes its first child element.
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                return None

            # Element node: its tail (if any) comes next. Tail node, or
            # element without tail: move on to the next child of the parent.
            if element.tail and flag != "tail":
                return element, key, parents, "tail"
            if key < len(parents[-1]) - 1:
                return parents[-1][key + 1], key + 1, parents, None
            return None

        def getParentNode(self, node):
            """Return the parent of ``node``, or ``None`` for a non-tuple node.

            The result is a plain element when no ancestors remain on the
            stack, otherwise a node tuple.
            """
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The parent of a text node is the element that owns it.
                if parents:
                    return element, key, parents, None
                return element

            # Pop the innermost ancestor; that is the parent we return.
            parent = parents.pop()
            if not parents:
                return parent

            # Recover the parent's own index inside the grandparent.
            siblings = list(parents[-1])
            assert siblings.count(parent) == 1
            return parent, siblings.index(parent), parents, None

    return locals()
129
130
# Module-level entry point: the result of wrapping getETreeBuilder with
# moduleFactoryFactory (imported from .._utils).
getETreeModule = moduleFactoryFactory(getETreeBuilder)