Mercurial > repos > shellac > sam_consensus_v3
annotate env/lib/python3.9/sitepackages/bleach/_vendor/html5lib/treewalkers/etree_lxml.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author  shellac 

date  Mon, 22 Mar 2021 18:12:50 +0000 
parents  
children 
rev  line source 

0
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

1 from __future__ import absolute_import, division, unicode_literals 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

2 from six import text_type 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

3 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

4 from collections import OrderedDict 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

5 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

6 from lxml import etree 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

7 from ..treebuilders.etree import tag_regexp 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

8 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

9 from . import base 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

10 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

11 from .. import _ihatexml 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

12 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

13 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

14 def ensure_str(s): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

15 if s is None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

16 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

17 elif isinstance(s, text_type): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

18 return s 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

19 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

20 return s.decode("ascii", "strict") 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

21 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

22 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

23 class Root(object): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

24 def __init__(self, et): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

25 self.elementtree = et 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

26 self.children = [] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

27 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

28 try: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

29 if et.docinfo.internalDTD: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

30 self.children.append(Doctype(self, 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

31 ensure_str(et.docinfo.root_name), 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

32 ensure_str(et.docinfo.public_id), 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

33 ensure_str(et.docinfo.system_url))) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

34 except AttributeError: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

35 pass 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

36 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

37 try: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

38 node = et.getroot() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

39 except AttributeError: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

40 node = et 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

41 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

42 while node.getprevious() is not None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

43 node = node.getprevious() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

44 while node is not None: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

45 self.children.append(node) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

46 node = node.getnext() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

47 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

48 self.text = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

49 self.tail = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

50 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

51 def __getitem__(self, key): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

52 return self.children[key] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

53 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

54 def getnext(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

55 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

56 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

57 def __len__(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

58 return 1 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

59 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

60 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

61 class Doctype(object): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

62 def __init__(self, root_node, name, public_id, system_id): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

63 self.root_node = root_node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

64 self.name = name 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

65 self.public_id = public_id 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

66 self.system_id = system_id 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

67 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

68 self.text = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

69 self.tail = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

70 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

71 def getnext(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

72 return self.root_node.children[1] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

73 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

74 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

75 class FragmentRoot(Root): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

76 def __init__(self, children): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

77 self.children = [FragmentWrapper(self, child) for child in children] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

78 self.text = self.tail = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

79 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

80 def getnext(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

81 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

82 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

83 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

84 class FragmentWrapper(object): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

85 def __init__(self, fragment_root, obj): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

86 self.root_node = fragment_root 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

87 self.obj = obj 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

88 if hasattr(self.obj, 'text'): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

89 self.text = ensure_str(self.obj.text) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

90 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

91 self.text = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

92 if hasattr(self.obj, 'tail'): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

93 self.tail = ensure_str(self.obj.tail) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

94 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

95 self.tail = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

96 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

97 def __getattr__(self, name): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

98 return getattr(self.obj, name) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

99 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

100 def getnext(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

101 siblings = self.root_node.children 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

102 idx = siblings.index(self) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

103 if idx < len(siblings)  1: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

104 return siblings[idx + 1] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

105 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

106 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

107 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

108 def __getitem__(self, key): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

109 return self.obj[key] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

110 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

111 def __bool__(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

112 return bool(self.obj) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

113 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

114 def getparent(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

115 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

116 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

117 def __str__(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

118 return str(self.obj) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

119 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

120 def __unicode__(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

121 return str(self.obj) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

122 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

123 def __len__(self): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

124 return len(self.obj) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

125 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

126 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

127 class TreeWalker(base.NonRecursiveTreeWalker): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

128 def __init__(self, tree): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

129 # pylint:disable=redefinedvariabletype 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

130 if isinstance(tree, list): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

131 self.fragmentChildren = set(tree) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

132 tree = FragmentRoot(tree) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

133 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

134 self.fragmentChildren = set() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

135 tree = Root(tree) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

136 base.NonRecursiveTreeWalker.__init__(self, tree) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

137 self.filter = _ihatexml.InfosetFilter() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

138 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

139 def getNodeDetails(self, node): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

140 if isinstance(node, tuple): # Text node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

141 node, key = node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

142 assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

143 return base.TEXT, ensure_str(getattr(node, key)) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

144 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

145 elif isinstance(node, Root): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

146 return (base.DOCUMENT,) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

147 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

148 elif isinstance(node, Doctype): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

149 return base.DOCTYPE, node.name, node.public_id, node.system_id 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

150 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

151 elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

152 return base.TEXT, ensure_str(node.obj) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

153 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

154 elif node.tag == etree.Comment: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

155 return base.COMMENT, ensure_str(node.text) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

156 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

157 elif node.tag == etree.Entity: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

158 return base.ENTITY, ensure_str(node.text)[1:1] # strip &; 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

159 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

160 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

161 # This is assumed to be an ordinary element 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

162 match = tag_regexp.match(ensure_str(node.tag)) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

163 if match: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

164 namespace, tag = match.groups() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

165 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

166 namespace = None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

167 tag = ensure_str(node.tag) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

168 attrs = OrderedDict() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

169 for name, value in list(node.attrib.items()): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

170 name = ensure_str(name) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

171 value = ensure_str(value) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

172 match = tag_regexp.match(name) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

173 if match: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

174 attrs[(match.group(1), match.group(2))] = value 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

175 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

176 attrs[(None, name)] = value 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

177 return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

178 attrs, len(node) > 0 or node.text) 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

179 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

180 def getFirstChild(self, node): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

181 assert not isinstance(node, tuple), "Text nodes have no children" 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

182 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

183 assert len(node) or node.text, "Node has no children" 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

184 if node.text: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

185 return (node, "text") 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

186 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

187 return node[0] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

188 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

189 def getNextSibling(self, node): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

190 if isinstance(node, tuple): # Text node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

191 node, key = node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

192 assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

193 if key == "text": 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

194 # XXX: we cannot use a "bool(node) and node[0] or None" construct here 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

195 # because node[0] might evaluate to False if it has no child element 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

196 if len(node): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

197 return node[0] 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

198 else: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

199 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

200 else: # tail 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

201 return node.getnext() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

202 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

203 return (node, "tail") if node.tail else node.getnext() 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

204 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

205 def getParentNode(self, node): 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

206 if isinstance(node, tuple): # Text node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

207 node, key = node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

208 assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

209 if key == "text": 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

210 return node 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

211 # else: fallback to "normal" processing 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

212 elif node in self.fragmentChildren: 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

213 return None 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

214 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
shellac
parents:
diff
changeset

215 return node.getparent() 