comparison env/lib/python3.9/site-packages/bs4/tests/test_lxml.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Tests to ensure that the lxml tree builder generates good trees."""
2
3 import re
4 import warnings
5
# lxml is an optional dependency. Record whether it can be imported and, if
# so, which version is installed; (0,) stands in for "no version" when the
# import fails.
try:
    import lxml.etree
except ImportError:
    LXML_PRESENT = False
    LXML_VERSION = (0,)
else:
    # Only the import itself can raise ImportError, so the success path
    # lives outside the ``try`` body.
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
13
14 if LXML_PRESENT:
15 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
16
17 from bs4 import (
18 BeautifulSoup,
19 BeautifulStoneSoup,
20 )
21 from bs4.element import Comment, Doctype, SoupStrainer
22 from bs4.testing import skipIf
23 from bs4.tests import test_htmlparser
24 from bs4.testing import (
25 HTMLTreeBuilderSmokeTest,
26 XMLTreeBuilderSmokeTest,
27 SoupTest,
28 skipIf,
29 )
30
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests for ``LXMLTreeBuilder``.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited tests. The whole
    class is skipped when lxml cannot be imported.
    """

    @property
    def default_builder(self):
        """Return the tree builder class these tests run against."""
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        # Numeric character references with these very large values
        # (decimal or hexadecimal) are expected to vanish from the
        # parsed output.
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly) converted into
        # a string like u'\x93'
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as caught:
            # Record every warning regardless of the filters in effect;
            # without this an "ignore" filter would leave ``caught`` empty.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        # Check all recorded messages so a missing warning is reported as
        # an assertion failure rather than an IndexError.
        messages = [str(item.message) for item in caught]
        self.assertTrue(
            any("BeautifulStoneSoup class is deprecated" in text
                for text in messages),
            "expected a BeautifulStoneSoup deprecation warning, got: %r"
            % (messages,))

    def test_tracking_line_numbers(self):
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)
88
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Smoke tests for ``LXMLTreeBuilderForXML``.

    See ``XMLTreeBuilderSmokeTest`` for the inherited tests.
    """

    @property
    def default_builder(self):
        """Return the tree builder class these tests run against."""
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        # We should not track un-prefixed namespaces as we can only hold one
        # and it will be recognized as the default namespace by soupsieve,
        # which may be confusing in some situations. When no namespace is
        # provided for a selector, the default namespace (if defined) is
        # assumed.
        markup = (
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '</root>'
        )
        # Only the built-in "xml" prefix and the explicitly prefixed
        # namespace are expected in the index.
        expected_namespaces = {
            'xml': 'http://www.w3.org/XML/1998/namespace',
            'prefix': 'http://prefixed-namespace.com',
        }
        parsed = self.soup(markup)
        self.assertEqual(parsed._namespaces, expected_namespaces)