Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/bs4/tests/test_lxml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 """Tests to ensure that the lxml tree builder generates good trees.""" | |
| 2 | |
| 3 import re | |
| 4 import warnings | |
| 5 | |
| 6 try: | |
| 7 import lxml.etree | |
| 8 LXML_PRESENT = True | |
| 9 LXML_VERSION = lxml.etree.LXML_VERSION | |
| 10 except ImportError as e: | |
| 11 LXML_PRESENT = False | |
| 12 LXML_VERSION = (0,) | |
| 13 | |
| 14 if LXML_PRESENT: | |
| 15 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML | |
| 16 | |
| 17 from bs4 import ( | |
| 18 BeautifulSoup, | |
| 19 BeautifulStoneSoup, | |
| 20 ) | |
| 21 from bs4.element import Comment, Doctype, SoupStrainer | |
| 22 from bs4.testing import skipIf | |
| 23 from bs4.tests import test_htmlparser | |
| 24 from bs4.testing import ( | |
| 25 HTMLTreeBuilderSmokeTest, | |
| 26 XMLTreeBuilderSmokeTest, | |
| 27 SoupTest, | |
| 28 skipIf, | |
| 29 ) | |
| 30 | |
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """Smoke tests for the lxml HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited test cases. The
    whole class is skipped when lxml could not be imported.
    """

    @property
    def default_builder(self):
        """Return the tree builder class under test (``LXMLTreeBuilder``)."""
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        # Each numeric character reference below names a code point far
        # beyond U+10FFFF (two in decimal, one in hexadecimal). The
        # expected markup shows the reference removed entirely.
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly) converted
        # into a string like u'\x93'
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        # An empty doctype declaration should come back as the first
        # node of the document, with nothing but whitespace in it.
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulStoneSoup("<b />")
        # An XML builder serializes the empty element as "<b/>".
        self.assertEqual("<b/>", str(soup.b))
        # assertIn reports both operands when the check fails.
        self.assertIn(
            "BeautifulStoneSoup class is deprecated", str(w[0].message))

    def test_tracking_line_numbers(self):
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)
| 88 | |
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """Smoke tests for the lxml XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited test cases. The
    whole class is skipped when lxml could not be imported.
    """

    @property
    def default_builder(self):
        """Return the tree builder class under test (``LXMLTreeBuilderForXML``)."""
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        # We should not track un-prefixed namespaces as we can only hold one
        # and it will be recognized as the default namespace by soupsieve,
        # which may be confusing in some situations. When no namespace is
        # provided for a selector, the default namespace (if defined) is
        # assumed.

        soup = self.soup(
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '</root>'
        )
        # Only the built-in "xml" prefix and the explicitly prefixed
        # namespace are expected; the un-prefixed one must be absent.
        self.assertEqual(
            soup._namespaces,
            {
                'xml': 'http://www.w3.org/XML/1998/namespace',
                'prefix': 'http://prefixed-namespace.com',
            }
        )
