Mercurial > repos > shellac > sam_consensus_v3
diff env/lib/python3.9/site-packages/bleach/_vendor/html5lib/treebuilders/__init__.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.9/site-packages/bleach/_vendor/html5lib/treebuilders/__init__.py Mon Mar 22 18:12:50 2021 +0000 @@ -0,0 +1,88 @@ +"""A collection of modules for building different kinds of trees from HTML +documents. + +To create a treebuilder for a new type of tree, you need to do +implement several things: + +1. A set of classes for various types of elements: Document, Doctype, Comment, + Element. These must implement the interface of ``base.treebuilders.Node`` + (although comment nodes have a different signature for their constructor, + see ``treebuilders.etree.Comment``) Textual content may also be implemented + as another node type, or not, as your tree implementation requires. + +2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits + from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: + + * ``documentClass`` - the class to use for the bottommost node of a document + * ``elementClass`` - the class to use for HTML Elements + * ``commentClass`` - the class to use for comments + * ``doctypeClass`` - the class to use for doctypes + + It also has one required method: + + * ``getDocument`` - Returns the root node of the complete document tree + +3. If you wish to run the unit tests, you must also create a ``testSerializer`` + method on your treebuilder which accepts a node and returns a string + containing Node and its children serialized according to the format used in + the unittests + +""" + +from __future__ import absolute_import, division, unicode_literals + +from .._utils import default_etree + +treeBuilderCache = {} + + +def getTreeBuilder(treeType, implementation=None, **kwargs): + """Get a TreeBuilder class for various types of trees with built-in support + + :arg treeType: the name of the tree type required (case-insensitive). Supported + values are: + + * "dom" - A generic builder for DOM implementations, defaulting to a + xml.dom.minidom based implementation. + * "etree" - A generic builder for tree implementations exposing an + ElementTree-like interface, defaulting to xml.etree.cElementTree if + available and xml.etree.ElementTree if not. + * "lxml" - A etree-based builder for lxml.etree, handling limitations + of lxml's implementation. + + :arg implementation: (Currently applies to the "etree" and "dom" tree + types). A module implementing the tree type e.g. xml.etree.ElementTree + or xml.etree.cElementTree. + + :arg kwargs: Any additional options to pass to the TreeBuilder when + creating it. + + Example: + + >>> from html5lib.treebuilders import getTreeBuilder + >>> builder = getTreeBuilder('etree') + + """ + + treeType = treeType.lower() + if treeType not in treeBuilderCache: + if treeType == "dom": + from . import dom + # Come up with a sane default (pref. from the stdlib) + if implementation is None: + from xml.dom import minidom + implementation = minidom + # NEVER cache here, caching is done in the dom submodule + return dom.getDomModule(implementation, **kwargs).TreeBuilder + elif treeType == "lxml": + from . import etree_lxml + treeBuilderCache[treeType] = etree_lxml.TreeBuilder + elif treeType == "etree": + from . import etree + if implementation is None: + implementation = default_etree + # NEVER cache here, caching is done in the etree submodule + return etree.getETreeModule(implementation, **kwargs).TreeBuilder + else: + raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) + return treeBuilderCache.get(treeType)