Mercurial > repos > shellac > guppy_basecaller

diff env/lib/python3.7/site-packages/soupsieve/css_match.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author: shellac
date: Mon, 01 Jun 2020 08:59:25 -0400
parents: 79f47841a781
--- a/env/lib/python3.7/site-packages/soupsieve/css_match.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1497 +0,0 @@
-"""CSS matcher."""
-from datetime import datetime
-from . import util
-import re
-from .import css_types as ct
-import unicodedata
-
-# Empty tag pattern (whitespace okay)
-# Matches one character that is not a space, tab, CR, LF, or form feed; `match_empty`
-# searches text nodes with it to decide whether they hold real content.
-RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
-
-# Matches a run of non-whitespace characters; `get_classes` uses it to split a
-# `class` attribute string into individual class names.
-RE_NOT_WS = re.compile('[^ \t\r\n\f]+')
-
-# Relationships
-# Compared against `rel_type` in `match_past_relations` (ancestor, direct parent,
-# any previous sibling, immediately previous sibling).
-REL_PARENT = ' '
-REL_CLOSE_PARENT = '>'
-REL_SIBLING = '~'
-REL_CLOSE_SIBLING = '+'
-
-# Relationships for :has() (forward looking)
-# The leading ':' is what `match_relations` tests for to route to `match_future_relations`.
-REL_HAS_PARENT = ': '
-REL_HAS_CLOSE_PARENT = ':>'
-REL_HAS_SIBLING = ':~'
-REL_HAS_CLOSE_SIBLING = ':+'
-
-# Namespace URIs compared against element namespaces.
-NS_XHTML = 'http://www.w3.org/1999/xhtml'
-NS_XML = 'http://www.w3.org/XML/1998/namespace'
-
-# Combined selector flag masks built from `css_types` flags.
-DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
-RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE
-
-# Maps a lower-cased `dir` attribute value to a direction flag; `auto` maps to 0 (no flag).
-DIR_MAP = {
-    'ltr': ct.SEL_DIR_LTR,
-    'rtl': ct.SEL_DIR_RTL,
-    'auto': 0
-}
-
-# Patterns used by `Inputs.parse_value` to parse form input values.
-# Each one is anchored at both ends, so the whole value must match.
-RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
-RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
-RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
-RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
-RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
-RE_DATETIME = re.compile(
-    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
-)
-# Used by `extended_language_filter` to collapse wildcard (`*`) subtags that follow the
-# first subtag of a language range into a single `-`.
-RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')
-
-# Calendar limits used by `Inputs.validate_day`.
-MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
-FEB = 2
-SHORT_MONTH = 30
-LONG_MONTH = 31
-FEB_MONTH = 28
-FEB_LEAP_MONTH = 29
-# NOTE(review): not referenced in the portion of the file visible here.
-DAYS_IN_WEEK = 7
-
-
-class _FakeParent(object):
-    """
-    Fake parent class.
-
-    When we have a fragment with no `BeautifulSoup` document object,
-    we can't evaluate `nth` selectors properly.  Create a temporary
-    fake parent so we can traverse the root element as a child.
-
-    Only the two things the `nth` matching code reads from a parent are
-    provided: a `contents` list and a length.
-    """
-
-    def __init__(self, element):
-        """Initialize with `element` as the one and only child."""
-
-        # Mirrors the `contents` attribute that child traversal indexes into.
-        self.contents = [element]
-
-    def __len__(self):
-        """Return the number of children held in `contents`."""
-
-        return len(self.contents)
-
-
-class _DocumentNav(object):
-    """
-    Navigate a Beautiful Soup document.
-
-    Thin helpers around the Beautiful Soup node attributes the matcher relies on
-    (`contents`, `descendants`, `parent`, sibling links, `attrs`, ...).
-
-    `is_iframe` and `is_root` use `is_html_tag`, `root`, and `is_html`, which are not
-    defined in this class; they must be supplied by the class it is combined with.
-
-    `bs4` is imported inside each type check rather than at module level.
-    NOTE(review): presumably this avoids a circular import with `bs4` -- confirm before hoisting.
-    """
-
-    @classmethod
-    def assert_valid_input(cls, tag):
-        """
-        Check if valid input tag or document.
-
-        Raises `TypeError` when `tag` is not a `bs4.Tag` instance.
-        """
-
-        # Fail on unexpected types.
-        if not cls.is_tag(tag):
-            raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
-
-    @staticmethod
-    def is_doc(obj):
-        """Is `BeautifulSoup` object."""
-
-        import bs4
-        return isinstance(obj, bs4.BeautifulSoup)
-
-    @staticmethod
-    def is_tag(obj):
-        """Is tag."""
-
-        import bs4
-        return isinstance(obj, bs4.Tag)
-
-    @staticmethod
-    def is_declaration(obj):  # pragma: no cover
-        """Is declaration."""
-
-        import bs4
-        return isinstance(obj, bs4.Declaration)
-
-    @staticmethod
-    def is_cdata(obj):
-        """Is CDATA."""
-
-        import bs4
-        return isinstance(obj, bs4.CData)
-
-    @staticmethod
-    def is_processing_instruction(obj):  # pragma: no cover
-        """Is processing instruction."""
-
-        import bs4
-        return isinstance(obj, bs4.ProcessingInstruction)
-
-    @staticmethod
-    def is_navigable_string(obj):
-        """Is navigable string."""
-
-        import bs4
-        return isinstance(obj, bs4.NavigableString)
-
-    @staticmethod
-    def is_special_string(obj):
-        """Is special string (comment, declaration, CDATA, processing instruction, or doctype)."""
-
-        import bs4
-        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
-
-    @classmethod
-    def is_content_string(cls, obj):
-        """Check if node is content string (a navigable string that is not a special string)."""
-
-        return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
-
-    @staticmethod
-    def create_fake_parent(el):
-        """Create fake parent for a given element."""
-
-        return _FakeParent(el)
-
-    @staticmethod
-    def is_xml_tree(el):
-        """Check if element (or document) is from a XML tree."""
-
-        return el._is_xml
-
-    def is_iframe(self, el):
-        """
-        Check if element is an `iframe`.
-
-        The name is compared as-is for XML trees and lower-cased otherwise, and the
-        element must also be an HTML tag according to `is_html_tag`.
-        """
-
-        return ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and self.is_html_tag(el)
-
-    def is_root(self, el):
-        """
-        Return whether element is a root element.
-
-        We check that the element is the root of the tree (which we have already pre-calculated),
-        and we check if it is the root element under an `iframe`.
-        """
-
-        root = self.root and self.root is el
-        if not root:
-            parent = self.get_parent(el)
-            root = parent is not None and self.is_html and self.is_iframe(parent)
-        return root
-
-    def get_contents(self, el, no_iframe=False):
-        """
-        Yield the direct contents of `el` in document order.
-
-        Yields nothing when `no_iframe` is set and `el` is an `iframe`.
-        """
-        if not no_iframe or not self.is_iframe(el):
-            for content in el.contents:
-                yield content
-
-    def get_children(self, el, start=None, reverse=False, tags=True, no_iframe=False):
-        """
-        Get children.
-
-        Yields entries of `el.contents` beginning at index `start` (default: the first entry,
-        or the last one when `reverse` is set) and walking forwards or backwards to the end
-        of the list. Only tags are yielded unless `tags` is false. Nothing is yielded when
-        `start` is out of range, or when `no_iframe` is set and `el` is an `iframe`.
-        """
-
-        if not no_iframe or not self.is_iframe(el):
-            last = len(el.contents) - 1
-            if start is None:
-                index = last if reverse else 0
-            else:
-                index = start
-            # `end` is the first index past the walk in the chosen direction.
-            end = -1 if reverse else last + 1
-            incr = -1 if reverse else 1
-
-            if 0 <= index <= last:
-                while index != end:
-                    node = el.contents[index]
-                    index += incr
-                    if not tags or self.is_tag(node):
-                        yield node
-
-    def get_descendants(self, el, tags=True, no_iframe=False):
-        """
-        Get descendants.
-
-        Yields the nodes of `el.descendants` (only tags unless `tags` is false). With
-        `no_iframe` set, an `iframe` encountered along the way is yielded itself but its
-        contents are skipped, and nothing is yielded if `el` itself is an `iframe`.
-        """
-
-        if not no_iframe or not self.is_iframe(el):
-            # While set, every node is discarded until this exact node is reached again.
-            next_good = None
-            for child in el.descendants:
-
-                if next_good is not None:
-                    if child is not next_good:
-                        continue
-                    next_good = None
-
-                is_tag = self.is_tag(child)
-
-                if no_iframe and is_tag and self.is_iframe(child):
-                    if child.next_sibling is not None:
-                        next_good = child.next_sibling
-                    else:
-                        # No sibling: find the deepest last node inside the `iframe` and
-                        # resume at whatever element follows it.
-                        last_child = child
-                        while self.is_tag(last_child) and last_child.contents:
-                            last_child = last_child.contents[-1]
-                        next_good = last_child.next_element
-                    yield child
-                    if next_good is None:
-                        break
-                    # Coverage isn't seeing this even though it's executed
-                    continue  # pragma: no cover
-
-                if not tags or is_tag:
-                    yield child
-
-    def get_parent(self, el, no_iframe=False):
-        """Get parent; `None` when there is none, or when `no_iframe` is set and the parent is an `iframe`."""
-
-        parent = el.parent
-        if no_iframe and parent is not None and self.is_iframe(parent):
-            parent = None
-        return parent
-
-    @staticmethod
-    def get_tag_name(el):
-        """Get tag."""
-
-        return el.name
-
-    @staticmethod
-    def get_prefix_name(el):
-        """Get prefix."""
-
-        return el.prefix
-
-    @staticmethod
-    def get_uri(el):
-        """Get namespace `URI`."""
-
-        return el.namespace
-
-    @classmethod
-    def get_next(cls, el, tags=True):
-        """Get next sibling tag (or the next sibling of any kind when `tags` is false)."""
-
-        sibling = el.next_sibling
-        while tags and not cls.is_tag(sibling) and sibling is not None:
-            sibling = sibling.next_sibling
-        return sibling
-
-    @classmethod
-    def get_previous(cls, el, tags=True):
-        """Get previous sibling tag (or the previous sibling of any kind when `tags` is false)."""
-
-        sibling = el.previous_sibling
-        while tags and not cls.is_tag(sibling) and sibling is not None:
-            sibling = sibling.previous_sibling
-        return sibling
-
-    @staticmethod
-    def has_html_ns(el):
-        """
-        Check if element has an HTML namespace.
-
-        This is a bit different than whether a element is treated as having an HTML namespace,
-        like we do in the case of `is_html_tag`.
-
-        Always returns a real `bool`, including for a missing element or empty namespace.
-        """
-
-        ns = getattr(el, 'namespace') if el else None
-        return bool(ns and ns == NS_XHTML)
-
-    @staticmethod
-    def split_namespace(el, attr_name):
-        """
-        Return namespace and attribute name without the prefix.
-
-        Both values are read from attributes of `attr_name` itself and are `None` when it
-        does not carry them (for example a plain string). `el` is not used.
-        """
-
-        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
-
-    @staticmethod
-    def get_attribute_by_name(el, name, default=None):
-        """
-        Get attribute by name.
-
-        XML elements are looked up by exact key. Otherwise each attribute key is lower-cased
-        before being compared with `name`, so `name` is expected to already be lower case.
-        Returns `default` when no attribute matches.
-        """
-
-        value = default
-        if el._is_xml:
-            try:
-                value = el.attrs[name]
-            except KeyError:
-                pass
-        else:
-            for k, v in el.attrs.items():
-                if util.lower(k) == name:
-                    value = v
-                    break
-        return value
-
-    @staticmethod
-    def iter_attributes(el):
-        """Iterate attributes as `(name, value)` pairs."""
-
-        for k, v in el.attrs.items():
-            yield k, v
-
-    @classmethod
-    def get_classes(cls, el):
-        """
-        Get classes.
-
-        Returns the `class` attribute as a list; a string value is split on whitespace and
-        a missing attribute yields an empty list.
-        """
-
-        classes = cls.get_attribute_by_name(el, 'class', [])
-        if isinstance(classes, str):
-            classes = RE_NOT_WS.findall(classes)
-        return classes
-
-    def get_text(self, el, no_iframe=False):
-        """Get text: the concatenation of every content string among the descendants of `el`."""
-
-        return ''.join(
-            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
-        )
-
-
-class Inputs(object):
-    """Class for parsing and validating input items."""
-
-    @staticmethod
-    def validate_day(year, month, day):
-        """Validate day: `True` when `day` exists in the given `month` of `year`."""
-
-        max_days = LONG_MONTH
-        if month == FEB:
-            # Gregorian leap year rule.
-            max_days = FEB_LEAP_MONTH if ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0) else FEB_MONTH
-        elif month in MONTHS_30:
-            max_days = SHORT_MONTH
-        return 1 <= day <= max_days
-
-    @staticmethod
-    def validate_week(year, week):
-        """
-        Validate week: `True` when `week` is a valid ISO week number for `year`.
-
-        A year has either 52 or 53 ISO weeks. `year` may be any integer; it is not limited
-        to the range `datetime` supports.
-        """
-
-        # December 28 always falls in the last ISO week of its year, so its week number
-        # is the number of weeks in that year. (December 31 can already belong to week 1
-        # of the following year, which says nothing about the current year's length.)
-        # The Gregorian calendar repeats every 400 years, and that cycle is a whole number
-        # of weeks, so the year is folded into 2000-2399 to stay within `datetime` limits.
-        max_week = datetime(2000 + year % 400, 12, 28).isocalendar()[1]
-        return 1 <= week <= max_week
-
-    @staticmethod
-    def validate_month(month):
-        """Validate month."""
-
-        return 1 <= month <= 12
-
-    @staticmethod
-    def validate_year(year):
-        """Validate year."""
-
-        return 1 <= year
-
-    @staticmethod
-    def validate_hour(hour):
-        """Validate hour."""
-
-        return 0 <= hour <= 23
-
-    @staticmethod
-    def validate_minutes(minutes):
-        """Validate minutes."""
-
-        return 0 <= minutes <= 59
-
-    @classmethod
-    def parse_value(cls, itype, value):
-        """
-        Parse the input value.
-
-        `itype` selects the format: `date`, `month`, `week`, `time`, `datetime-local`,
-        `number`, or `range`. Date and time types return a tuple of integers ordered from
-        the largest unit to the smallest; `number` and `range` return a `float`. Returns
-        `None` when the type is not one of these, the text does not match the expected
-        pattern, or a component is out of range.
-        """
-
-        parsed = None
-        if itype == "date":
-            m = RE_DATE.match(value)
-            if m:
-                year = int(m.group('year'), 10)
-                month = int(m.group('month'), 10)
-                day = int(m.group('day'), 10)
-                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
-                    parsed = (year, month, day)
-        elif itype == "month":
-            m = RE_MONTH.match(value)
-            if m:
-                year = int(m.group('year'), 10)
-                month = int(m.group('month'), 10)
-                if cls.validate_year(year) and cls.validate_month(month):
-                    parsed = (year, month)
-        elif itype == "week":
-            m = RE_WEEK.match(value)
-            if m:
-                year = int(m.group('year'), 10)
-                week = int(m.group('week'), 10)
-                if cls.validate_year(year) and cls.validate_week(year, week):
-                    parsed = (year, week)
-        elif itype == "time":
-            m = RE_TIME.match(value)
-            if m:
-                hour = int(m.group('hour'), 10)
-                minutes = int(m.group('minutes'), 10)
-                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
-                    parsed = (hour, minutes)
-        elif itype == "datetime-local":
-            m = RE_DATETIME.match(value)
-            if m:
-                year = int(m.group('year'), 10)
-                month = int(m.group('month'), 10)
-                day = int(m.group('day'), 10)
-                hour = int(m.group('hour'), 10)
-                minutes = int(m.group('minutes'), 10)
-                if (
-                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
-                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
-                ):
-                    parsed = (year, month, day, hour, minutes)
-        elif itype in ("number", "range"):
-            m = RE_NUM.match(value)
-            if m:
-                parsed = float(m.group('value'))
-        return parsed
-
-
-class _Match(object):
-    """Perform CSS matching."""
-
-    def __init__(self, selectors, scope, namespaces, flags):
-        """Store the matcher inputs and work out the root of the tree that holds `scope`."""
-
-        self.assert_valid_input(scope)
-        self.tag = scope
-        self.cached_meta_lang = []
-        self.cached_default_forms = []
-        self.cached_indeterminate_forms = []
-        self.selectors = selectors
-        self.namespaces = namespaces if namespaces is not None else {}
-        self.flags = flags
-        self.iframe_restrict = False
-
-        # Climb from the scope to the top-most node of the tree.
-        doc = scope
-        ancestor = self.get_parent(doc)
-        while ancestor:
-            doc = ancestor
-            ancestor = self.get_parent(doc)
-
-        # The root is the first child tag of a document object,
-        # or the top-most node itself when there is no document object.
-        if self.is_doc(doc):
-            root = next(iter(self.get_children(doc)), None)
-        else:
-            root = doc
-
-        self.root = root
-        self.scope = root if scope is doc else scope
-        self.has_html_namespace = self.has_html_ns(root)
-
-        # XHTML counts as both XML and HTML.
-        self.is_xml = self.is_xml_tree(doc)
-        self.is_html = not self.is_xml or self.has_html_namespace
-
-    def supports_namespaces(self):
-        """Tell whether namespaces apply: the tree is XML or its root is in the XHTML namespace."""
-
-        if self.is_xml:
-            return self.is_xml
-        return self.has_html_namespace
-
-    def get_tag_ns(self, el):
-        """
-        Resolve the namespace of `el`.
-
-        Without namespace support every element is reported as XHTML; otherwise the
-        element's own namespace `URI` is returned, or an empty string when it has none.
-        """
-
-        if not self.supports_namespaces():
-            return NS_XHTML
-        return self.get_uri(el) or ''
-
-    def is_html_tag(self, el):
-        """Tell whether the namespace resolved for `el` is the XHTML namespace."""
-
-        namespace = self.get_tag_ns(el)
-        return namespace == NS_XHTML
-
-    def get_tag(self, el):
-        """Return the tag name of `el`, lower-cased unless the tree is XML or the name is missing."""
-
-        name = self.get_tag_name(el)
-        if name is None or self.is_xml:
-            return name
-        return util.lower(name)
-
-    def get_prefix(self, el):
-        """Return the prefix of `el`, lower-cased unless the tree is XML or the prefix is missing."""
-
-        prefix = self.get_prefix_name(el)
-        if prefix is None or self.is_xml:
-            return prefix
-        return util.lower(prefix)
-
-    def find_bidi(self, el):
-        """
-        Get directionality from element text.
-
-        Walks the direct children of `el` (tags and strings alike) in order and returns
-        `ct.SEL_DIR_LTR` or `ct.SEL_DIR_RTL` for the first character whose Unicode
-        bidirectional class is `L`, `AL`, or `R`. Child tags are searched recursively.
-        Returns `None` when no such character is found.
-        """
-
-        for node in self.get_children(el, tags=False):
-
-            # Analyze child text nodes
-            if self.is_tag(node):
-
-                # Avoid analyzing certain elements specified in the specification.
-                # A child is also skipped when it is not an HTML tag, or when its own `dir`
-                # attribute is one of the `DIR_MAP` keys (`ltr`, `rtl`, `auto`).
-                direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
-                if (
-                    self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
-                    not self.is_html_tag(node) or
-                    direction is not None
-                ):
-                    continue  # pragma: no cover
-
-                # Check directionality of this node's text
-                value = self.find_bidi(node)
-                if value is not None:
-                    return value
-
-                # Direction could not be determined
-                continue  # pragma: no cover
-
-            # Skip `doctype` comments, etc.
-            if self.is_special_string(node):
-                continue
-
-            # Analyze text nodes for directionality.
-            # `L` is left-to-right; `AL` and `R` are both reported as right-to-left.
-            for c in node:
-                bidi = unicodedata.bidirectional(c)
-                if bidi in ('AL', 'R', 'L'):
-                    return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
-        return None
-
-    def extended_language_filter(self, lang_range, lang_tag):
-        """
-        Filter the language tags.
-
-        Returns `True` when `lang_tag` satisfies `lang_range`. Both are lower-cased and
-        split on `-`; wildcard subtags after the first one are removed from the range
-        before comparing.
-        NOTE(review): this looks like RFC 4647 extended filtering -- confirm against the spec.
-        """
-
-        match = True
-        # Collapse `-*` wildcard subtags after the first subtag; they match implicitly.
-        lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
-        ranges = lang_range.split('-')
-        subtags = lang_tag.lower().split('-')
-        length = len(ranges)
-        rindex = 0
-        sindex = 0
-        r = ranges[rindex]
-        s = subtags[sindex]
-
-        # Primary tag needs to match
-        if r != '*' and r != s:
-            match = False
-
-        rindex += 1
-        sindex += 1
-
-        # Match until we run out of ranges
-        # Setting `match = False` followed by `continue` ends the loop via its condition.
-        while match and rindex < length:
-            r = ranges[rindex]
-            try:
-                s = subtags[sindex]
-            except IndexError:
-                # Ran out of subtags,
-                # but we still have ranges
-                match = False
-                continue
-
-            # Empty range
-            if not r:
-                match = False
-                continue
-
-            # Matched range
-            elif s == r:
-                rindex += 1
-
-            # Implicit wildcard cannot match
-            # singletons
-            elif len(s) == 1:
-                match = False
-                continue
-
-            # Implicitly matched, so grab next subtag
-            # (also reached after an exact match, so both indexes advance in that case)
-            sindex += 1
-
-        return match
-
-    def match_attribute_name(self, el, attr, prefix):
-        """
-        Match attribute name and return value if it exists.
-
-        Returns the value of the first attribute of `el` whose name matches `attr` (and
-        whose namespace matches `prefix` when one is given), or `None` when nothing
-        matches. Names are compared exactly for XML and case-insensitively otherwise.
-        """
-
-        value = None
-        if self.supports_namespaces():
-            value = None
-            # If we have not defined namespaces, we can't very well find them, so don't bother trying.
-            if prefix:
-                ns = self.namespaces.get(prefix)
-                # An unknown prefix can never match, unless it is the `*` wildcard.
-                if ns is None and prefix != '*':
-                    return None
-            else:
-                ns = None
-
-            for k, v in self.iter_attributes(el):
-
-                # Get attribute parts
-                namespace, name = self.split_namespace(el, k)
-
-                # Can't match a prefix attribute as we haven't specified one to match
-                # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
-                if ns is None:
-                    if (self.is_xml and attr == k) or (not self.is_xml and util.lower(attr) == util.lower(k)):
-                        value = v
-                        break
-                    # Coverage is not finding this even though it is executed.
-                    # Adding a print statement before this (and erasing coverage) causes coverage to find the line.
-                    # Ignore the false positive message.
-                    continue  # pragma: no cover
-
-                # We can't match our desired prefix attribute as the attribute doesn't have a prefix
-                # `and` binds tighter than `or`, so this reads as:
-                # `namespace is None or (ns != namespace and prefix != '*')`.
-                if namespace is None or ns != namespace and prefix != '*':
-                    continue
-
-                # The attribute doesn't match.
-                if (util.lower(attr) != util.lower(name)) if not self.is_xml else (attr != name):
-                    continue
-
-                value = v
-                break
-        else:
-            # No namespace support: compare whole attribute names case-insensitively.
-            for k, v in self.iter_attributes(el):
-                if util.lower(attr) != util.lower(k):
-                    continue
-                value = v
-                break
-        return value
-
-    def match_namespace(self, el, tag):
-        """
-        Check the namespace of `el` against the namespace part of the tag selector.
-
-        The outcome depends on the selector prefix: no prefix (`None`) compares against the
-        default namespace if one is registered, an empty prefix (`|tag`) requires the element
-        to have no namespace, `*` accepts anything, and any other prefix must be registered
-        and equal to the element's namespace.
-        """
-
-        namespace = self.get_tag_ns(el)
-        prefix = tag.prefix
-
-        # No prefix given: only a registered default namespace can reject the element.
-        if prefix is None:
-            default_namespace = self.namespaces.get('')
-            return default_namespace is None or namespace == default_namespace
-
-        # `|tag`: the element must not be in any namespace.
-        if prefix == '':
-            return not namespace
-
-        # A concrete prefix must resolve to the element's namespace.
-        if prefix and prefix != '*':
-            expected = self.namespaces.get(prefix, None)
-            return expected is not None and namespace == expected
-
-        return True
-
-    def match_attributes(self, el, attributes):
-        """
-        Check every attribute selector against `el`.
-
-        Each selector must find its attribute on the element, and when the selector carries
-        a pattern the attribute value must match it. List values are joined with spaces
-        before the pattern is applied.
-        """
-
-        if not attributes:
-            return True
-
-        for selector in attributes:
-            value = self.match_attribute_name(el, selector.attribute, selector.prefix)
-            # XML trees use the XML-specific pattern when the selector provides one.
-            if self.is_xml and selector.xml_type_pattern:
-                pattern = selector.xml_type_pattern
-            else:
-                pattern = selector.pattern
-            if isinstance(value, list):
-                value = ' '.join(value)
-            # The attribute is missing.
-            if value is None:
-                return False
-            # The attribute is present but its value is rejected.
-            if pattern is not None and pattern.match(value) is None:
-                return False
-        return True
-
-    def match_tagname(self, el, tag):
-        """
-        Check the tag name of `el` against the selector's name.
-
-        A selector without a name, or with the `*` name, accepts every element. The
-        selector name is lower-cased first unless the tree is XML.
-        """
-
-        name = tag.name
-        if name is not None and not self.is_xml:
-            name = util.lower(name)
-        if name is None:
-            return True
-        return name in (self.get_tag(el), '*')
-
-    def match_tag(self, el, tag):
-        """Check `el` against a tag selector: both namespace and name must agree; no selector always matches."""
-
-        if tag is None:
-            return True
-
-        # Both checks are always evaluated, then combined.
-        namespace_ok = self.match_namespace(el, tag)
-        name_ok = self.match_tagname(el, tag)
-        return True if namespace_ok and name_ok else False
-
-    def match_past_relations(self, el, relation):
-        """
-        Evaluate a backward-looking relationship for `el`.
-
-        Depending on the relation type, the relation's selectors are tried against every
-        ancestor, the direct parent, every preceding sibling tag, or only the closest
-        preceding sibling tag.
-        """
-
-        kind = relation[0].rel_type
-        found = False
-
-        if kind == REL_PARENT:
-            # Any ancestor may satisfy the relation.
-            ancestor = self.get_parent(el, no_iframe=self.iframe_restrict)
-            while ancestor and not found:
-                found = self.match_selectors(ancestor, relation)
-                ancestor = self.get_parent(ancestor, no_iframe=self.iframe_restrict)
-        elif kind == REL_CLOSE_PARENT:
-            # Only the direct parent counts.
-            ancestor = self.get_parent(el, no_iframe=self.iframe_restrict)
-            if ancestor:
-                found = self.match_selectors(ancestor, relation)
-        elif kind == REL_SIBLING:
-            # Any earlier sibling tag may satisfy the relation.
-            earlier = self.get_previous(el)
-            while earlier and not found:
-                found = self.match_selectors(earlier, relation)
-                earlier = self.get_previous(earlier)
-        elif kind == REL_CLOSE_SIBLING:
-            # Only the nearest earlier sibling tag counts.
-            earlier = self.get_previous(el)
-            if earlier and self.is_tag(earlier):
-                found = self.match_selectors(earlier, relation)
-
-        return found
-
-    def match_future_child(self, parent, relation, recursive=False):
-        """Try the relation's selectors on the children of `parent`, or on all descendants when `recursive`."""
-
-        if recursive:
-            iterate = self.get_descendants
-        else:
-            iterate = self.get_children
-
-        result = False
-        for candidate in iterate(parent, no_iframe=self.iframe_restrict):
-            result = self.match_selectors(candidate, relation)
-            # The first match settles it.
-            if result:
-                return result
-        return result
-
-    def match_future_relations(self, el, relation):
-        """
-        Evaluate a forward-looking (`:has()`) relationship for `el`.
-
-        Depending on the relation type, the relation's selectors are tried against every
-        descendant, the direct children, every following sibling tag, or only the closest
-        following sibling tag.
-        """
-
-        kind = relation[0].rel_type
-        found = False
-
-        if kind == REL_HAS_PARENT:
-            found = self.match_future_child(el, relation, True)
-        elif kind == REL_HAS_CLOSE_PARENT:
-            found = self.match_future_child(el, relation)
-        elif kind == REL_HAS_SIBLING:
-            # Any later sibling tag may satisfy the relation.
-            later = self.get_next(el)
-            while later and not found:
-                found = self.match_selectors(later, relation)
-                later = self.get_next(later)
-        elif kind == REL_HAS_CLOSE_SIBLING:
-            # Only the nearest later sibling tag counts.
-            later = self.get_next(el)
-            if later and self.is_tag(later):
-                found = self.match_selectors(later, relation)
-
-        return found
-
-    def match_relations(self, el, relation):
-        """Dispatch a relationship check: types starting with `:` look forward, all others look backward."""
-
-        if relation[0].rel_type.startswith(':'):
-            return self.match_future_relations(el, relation)
-        return self.match_past_relations(el, relation)
-
-    def match_id(self, el, ids):
-        """Check that every requested ID equals the `id` attribute of `el` (an empty list always matches)."""
-
-        return not any(wanted != self.get_attribute_by_name(el, 'id', '') for wanted in ids)
-
-    def match_classes(self, el, classes):
-        """Check that every requested class is among the classes of `el`."""
-
-        present = self.get_classes(el)
-        return all(wanted in present for wanted in classes)
-
-    def match_root(self, el):
-        """
-        Check that `el` is a root element with no competing siblings.
-
-        A root stops qualifying when any sibling, before or after it, is a tag, a CDATA
-        section, or a content string that is not blank.
-        """
-
-        is_root = self.is_root(el)
-
-        # Scan the earlier siblings first, then the later ones.
-        for step in (self.get_previous, self.get_next):
-            if not is_root:
-                break
-            sibling = step(el, tags=False)
-            while sibling is not None:
-                if (
-                    self.is_tag(sibling) or
-                    (self.is_content_string(sibling) and sibling.strip()) or
-                    self.is_cdata(sibling)
-                ):
-                    is_root = False
-                    break
-                sibling = step(sibling, tags=False)
-
-        return is_root
-
-    def match_scope(self, el):
-        """Tell whether `el` is the very element the matcher is scoped to."""
-
-        return el is self.scope
-
-    def match_nth_tag_type(self, el, child):
-        """Tell whether `child` has the same tag name and the same namespace as `el`."""
-
-        if self.get_tag(child) != self.get_tag(el):
-            return False
-        return self.get_tag_ns(child) == self.get_tag_ns(el)
-
-    def match_nth(self, el, nth):
-        """
-        Match `nth` elements.
-
-        Every entry of `nth` must accept `el` for the result to be `True`; an empty `nth`
-        yields `True`. For each entry the positions `a * count + b` (or the fixed position
-        `a` when the entry has no variable part) are generated, and the siblings of `el` are
-        counted from the front, or from the back when `last` is set, until a generated
-        position is reached. The entry matches when the sibling at that position is `el`.
-        Only siblings accepted by the entry's `of S` selectors and `of-type` restriction
-        are counted.
-        """
-
-        matched = True
-
-        for n in nth:
-            matched = False
-            # With `of S`, an element that fails S itself can never match.
-            if n.selectors and not self.match_selectors(el, n.selectors):
-                break
-            parent = self.get_parent(el)
-            if parent is None:
-                # Parentless element: wrap it so it can be walked as an only child.
-                parent = self.create_fake_parent(el)
-            last = n.last
-            last_index = len(parent) - 1
-            # Children are walked from the end when matching a `last` style selector.
-            index = last_index if last else 0
-            # Number of qualifying siblings counted so far (1-based position).
-            relative_index = 0
-            a = n.a
-            b = n.b
-            var = n.n
-            count = 0
-            count_incr = 1
-            factor = -1 if last else 1
-            # The conditional applies to the whole `a * count + b` expression.
-            idx = last_idx = a * count + b if var else a
-
-            # We can only adjust bounds within a variable index
-            if var:
-                # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
-                # Otherwise, increment to try to get in bounds.
-                # `adjust` records which side we were last out of bounds on (-1 low, 1 high).
-                adjust = None
-                while idx < 1 or idx > last_index:
-                    if idx < 0:
-                        diff_low = 0 - idx
-                        if adjust is not None and adjust == 1:
-                            break
-                        adjust = -1
-                        count += count_incr
-                        idx = last_idx = a * count + b if var else a
-                        diff = 0 - idx
-                        if diff >= diff_low:
-                            break
-                    else:
-                        diff_high = idx - last_index
-                        if adjust is not None and adjust == -1:
-                            break
-                        adjust = 1
-                        count += count_incr
-                        idx = last_idx = a * count + b if var else a
-                        diff = idx - last_index
-                        if diff >= diff_high:
-                            break
-                        diff_high = diff
-
-                # If a < 0, our count is working backwards, so floor the index by increasing the count.
-                # Find the count that yields the lowest, in bound value and use that.
-                # Lastly reverse count increment so that we'll increase our index.
-                lowest = count
-                if a < 0:
-                    while idx >= 1:
-                        lowest = count
-                        count += count_incr
-                        idx = last_idx = a * count + b if var else a
-                    count_incr = -1
-                count = lowest
-                idx = last_idx = a * count + b if var else a
-
-            # Evaluate elements while our calculated nth index is still in range
-            while 1 <= idx <= last_index + 1:
-                child = None
-                # Evaluate while our child index is still in range.
-                # `index` is advanced for every node, so a later pass resumes where this one stopped.
-                for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
-                    index += factor
-                    if not self.is_tag(child):
-                        continue
-                    # Handle `of S` in `nth-child`
-                    if n.selectors and not self.match_selectors(child, n.selectors):
-                        continue
-                    # Handle `of-type`
-                    if n.of_type and not self.match_nth_tag_type(el, child):
-                        continue
-                    relative_index += 1
-                    if relative_index == idx:
-                        if child is el:
-                            matched = True
-                        else:
-                            break
-                    # Once `el` itself has been reached, nothing further can change the result.
-                    if child is el:
-                        break
-                if child is el:
-                    break
-                last_idx = idx
-                count += count_incr
-                if count < 0:
-                    # Count is counting down and has now ventured into invalid territory.
-                    break
-                idx = a * count + b if var else a
-                # A position that no longer changes (fixed index) would loop forever.
-                if last_idx == idx:
-                    break
-            if not matched:
-                break
-        return matched
-
-    def match_empty(self, el):
-        """Tell whether `el` is empty: it has no child tags and no content string with non-whitespace text."""
-
-        for child in self.get_children(el, tags=False):
-            # Any child tag makes the element non-empty.
-            if self.is_tag(child):
-                return False
-            # So does text that contains something other than whitespace.
-            if self.is_content_string(child) and RE_NOT_EMPTY.search(child):
-                return False
-        return True
-
-    def match_subselectors(self, el, selectors):
-        """Match selectors."""
-
-        match = True
-        for sel in selectors:
-            if not self.match_selectors(el, sel):
-                match = False
-        return match
-
-    def match_contains(self, el, contains):
-        """Match element if it contains text."""
-
-        match = True
-        content = None
-        for contain_list in contains:
-            if content is None:
-                content = self.get_text(el, no_iframe=self.is_html)
-            found = False
-            for text in contain_list.text:
-                if text in content:
-                    found = True
-                    break
-            if not found:
-                match = False
-        return match
-
-    def match_default(self, el):
-        """Match default."""
-
-        match = False
-
-        # Find this input's form
-        form = None
-        parent = self.get_parent(el, no_iframe=True)
-        while parent and form is None:
-            if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
-                form = parent
-            else:
-                parent = self.get_parent(parent, no_iframe=True)
-
-        # Look in form cache to see if we've already located its default button
-        found_form = False
-        for f, t in self.cached_default_forms:
-            if f is form:
-                found_form = True
-                if t is el:
-                    match = True
-                break
-
-        # We didn't have the form cached, so look for its default button
-        if not found_form:
-            for child in self.get_descendants(form, no_iframe=True):
-                name = self.get_tag(child)
-                # Can't do nested forms (haven't figured out why we never hit this)
-                if name == 'form':  # pragma: no cover
-                    break
-                if name in ('input', 'button'):
-                    v = self.get_attribute_by_name(child, 'type', '')
-                    if v and util.lower(v) == 'submit':
-                        self.cached_default_forms.append([form, child])
-                        if el is child:
-                            match = True
-                        break
-        return match
-
-    def match_indeterminate(self, el):
-        """Match default."""
-
-        match = False
-        name = self.get_attribute_by_name(el, 'name')
-
-        def get_parent_form(el):
-            """Find this input's form."""
-            form = None
-            parent = self.get_parent(el, no_iframe=True)
-            while form is None:
-                if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
-                    form = parent
-                    break
-                last_parent = parent
-                parent = self.get_parent(parent, no_iframe=True)
-                if parent is None:
-                    form = last_parent
-                    break
-            return form
-
-        form = get_parent_form(el)
-
-        # Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
-        found_form = False
-        for f, n, i in self.cached_indeterminate_forms:
-            if f is form and n == name:
-                found_form = True
-                if i is True:
-                    match = True
-                break
-
-        # We didn't have the form cached, so validate that the radio button is indeterminate
-        if not found_form:
-            checked = False
-            for child in self.get_descendants(form, no_iframe=True):
-                if child is el:
-                    continue
-                tag_name = self.get_tag(child)
-                if tag_name == 'input':
-                    is_radio = False
-                    check = False
-                    has_name = False
-                    for k, v in self.iter_attributes(child):
-                        if util.lower(k) == 'type' and util.lower(v) == 'radio':
-                            is_radio = True
-                        elif util.lower(k) == 'name' and v == name:
-                            has_name = True
-                        elif util.lower(k) == 'checked':
-                            check = True
-                        if is_radio and check and has_name and get_parent_form(child) is form:
-                            checked = True
-                            break
-                if checked:
-                    break
-            if not checked:
-                match = True
-            self.cached_indeterminate_forms.append([form, name, match])
-
-        return match
-
-    def match_lang(self, el, langs):
-        """
-        Match languages.
-
-        The language is the first truthy `lang` attribute value (or `lang` in the XML namespace,
-        for namespaced non-HTML elements) found on `el` or one of its ancestors. If none is found,
-        a value cached in `self.cached_meta_lang` for the same root is used; failing that, the
-        `content` of a `meta` tag with `http-equiv="content-language"` under `html` > `head` is used.
-
-        `el` matches when, for every pattern group in `langs`, at least one pattern passes
-        `self.extended_language_filter` against that language.
-        """
-
-        match = False
-        has_ns = self.supports_namespaces()
-        root = self.root
-        has_html_namespace = self.has_html_namespace
-
-        # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
-        parent = el
-        found_lang = None
-        last = None
-        # The loop tests truthiness, so an empty attribute value does not stop the walk.
-        while not found_lang:
-            has_html_ns = self.has_html_ns(parent)
-            for k, v in self.iter_attributes(parent):
-                attr_ns, attr = self.split_namespace(parent, k)
-                if (
-                    ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
-                    (
-                        has_ns and not has_html_ns and attr_ns == NS_XML and
-                        (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
-                    )
-                ):
-                    found_lang = v
-                    break
-            # Step up one level, remembering the node we just inspected.
-            last = parent
-            parent = self.get_parent(parent, no_iframe=self.is_html)
-
-            # No parent left: `last` is the topmost node, so it becomes the root used by the
-            # meta lookup and cache below.
-            if parent is None:
-                root = last
-                has_html_namespace = self.has_html_ns(root)
-                parent = last
-                break
-
-        # Use cached meta language.
-        # A cached `False` means an earlier search found no meta language for this root; assigning
-        # it also skips the search below, because `found_lang` is then no longer `None`.
-        if not found_lang and self.cached_meta_lang:
-            for cache in self.cached_meta_lang:
-                if root is cache[0]:
-                    found_lang = cache[1]
-
-        # If we couldn't find a language, and the document is HTML, look to meta to determine language.
-        if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
-            # Find head
-            found = False
-            # Descend from the topmost node to `html`, then to `head`.
-            for tag in ('html', 'head'):
-                found = False
-                for child in self.get_children(parent, no_iframe=self.is_html):
-                    if self.get_tag(child) == tag and self.is_html_tag(child):
-                        found = True
-                        parent = child
-                        break
-                if not found:  # pragma: no cover
-                    break
-
-            # Search meta tags
-            if found:
-                for child in parent:
-                    # NOTE(review): the HTML check below is applied to `parent` (the `head` tag),
-                    # not to `child` - confirm this is intended.
-                    if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(parent):
-                        c_lang = False
-                        content = None
-                        for k, v in self.iter_attributes(child):
-                            if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
-                                c_lang = True
-                            if util.lower(k) == 'content':
-                                content = v
-                            if c_lang and content:
-                                found_lang = content
-                                self.cached_meta_lang.append((root, found_lang))
-                                break
-                    if found_lang:
-                        break
-                # Record the miss as well, so this root is not searched again.
-                if not found_lang:
-                    self.cached_meta_lang.append((root, False))
-
-        # If we determined a language, compare.
-        if found_lang:
-            # Every pattern group needs at least one matching pattern; all patterns in a group
-            # are evaluated even after one of them has matched.
-            for patterns in langs:
-                match = False
-                for pattern in patterns:
-                    if self.extended_language_filter(pattern, found_lang):
-                        match = True
-                if not match:
-                    break
-
-        return match
-
-    def match_dir(self, el, directionality):
-        """Check directionality."""
-
-        # If we have to match both left and right, we can't match either.
-        if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
-            return False
-
-        if el is None or not self.is_html_tag(el):
-            return False
-
-        # Element has defined direction of left to right or right to left
-        direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
-        if direction not in (None, 0):
-            return direction == directionality
-
-        # Element is the document element (the root) and no direction assigned, assume left to right.
-        is_root = self.is_root(el)
-        if is_root and direction is None:
-            return ct.SEL_DIR_LTR == directionality
-
-        # If `input[type=telephone]` and no direction is assigned, assume left to right.
-        name = self.get_tag(el)
-        is_input = name == 'input'
-        is_textarea = name == 'textarea'
-        is_bdi = name == 'bdi'
-        itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
-        if is_input and itype == 'tel' and direction is None:
-            return ct.SEL_DIR_LTR == directionality
-
-        # Auto handling for text inputs
-        if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
-            if is_textarea:
-                value = []
-                for node in self.get_contents(el, no_iframe=True):
-                    if self.is_content_string(node):
-                        value.append(node)
-                value = ''.join(value)
-            else:
-                value = self.get_attribute_by_name(el, 'value', '')
-            if value:
-                for c in value:
-                    bidi = unicodedata.bidirectional(c)
-                    if bidi in ('AL', 'R', 'L'):
-                        direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
-                        return direction == directionality
-                # Assume left to right
-                return ct.SEL_DIR_LTR == directionality
-            elif is_root:
-                return ct.SEL_DIR_LTR == directionality
-            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
-
-        # Auto handling for `bdi` and other non text inputs.
-        if (is_bdi and direction is None) or direction == 0:
-            direction = self.find_bidi(el)
-            if direction is not None:
-                return direction == directionality
-            elif is_root:
-                return ct.SEL_DIR_LTR == directionality
-            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
-
-        # Match parents direction
-        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
-
-    def match_range(self, el, condition):
-        """
-        Match range.
-
-        Behavior is modeled after what we see in browsers. Browsers seem to evaluate
-        if the value is out of range, and if not, it is in range. So a missing value
-        will not evaluate out of range; therefore, value is in range. Personally, I
-        feel like this should evaluate as neither in or out of range.
-        """
-
-        out_of_range = False
-
-        itype = util.lower(self.get_attribute_by_name(el, 'type'))
-        mn = self.get_attribute_by_name(el, 'min', None)
-        if mn is not None:
-            mn = Inputs.parse_value(itype, mn)
-        mx = self.get_attribute_by_name(el, 'max', None)
-        if mx is not None:
-            mx = Inputs.parse_value(itype, mx)
-
-        # There is no valid min or max, so we cannot evaluate a range
-        if mn is None and mx is None:
-            return False
-
-        value = self.get_attribute_by_name(el, 'value', None)
-        if value is not None:
-            value = Inputs.parse_value(itype, value)
-        if value is not None:
-            if itype in ("date", "datetime-local", "month", "week", "number", "range"):
-                if mn is not None and value < mn:
-                    out_of_range = True
-                if not out_of_range and mx is not None and value > mx:
-                    out_of_range = True
-            elif itype == "time":
-                if mn is not None and mx is not None and mn > mx:
-                    # Time is periodic, so this is a reversed/discontinuous range
-                    if value < mn and value > mx:
-                        out_of_range = True
-                else:
-                    if mn is not None and value < mn:
-                        out_of_range = True
-                    if not out_of_range and mx is not None and value > mx:
-                        out_of_range = True
-
-        return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
-
-    def match_defined(self, el):
-        """
-        Match defined.
-
-        `:defined` is related to custom elements in a browser.
-
-        - If the document is XML (not XHTML), all tags will match.
-        - Tags that are not custom (don't have a hyphen) are marked defined.
-        - If the tag has a prefix (without or without a namespace), it will not match.
-
-        This is of course requires the parser to provide us with the proper prefix and namespace info,
-        if it doesn't, there is nothing we can do.
-        """
-
-        name = self.get_tag(el)
-        return (
-            name.find('-') == -1 or
-            name.find(':') != -1 or
-            self.get_prefix(el) is not None
-        )
-
-    def match_placeholder_shown(self, el):
-        """
-        Match placeholder shown according to HTML spec.
-
-        - text area should be checked if they have content. A single newline does not count as content.
-
-        """
-
-        match = False
-        content = self.get_text(el)
-        if content in ('', '\n'):
-            match = True
-
-        return match
-
-    def match_selectors(self, el, selectors):
-        """Check if element matches one of the selectors."""
-
-        match = False
-        is_not = selectors.is_not
-        is_html = selectors.is_html
-
-        # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
-        if is_html:
-            namespaces = self.namespaces
-            iframe_restrict = self.iframe_restrict
-            self.namespaces = {'html': NS_XHTML}
-            self.iframe_restrict = True
-
-        if not is_html or self.is_html:
-            for selector in selectors:
-                match = is_not
-                # We have a un-matchable situation (like `:focus` as you can focus an element in this environment)
-                if isinstance(selector, ct.SelectorNull):
-                    continue
-                # Verify tag matches
-                if not self.match_tag(el, selector.tag):
-                    continue
-                # Verify tag is defined
-                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
-                    continue
-                # Verify element is root
-                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
-                    continue
-                # Verify element is scope
-                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
-                    continue
-                # Verify element has placeholder shown
-                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
-                    continue
-                # Verify `nth` matches
-                if not self.match_nth(el, selector.nth):
-                    continue
-                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
-                    continue
-                # Verify id matches
-                if selector.ids and not self.match_id(el, selector.ids):
-                    continue
-                # Verify classes match
-                if selector.classes and not self.match_classes(el, selector.classes):
-                    continue
-                # Verify attribute(s) match
-                if not self.match_attributes(el, selector.attributes):
-                    continue
-                # Verify ranges
-                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
-                    continue
-                # Verify language patterns
-                if selector.lang and not self.match_lang(el, selector.lang):
-                    continue
-                # Verify pseudo selector patterns
-                if selector.selectors and not self.match_subselectors(el, selector.selectors):
-                    continue
-                # Verify relationship selectors
-                if selector.relation and not self.match_relations(el, selector.relation):
-                    continue
-                # Validate that the current default selector match corresponds to the first submit button in the form
-                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
-                    continue
-                # Validate that the unset radio button is among radio buttons with the same name in a form that are
-                # also not set.
-                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
-                    continue
-                # Validate element directionality
-                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
-                    continue
-                # Validate that the tag contains the specified text.
-                if not self.match_contains(el, selector.contains):
-                    continue
-                match = not is_not
-                break
-
-        # Restore actual namespaces being used for external selector lists
-        if is_html:
-            self.namespaces = namespaces
-            self.iframe_restrict = iframe_restrict
-
-        return match
-
-    def select(self, limit=0):
-        """Match all tags under the targeted tag."""
-
-        if limit < 1:
-            limit = None
-
-        for child in self.get_descendants(self.tag):
-            if self.match(child):
-                yield child
-                if limit is not None:
-                    limit -= 1
-                    if limit < 1:
-                        break
-
-    def closest(self):
-        """Match closest ancestor."""
-
-        current = self.tag
-        closest = None
-        while closest is None and current is not None:
-            if self.match(current):
-                closest = current
-            else:
-                current = self.get_parent(current)
-        return closest
-
-    def filter(self):  # noqa A001
-        """Filter tag's children."""
-
-        return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
-
-    def match(self, el):
-        """Match."""
-
-        return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
-
-
-class CSSMatch(_DocumentNav, _Match):
-    """
-    The Beautiful Soup CSS match class.
-
-    Adds nothing of its own: it only combines the `_DocumentNav` and `_Match` bases.
-    """
-
-
-class SoupSieve(ct.Immutable):
-    """Compiled Soup Sieve selector matching object."""
-
-    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
-
-    def __init__(self, pattern, selectors, namespaces, custom, flags):
-        """Initialize."""
-
-        super(SoupSieve, self).__init__(
-            pattern=pattern,
-            selectors=selectors,
-            namespaces=namespaces,
-            custom=custom,
-            flags=flags
-        )
-
-    def match(self, tag):
-        """Match."""
-
-        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
-
-    def closest(self, tag):
-        """Match closest ancestor."""
-
-        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
-
-    def filter(self, iterable):  # noqa A001
-        """
-        Filter.
-
-        `CSSMatch` can cache certain searches for tags of the same document,
-        so if we are given a tag, all tags are from the same document,
-        and we can take advantage of the optimization.
-
-        Any other kind of iterable could have tags from different documents or detached tags,
-        so for those, we use a new `CSSMatch` for each item in the iterable.
-        """
-
-        if CSSMatch.is_tag(iterable):
-            return CSSMatch(self.selectors, iterable, self.namespaces, self.flags).filter()
-        else:
-            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
-
-    def select_one(self, tag):
-        """Select a single tag."""
-
-        tags = self.select(tag, limit=1)
-        return tags[0] if tags else None
-
-    def select(self, tag, limit=0):
-        """Select the specified tags."""
-
-        return list(self.iselect(tag, limit))
-
-    def iselect(self, tag, limit=0):
-        """Iterate the specified tags."""
-
-        for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
-            yield el
-
-    def __repr__(self):  # pragma: no cover
-        """Representation."""
-
-        return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
-            self.pattern,
-            self.namespaces,
-            self.custom,
-            self.flags
-        )
-
-    __str__ = __repr__
-
-
-ct.pickle_register(SoupSieve)
author	shellac
date	Mon, 01 Jun 2020 08:59:25 -0400
parents	79f47841a781
children