Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/soupsieve/css_parser.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 """CSS selector parser.""" | |
| 2 import re | |
| 3 from functools import lru_cache | |
| 4 from . import util | |
| 5 from . import css_match as cm | |
| 6 from . import css_types as ct | |
| 7 from .util import SelectorSyntaxError | |
| 8 import warnings | |
| 9 | |
# Code point substituted when an escape resolves to NULL (U+FFFD).
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Parameterless pseudo-classes that are evaluated against the document.
PSEUDO_SIMPLE = {
    ':any-link',
    ':checked',
    ':default',
    ':defined',
    ':disabled',
    ':empty',
    ':enabled',
    ':first-child',
    ':first-of-type',
    ':in-range',
    ':indeterminate',
    ':last-child',
    ':last-of-type',
    ':link',
    ':only-child',
    ':only-of-type',
    ':optional',
    ':out-of-range',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':root',
    ':scope',
}

# Parameterless pseudo-classes that are accepted but never match anything here.
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited',
}

# Function-style pseudo-classes whose argument is handed to the nested-selector parser.
PSEUDO_COMPLEX = {
    ':-soup-contains',
    ':-soup-contains-own',
    ':contains',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where',
}

# Function-style pseudo-classes that are accepted but never match anything here.
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context',
}

# Function-style pseudo-classes with dedicated argument grammars.
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type',
}

# Every pseudo-class name the parser recognizes in at least one form.
PSEUDO_SUPPORTED = set().union(
    PSEUDO_SIMPLE,
    PSEUDO_SIMPLE_NO_MATCH,
    PSEUDO_COMPLEX,
    PSEUDO_COMPLEX_NO_MATCH,
    PSEUDO_SPECIAL,
)
| 89 | |
# Sub-patterns parts
# These strings are regex fragments that are composed with `str.format`; literal
# braces in a fragment are therefore doubled (`{{1,6}}`).
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE)
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
# CSS escapes: a backslash followed by 1-6 hex digits (plus one optional whitespace),
# by any non-newline character, or by the end of the input.
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
# Same as above, but a backslash may also be followed by a newline.
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
# CSS Identifier
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
# `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
# Value: quoted string or identifier
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)

# Selector patterns
# IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
# Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` means only names beginning with `p`/`P`
# (when compiled case-insensitively) followed by an identifier match; this looks
# unintended -- confirm whether `@{ident}` was meant.
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# The match ends either at the closing `)` or right after the `of` keyword
# (group `of`), leaving the trailing selector list unconsumed.
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
| 162 | |
# Regular expressions
# CSS escape pattern
# Group 1: hex escape (with optional trailing whitespace), group 2: escaped
# single character, group 3: lone backslash at the end of the input.
# NOTE(review): the hex branch allows `WSC` (whitespace *or* comment) after the
# digits, whereas `CSS_ESCAPES` and `RE_CSS_STR_ESC` use `WS` -- confirm intent.
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I)
# String variant: same groups plus group 4, a backslash followed by a newline.
RE_CSS_STR_ESC = re.compile(
    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers
# Groups: `s1` (sign of the `a` part), `a` (digits and/or `n`), `s2` (sign of `b`), `b` (offset digits).
RE_NTH = re.compile(
    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
    re.I
)
# Pattern to iterate multiple values.
# Each match is either a `value` or a comma separator (`split`).
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
# Full-string check for a custom pseudo-class name (`:--name`).
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
| 181 | |
# Constants
# Token that separates the entries of a selector list
COMMA_COMBINATOR = ','
# Token used for the descendant relation
WS_COMBINATOR = " "

# Parse flags (one bit each so they can be OR-ed together)
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9

# Upper bound on the number of compiled patterns kept in the cache
_MAXCACHE = 500
| 202 | |
| 203 | |
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags):
    """Build a `SoupSieve` object for `pattern`, memoized on the full argument tuple."""

    # Custom selectors are normalized first, then handed to the parser.
    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    parsed = parser.process_selectors()
    return cm.SoupSieve(pattern, parsed, namespaces, custom, flags)
| 216 | |
| 217 | |
def _purge_cache():
    """Discard every entry memoized by `_cached_css_compile`."""

    clear = _cached_css_compile.cache_clear
    clear()
| 222 | |
| 223 | |
def process_custom(custom):
    """
    Validate and normalize a mapping of custom pseudo-class definitions.

    Each key is lowercased, checked against the custom pseudo-class name
    pattern (`:--name`), and stored under its CSS-unescaped form. `custom`
    may be `None`, in which case an empty dictionary is returned.

    Raises `SelectorSyntaxError` for an invalid name and `KeyError` when two
    keys normalize to the same stored name.
    """

    custom_selectors = {}
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
        # Check duplicates on the form that is actually stored; comparing the
        # still-escaped name would let differently escaped spellings of the
        # same name silently overwrite each other.
        registered = css_unescape(name)
        if registered in custom_selectors:
            raise KeyError("The custom selector '{}' has already been registered".format(name))
        custom_selectors[registered] = value
    return custom_selectors
| 237 | |
| 238 | |
def css_unescape(content, string=False):
    """
    Unescape CSS value.

    Strings allow for spanning the value on multiple strings by escaping a new line.

    `content` is the raw text; when `string` is true the string escape pattern
    is used, which additionally removes a backslash followed by a newline.
    Returns the text with every escape sequence replaced.
    """

    def replace(m):
        """Replace with the appropriate substitute."""

        hex_escape = m.group(1)
        if hex_escape:
            # Drop the backslash; `int` tolerates the optional trailing whitespace.
            codepoint = int(hex_escape[1:], 16)
            # NULL, surrogates, and values past the Unicode range are not
            # valid escaped code points; substitute U+FFFD instead of letting
            # `chr` raise or emit a lone surrogate.
            if codepoint == 0 or 0xD800 <= codepoint <= 0xDFFF or codepoint > 0x10FFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            return chr(codepoint)
        if m.group(2):
            # Escaped literal character: keep the character, drop the backslash.
            return m.group(2)[1:]
        if m.group(3):
            # A lone backslash at the end of the input.
            return '\ufffd'
        # Escaped newline (string mode only) contributes nothing.
        return ''

    return (RE_CSS_STR_ESC if string else RE_CSS_ESC).sub(replace, content)
| 264 | |
| 265 | |
def escape(ident):
    """Return `ident` escaped so that it can be written as a CSS identifier."""

    size = len(ident)
    leading_dash = size > 0 and ident[0] == '-'
    if leading_dash and size == 1:
        # A lone `-` has to be escaped as there is nothing else to form an identifier.
        return '\\{}'.format(ident)

    def _escape_char(position, char):
        """Escape one character according to its code point and position."""

        cp = ord(char)
        if cp == 0x00:
            return '\ufffd'
        if cp <= 0x1F or cp == 0x7F:
            # Control characters are written as hex escapes.
            return '\\{:x} '.format(cp)
        is_digit = 0x30 <= cp <= 0x39
        if is_digit and (position == 0 or (leading_dash and position == 1)):
            # A digit cannot start an identifier, even after a leading dash.
            return '\\{:x} '.format(cp)
        if (
            cp in (0x2D, 0x5F) or cp >= 0x80 or is_digit or
            (0x41 <= cp <= 0x5A) or (0x61 <= cp <= 0x7A)
        ):
            return char
        return '\\{}'.format(char)

    return ''.join(_escape_char(i, ch) for i, ch in enumerate(ident))
| 292 | |
| 293 | |
class SelectorPattern(object):
    """A named regular expression used to recognize one kind of selector token."""

    def __init__(self, name, pattern):
        """Keep the token name and compile `pattern` (ignore-case, verbose, Unicode)."""

        self.re_pattern = re.compile(pattern, re.U | re.X | re.I)
        self.name = name

    def get_name(self):
        """Return the token name given at construction."""

        return self.name

    def match(self, selector, index, flags):
        """Try to match the pattern in `selector` starting at `index`; `flags` is not used here."""

        matcher = self.re_pattern.match
        return matcher(selector, index)
| 312 | |
| 313 | |
class SpecialPseudoPattern(SelectorPattern):
    """Selector pattern that picks a dedicated sub-pattern based on the pseudo-class name."""

    def __init__(self, patterns):
        """
        Build the name-to-pattern table.

        Each entry of `patterns` is indexed as `(token name, pseudo-class names, regex, pattern class)`.
        """

        self.patterns = {}
        for entry in patterns:
            token_name = entry[0]
            compiled = entry[3](token_name, entry[2])
            # Every listed pseudo-class name shares the same pattern object.
            self.patterns.update(dict.fromkeys(entry[1], compiled))

        self.matched_name = None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self):
        """Return the token name of the sub-pattern that matched most recently."""

        return self.matched_name.get_name()

    def match(self, selector, index, flags):
        """Match `selector` at `index` with the sub-pattern registered for its pseudo-class name."""

        head = self.re_pseudo_name.match(selector, index)
        if not head:
            return None

        key = util.lower(css_unescape(head.group('name')))
        candidate = self.patterns.get(key)
        if not candidate:
            return None

        result = candidate.match(selector, index, flags)
        if result:
            # Remember which sub-pattern matched so `get_name` can report it.
            self.matched_name = candidate
        return result
| 349 | |
| 350 | |
class _Selector(object):
    """
    Mutable work-in-progress selector.

    Parts of a compound selector are accumulated on this object while
    parsing. `freeze` then converts the collected data into the immutable
    selector types from `css_types`.
    """

    def __init__(self, **kwargs):
        """Populate every field from `kwargs`, falling back to an empty default."""

        # Each list-valued field gets its own fresh list when not supplied.
        for field in ('ids', 'classes', 'attributes', 'nth', 'selectors', 'relations', 'contains', 'lang'):
            setattr(self, field, kwargs[field] if field in kwargs else [])
        self.tag = kwargs.get('tag')
        self.rel_type = kwargs.get('rel_type')
        self.flags = kwargs.get('flags', 0)
        self.no_match = kwargs.get('no_match', False)

    def _freeze_relations(self, relations):
        """Chain `relations` under its first entry and return them as a frozen selector list."""

        if not relations:
            return ct.SelectorList()

        head = relations[0]
        # The remaining relations hang off the first one before it is frozen.
        head.relations.extend(relations[1:])
        return ct.SelectorList([head.freeze()])

    def freeze(self):
        """Return the immutable counterpart of this selector."""

        if self.no_match:
            return ct.SelectorNull()

        return ct.Selector(
            self.tag,
            tuple(self.ids),
            tuple(self.classes),
            tuple(self.attributes),
            tuple(self.nth),
            tuple(self.selectors),
            self._freeze_relations(self.relations),
            self.rel_type,
            tuple(self.contains),
            tuple(self.lang),
            self.flags
        )

    def __str__(self):  # pragma: no cover
        """Return a debug representation listing every field."""

        template = (
            '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
            'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
        )
        fields = (
            self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
            self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
        )
        return template.format(*fields)

    __repr__ = __str__
| 418 | |
| 419 | |
| 420 class CSSParser(object): | |
| 421 """Parse CSS selectors.""" | |
| 422 | |
| 423 css_tokens = ( | |
| 424 SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE), | |
| 425 SpecialPseudoPattern( | |
| 426 ( | |
| 427 ( | |
| 428 "pseudo_contains", | |
| 429 (':contains', ':-soup-contains', ':-soup-contains-own'), | |
| 430 PAT_PSEUDO_CONTAINS, | |
| 431 SelectorPattern | |
| 432 ), | |
| 433 ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern), | |
| 434 ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern), | |
| 435 ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern), | |
| 436 ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern) | |
| 437 ) | |
| 438 ), | |
| 439 SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM), | |
| 440 SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS), | |
| 441 SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT), | |
| 442 SelectorPattern("at_rule", PAT_AT_RULE), | |
| 443 SelectorPattern("id", PAT_ID), | |
| 444 SelectorPattern("class", PAT_CLASS), | |
| 445 SelectorPattern("tag", PAT_TAG), | |
| 446 SelectorPattern("attribute", PAT_ATTR), | |
| 447 SelectorPattern("combine", PAT_COMBINE) | |
| 448 ) | |
| 449 | |
| 450 def __init__(self, selector, custom=None, flags=0): | |
| 451 """Initialize.""" | |
| 452 | |
| 453 self.pattern = selector.replace('\x00', '\ufffd') | |
| 454 self.flags = flags | |
| 455 self.debug = self.flags & util.DEBUG | |
| 456 self.custom = {} if custom is None else custom | |
| 457 | |
| 458 def parse_attribute_selector(self, sel, m, has_selector): | |
| 459 """Create attribute selector from the returned regex match.""" | |
| 460 | |
| 461 inverse = False | |
| 462 op = m.group('cmp') | |
| 463 case = util.lower(m.group('case')) if m.group('case') else None | |
| 464 ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else '' | |
| 465 attr = css_unescape(m.group('attr_name')) | |
| 466 is_type = False | |
| 467 pattern2 = None | |
| 468 | |
| 469 if case: | |
| 470 flags = re.I if case == 'i' else 0 | |
| 471 elif util.lower(attr) == 'type': | |
| 472 flags = re.I | |
| 473 is_type = True | |
| 474 else: | |
| 475 flags = 0 | |
| 476 | |
| 477 if op: | |
| 478 if m.group('value').startswith(('"', "'")): | |
| 479 value = css_unescape(m.group('value')[1:-1], True) | |
| 480 else: | |
| 481 value = css_unescape(m.group('value')) | |
| 482 else: | |
| 483 value = None | |
| 484 if not op: | |
| 485 # Attribute name | |
| 486 pattern = None | |
| 487 elif op.startswith('^'): | |
| 488 # Value start with | |
| 489 pattern = re.compile(r'^%s.*' % re.escape(value), flags) | |
| 490 elif op.startswith('$'): | |
| 491 # Value ends with | |
| 492 pattern = re.compile(r'.*?%s$' % re.escape(value), flags) | |
| 493 elif op.startswith('*'): | |
| 494 # Value contains | |
| 495 pattern = re.compile(r'.*?%s.*' % re.escape(value), flags) | |
| 496 elif op.startswith('~'): | |
| 497 # Value contains word within space separated list | |
| 498 # `~=` should match nothing if it is empty or contains whitespace, | |
| 499 # so if either of these cases is present, use `[^\s\S]` which cannot be matched. | |
| 500 value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value) | |
| 501 pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags) | |
| 502 elif op.startswith('|'): | |
| 503 # Value starts with word in dash separated list | |
| 504 pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags) | |
| 505 else: | |
| 506 # Value matches | |
| 507 pattern = re.compile(r'^%s$' % re.escape(value), flags) | |
| 508 if op.startswith('!'): | |
| 509 # Equivalent to `:not([attr=value])` | |
| 510 inverse = True | |
| 511 if is_type and pattern: | |
| 512 pattern2 = re.compile(pattern.pattern) | |
| 513 | |
| 514 # Append the attribute selector | |
| 515 sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2) | |
| 516 if inverse: | |
| 517 # If we are using `!=`, we need to nest the pattern under a `:not()`. | |
| 518 sub_sel = _Selector() | |
| 519 sub_sel.attributes.append(sel_attr) | |
| 520 not_list = ct.SelectorList([sub_sel.freeze()], True, False) | |
| 521 sel.selectors.append(not_list) | |
| 522 else: | |
| 523 sel.attributes.append(sel_attr) | |
| 524 | |
| 525 has_selector = True | |
| 526 return has_selector | |
| 527 | |
| 528 def parse_tag_pattern(self, sel, m, has_selector): | |
| 529 """Parse tag pattern from regex match.""" | |
| 530 | |
| 531 prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None | |
| 532 tag = css_unescape(m.group('tag_name')) | |
| 533 sel.tag = ct.SelectorTag(tag, prefix) | |
| 534 has_selector = True | |
| 535 return has_selector | |
| 536 | |
| 537 def parse_pseudo_class_custom(self, sel, m, has_selector): | |
| 538 """ | |
| 539 Parse custom pseudo class alias. | |
| 540 | |
| 541 Compile custom selectors as we need them. When compiling a custom selector, | |
| 542 set it to `None` in the dictionary so we can avoid an infinite loop. | |
| 543 """ | |
| 544 | |
| 545 pseudo = util.lower(css_unescape(m.group('name'))) | |
| 546 selector = self.custom.get(pseudo) | |
| 547 if selector is None: | |
| 548 raise SelectorSyntaxError( | |
| 549 "Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)), | |
| 550 self.pattern, | |
| 551 m.end(0) | |
| 552 ) | |
| 553 | |
| 554 if not isinstance(selector, ct.SelectorList): | |
| 555 self.custom[pseudo] = None | |
| 556 selector = CSSParser( | |
| 557 selector, custom=self.custom, flags=self.flags | |
| 558 ).process_selectors(flags=FLG_PSEUDO) | |
| 559 self.custom[pseudo] = selector | |
| 560 | |
| 561 sel.selectors.append(selector) | |
| 562 has_selector = True | |
| 563 return has_selector | |
| 564 | |
| 565 def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html): | |
| 566 """Parse pseudo class.""" | |
| 567 | |
| 568 complex_pseudo = False | |
| 569 pseudo = util.lower(css_unescape(m.group('name'))) | |
| 570 if m.group('open'): | |
| 571 complex_pseudo = True | |
| 572 if complex_pseudo and pseudo in PSEUDO_COMPLEX: | |
| 573 has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0)) | |
| 574 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE: | |
| 575 if pseudo == ':root': | |
| 576 sel.flags |= ct.SEL_ROOT | |
| 577 elif pseudo == ':defined': | |
| 578 sel.flags |= ct.SEL_DEFINED | |
| 579 is_html = True | |
| 580 elif pseudo == ':scope': | |
| 581 sel.flags |= ct.SEL_SCOPE | |
| 582 elif pseudo == ':empty': | |
| 583 sel.flags |= ct.SEL_EMPTY | |
| 584 elif pseudo in (':link', ':any-link'): | |
| 585 sel.selectors.append(CSS_LINK) | |
| 586 elif pseudo == ':checked': | |
| 587 sel.selectors.append(CSS_CHECKED) | |
| 588 elif pseudo == ':default': | |
| 589 sel.selectors.append(CSS_DEFAULT) | |
| 590 elif pseudo == ':indeterminate': | |
| 591 sel.selectors.append(CSS_INDETERMINATE) | |
| 592 elif pseudo == ":disabled": | |
| 593 sel.selectors.append(CSS_DISABLED) | |
| 594 elif pseudo == ":enabled": | |
| 595 sel.selectors.append(CSS_ENABLED) | |
| 596 elif pseudo == ":required": | |
| 597 sel.selectors.append(CSS_REQUIRED) | |
| 598 elif pseudo == ":optional": | |
| 599 sel.selectors.append(CSS_OPTIONAL) | |
| 600 elif pseudo == ":read-only": | |
| 601 sel.selectors.append(CSS_READ_ONLY) | |
| 602 elif pseudo == ":read-write": | |
| 603 sel.selectors.append(CSS_READ_WRITE) | |
| 604 elif pseudo == ":in-range": | |
| 605 sel.selectors.append(CSS_IN_RANGE) | |
| 606 elif pseudo == ":out-of-range": | |
| 607 sel.selectors.append(CSS_OUT_OF_RANGE) | |
| 608 elif pseudo == ":placeholder-shown": | |
| 609 sel.selectors.append(CSS_PLACEHOLDER_SHOWN) | |
| 610 elif pseudo == ':first-child': | |
| 611 sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList())) | |
| 612 elif pseudo == ':last-child': | |
| 613 sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())) | |
| 614 elif pseudo == ':first-of-type': | |
| 615 sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList())) | |
| 616 elif pseudo == ':last-of-type': | |
| 617 sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())) | |
| 618 elif pseudo == ':only-child': | |
| 619 sel.nth.extend( | |
| 620 [ | |
| 621 ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()), | |
| 622 ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()) | |
| 623 ] | |
| 624 ) | |
| 625 elif pseudo == ':only-of-type': | |
| 626 sel.nth.extend( | |
| 627 [ | |
| 628 ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()), | |
| 629 ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()) | |
| 630 ] | |
| 631 ) | |
| 632 has_selector = True | |
| 633 elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH: | |
| 634 self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN) | |
| 635 sel.no_match = True | |
| 636 has_selector = True | |
| 637 elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH: | |
| 638 sel.no_match = True | |
| 639 has_selector = True | |
| 640 elif pseudo in PSEUDO_SUPPORTED: | |
| 641 raise SelectorSyntaxError( | |
| 642 "Invalid syntax for pseudo class '{}'".format(pseudo), | |
| 643 self.pattern, | |
| 644 m.start(0) | |
| 645 ) | |
| 646 else: | |
| 647 raise NotImplementedError( | |
| 648 "'{}' pseudo-class is not implemented at this time".format(pseudo) | |
| 649 ) | |
| 650 | |
| 651 return has_selector, is_html | |
| 652 | |
| 653 def parse_pseudo_nth(self, sel, m, has_selector, iselector): | |
| 654 """Parse `nth` pseudo.""" | |
| 655 | |
| 656 mdict = m.groupdict() | |
| 657 if mdict.get('pseudo_nth_child'): | |
| 658 postfix = '_child' | |
| 659 else: | |
| 660 postfix = '_type' | |
| 661 mdict['name'] = util.lower(css_unescape(mdict['name'])) | |
| 662 content = util.lower(mdict.get('nth' + postfix)) | |
| 663 if content == 'even': | |
| 664 # 2n | |
| 665 s1 = 2 | |
| 666 s2 = 0 | |
| 667 var = True | |
| 668 elif content == 'odd': | |
| 669 # 2n+1 | |
| 670 s1 = 2 | |
| 671 s2 = 1 | |
| 672 var = True | |
| 673 else: | |
| 674 nth_parts = RE_NTH.match(content) | |
| 675 s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else '' | |
| 676 a = nth_parts.group('a') | |
| 677 var = a.endswith('n') | |
| 678 if a.startswith('n'): | |
| 679 s1 += '1' | |
| 680 elif var: | |
| 681 s1 += a[:-1] | |
| 682 else: | |
| 683 s1 += a | |
| 684 s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else '' | |
| 685 if nth_parts.group('b'): | |
| 686 s2 += nth_parts.group('b') | |
| 687 else: | |
| 688 s2 = '0' | |
| 689 s1 = int(s1, 10) | |
| 690 s2 = int(s2, 10) | |
| 691 | |
| 692 pseudo_sel = mdict['name'] | |
| 693 if postfix == '_child': | |
| 694 if m.group('of'): | |
| 695 # Parse the rest of `of S`. | |
| 696 nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN) | |
| 697 else: | |
| 698 # Use default `*|*` for `of S`. | |
| 699 nth_sel = CSS_NTH_OF_S_DEFAULT | |
| 700 if pseudo_sel == ':nth-child': | |
| 701 sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel)) | |
| 702 elif pseudo_sel == ':nth-last-child': | |
| 703 sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel)) | |
| 704 else: | |
| 705 if pseudo_sel == ':nth-of-type': | |
| 706 sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList())) | |
| 707 elif pseudo_sel == ':nth-last-of-type': | |
| 708 sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList())) | |
| 709 has_selector = True | |
| 710 return has_selector | |
| 711 | |
| 712 def parse_pseudo_open(self, sel, name, has_selector, iselector, index): | |
| 713 """Parse pseudo with opening bracket.""" | |
| 714 | |
| 715 flags = FLG_PSEUDO | FLG_OPEN | |
| 716 if name == ':not': | |
| 717 flags |= FLG_NOT | |
| 718 if name == ':has': | |
| 719 flags |= FLG_RELATIVE | |
| 720 | |
| 721 sel.selectors.append(self.parse_selectors(iselector, index, flags)) | |
| 722 has_selector = True | |
| 723 return has_selector | |
| 724 | |
| 725 def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index): | |
| 726 """Parse combinator tokens.""" | |
| 727 | |
| 728 combinator = m.group('relation').strip() | |
| 729 if not combinator: | |
| 730 combinator = WS_COMBINATOR | |
| 731 if combinator == COMMA_COMBINATOR: | |
| 732 if not has_selector: | |
| 733 # If we've not captured any selector parts, the comma is either at the beginning of the pattern | |
| 734 # or following another comma, both of which are unexpected. Commas must split selectors. | |
| 735 raise SelectorSyntaxError( | |
| 736 "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index), | |
| 737 self.pattern, | |
| 738 index | |
| 739 ) | |
| 740 sel.rel_type = rel_type | |
| 741 selectors[-1].relations.append(sel) | |
| 742 rel_type = ":" + WS_COMBINATOR | |
| 743 selectors.append(_Selector()) | |
| 744 else: | |
| 745 if has_selector: | |
| 746 # End the current selector and associate the leading combinator with this selector. | |
| 747 sel.rel_type = rel_type | |
| 748 selectors[-1].relations.append(sel) | |
| 749 elif rel_type[1:] != WS_COMBINATOR: | |
| 750 # It's impossible to have two whitespace combinators after each other as the patterns | |
| 751 # will gobble up trailing whitespace. It is also impossible to have a whitespace | |
| 752 # combinator after any other kind for the same reason. But we could have | |
| 753 # multiple non-whitespace combinators. So if the current combinator is not a whitespace, | |
| 754 # then we've hit the multiple combinator case, so we should fail. | |
| 755 raise SelectorSyntaxError( | |
| 756 'The multiple combinators at position {}'.format(index), | |
| 757 self.pattern, | |
| 758 index | |
| 759 ) | |
| 760 # Set the leading combinator for the next selector. | |
| 761 rel_type = ':' + combinator | |
| 762 sel = _Selector() | |
| 763 | |
| 764 has_selector = False | |
| 765 return has_selector, sel, rel_type | |
| 766 | |
| 767 def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index): | |
| 768 """Parse combinator tokens.""" | |
| 769 | |
| 770 combinator = m.group('relation').strip() | |
| 771 if not combinator: | |
| 772 combinator = WS_COMBINATOR | |
| 773 if not has_selector: | |
| 774 raise SelectorSyntaxError( | |
| 775 "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index), | |
| 776 self.pattern, | |
| 777 index | |
| 778 ) | |
| 779 | |
| 780 if combinator == COMMA_COMBINATOR: | |
| 781 if not sel.tag and not is_pseudo: | |
| 782 # Implied `*` | |
| 783 sel.tag = ct.SelectorTag('*', None) | |
| 784 sel.relations.extend(relations) | |
| 785 selectors.append(sel) | |
| 786 del relations[:] | |
| 787 else: | |
| 788 sel.relations.extend(relations) | |
| 789 sel.rel_type = combinator | |
| 790 del relations[:] | |
| 791 relations.append(sel) | |
| 792 sel = _Selector() | |
| 793 | |
| 794 has_selector = False | |
| 795 return has_selector, sel | |
| 796 | |
| 797 def parse_class_id(self, sel, m, has_selector): | |
| 798 """Parse HTML classes and ids.""" | |
| 799 | |
| 800 selector = m.group(0) | |
| 801 if selector.startswith('.'): | |
| 802 sel.classes.append(css_unescape(selector[1:])) | |
| 803 else: | |
| 804 sel.ids.append(css_unescape(selector[1:])) | |
| 805 has_selector = True | |
| 806 return has_selector | |
| 807 | |
def parse_pseudo_contains(self, sel, m, has_selector):
    """Parse a contains-style pseudo-class and attach it to the selector.

    The lowercased, unescaped pseudo-class name decides two things: the
    legacy `:contains` spelling emits a `FutureWarning`, and
    `:-soup-contains-own` sets the "own" flag passed to `ct.SelectorContains`.
    Each value token is unescaped (quoted values lose their surrounding
    quotes first) and the collected texts are stored as a tuple.

    Always returns `True`.
    """

    name = util.lower(css_unescape(m.group('name')))
    if name == ":contains":
        warnings.warn(
            "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
            FutureWarning
        )
    own_text_only = name == ":-soup-contains-own"

    # NOTE(review): the whole argument string is unescaped here and each value is
    # unescaped again below; `parse_pseudo_lang` does not do this - confirm intended.
    raw_values = css_unescape(m.group('values'))

    texts = []
    for tok in RE_VALUES.finditer(raw_values):
        # Separator tokens carry no value.
        if tok.group('split'):
            continue
        text = tok.group('value')
        if text.startswith(("'", '"')):
            # Quoted string: strip the quotes and unescape in string mode.
            text = css_unescape(text[1:-1], True)
        else:
            text = css_unescape(text)
        texts.append(text)

    sel.contains.append(ct.SelectorContains(tuple(texts), own_text_only))
    return True
| 832 | |
def parse_pseudo_lang(self, sel, m, has_selector):
    """Parse the value list of a language pseudo-class.

    Every non-separator token found by `RE_VALUES` in the `values` group is
    unescaped (quoted values have their quotes stripped first) and the
    resulting list is wrapped in a `ct.SelectorLang` appended to `sel.lang`.

    Always returns `True`.
    """

    languages = []
    for tok in RE_VALUES.finditer(m.group('values')):
        if not tok.group('split'):
            lang = tok.group('value')
            is_quoted = lang.startswith(('"', "'"))
            languages.append(css_unescape(lang[1:-1], True) if is_quoted else css_unescape(lang))

    sel.lang.append(ct.SelectorLang(languages))
    return True
| 853 | |
def parse_pseudo_dir(self, sel, m, has_selector):
    """Parse a direction pseudo-class and set the matching flag on the selector.

    A case-insensitive `ltr` in the `dir` group selects `ct.SEL_DIR_LTR`;
    any other value selects `ct.SEL_DIR_RTL`. The flag is OR-ed into
    `sel.flags`.

    Always returns `True`.
    """

    if util.lower(m.group('dir')) == 'ltr':
        direction = ct.SEL_DIR_LTR
    else:
        direction = ct.SEL_DIR_RTL
    sel.flags |= direction
    return True
| 861 | |
def parse_selectors(self, iselector, index=0, flags=0):
    """Parse a stream of selector tokens into a `ct.SelectorList`.

    Parameters:
        iselector: iterator yielding `(key, match)` pairs, where `key` names
            the token type and `match` is the regular expression match for it.
            The same iterator is handed to nested parsers for pseudo-classes
            that take arguments.
        index: position reported in error messages raised before any token
            has been consumed; it is advanced to the end of each token.
        flags: bit mask of `FLG_*` values describing the parsing context
            (`FLG_OPEN`, `FLG_PSEUDO`, `FLG_RELATIVE`, `FLG_NOT`, `FLG_HTML`,
            `FLG_DEFAULT`, `FLG_INDETERMINATE`, `FLG_IN_RANGE`,
            `FLG_OUT_OF_RANGE`, `FLG_PLACEHOLDER_SHOWN`).

    Returns:
        A `ct.SelectorList` built from the frozen selectors, together with the
        `is_not` and `is_html` states.

    Raises:
        SelectorSyntaxError: on a close token with no preceding selector, an
            unmatched close token, a tag name that is not first in its compound
            selector, an unclosed pseudo-class, or a pattern that ends without
            a selector.
        NotImplementedError: when an at-rule or pseudo-element token is found.
    """

    sel = _Selector()
    selectors = []
    has_selector = False
    closed = False
    relations = []
    rel_type = ":" + WS_COMBINATOR
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

    if self.debug:  # pragma: no cover
        if is_pseudo:
            print(' is_pseudo: True')
        if is_open:
            print(' is_open: True')
        if is_relative:
            print(' is_relative: True')
        if is_not:
            print(' is_not: True')
        if is_html:
            print(' is_html: True')
        if is_default:
            print(' is_default: True')
        if is_indeterminate:
            print(' is_indeterminate: True')
        if is_in_range:
            print(' is_in_range: True')
        if is_out_of_range:
            print(' is_out_of_range: True')
        if is_placeholder_shown:
            print(' is_placeholder_shown: True')

    # In relative mode, parsed selectors are attached as relations of a
    # placeholder selector rather than appended to the list directly.
    if is_relative:
        selectors.append(_Selector())

    try:
        while True:
            key, m = next(iselector)

            # Handle parts
            if key == "at_rule":
                raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    raise SelectorSyntaxError(
                        "Expected a selector at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                if is_open:
                    # This close token ends the pseudo-class being parsed at this level.
                    closed = True
                    break
                else:
                    raise SelectorSyntaxError(
                        "Unmatched pseudo-class close at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                # A tag name is only valid as the first part of a compound selector.
                if has_selector:
                    raise SelectorSyntaxError(
                        "Tag name found at position {} instead of at the start".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            index = m.end(0)
    except StopIteration:
        # Token stream exhausted: fall through and finish the pending selector.
        pass

    if is_open and not closed:
        raise SelectorSyntaxError(
            "Unclosed pseudo-class at position {}".format(index),
            self.pattern,
            index
        )

    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            del relations[:]
            selectors.append(sel)
    else:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required.
    # Each assignment replaces the last selector's flags rather than OR-ing into them.
    if is_default:
        selectors[-1].flags = ct.SEL_DEFAULT
    if is_indeterminate:
        selectors[-1].flags = ct.SEL_INDETERMINATE
    if is_in_range:
        selectors[-1].flags = ct.SEL_IN_RANGE
    if is_out_of_range:
        selectors[-1].flags = ct.SEL_OUT_OF_RANGE
    if is_placeholder_shown:
        selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
| 1013 | |
def selector_iter(self, pattern):
    """Yield `(token_name, match)` pairs for each token found in `pattern`.

    Scanning starts after any leading span matched by `RE_WS_BEGIN` and stops
    before any trailing span matched by `RE_WS_END`. At each position the
    entries of `self.css_tokens` are tried in order and the first one that
    matches is yielded. If none matches, a `SelectorSyntaxError` is raised
    describing either the malformed selector type or the invalid character.
    """

    # Ignore whitespace and comments at start and end of pattern
    leading = RE_WS_BEGIN.search(pattern)
    index = leading.end(0) if leading else 0
    trailing = RE_WS_END.search(pattern)
    end = (trailing.start(0) if trailing else len(pattern)) - 1

    if self.debug:  # pragma: no cover
        print('## PARSING: {!r}'.format(pattern))

    while index <= end:
        m = None
        for token in self.css_tokens:
            m = token.match(pattern, index, self.flags)
            if m:
                name = token.get_name()
                if self.debug:  # pragma: no cover
                    print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
                # Advance before yielding so scanning resumes after this token.
                index = m.end(0)
                yield name, m
                break

        if m is None:
            c = pattern[index]
            # If the character represents the start of one of the known selector types,
            # throw an exception mentioning that the known selector type is in error;
            # otherwise, report the invalid character.
            malformed = {
                '[': "Malformed attribute selector at position {}",
                '.': "Malformed class selector at position {}",
                '#': "Malformed id selector at position {}",
                ':': "Malformed pseudo-class selector at position {}",
            }
            template = malformed.get(c)
            if template is not None:
                msg = template.format(index)
            else:
                msg = "Invalid character {!r} position {}".format(c, index)
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')
| 1054 | |
def process_selectors(self, index=0, flags=0):
    """Tokenize `self.pattern` and parse the tokens into a selector list.

    `index` and `flags` are forwarded unchanged to `parse_selectors`,
    whose result is returned.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)
| 1059 | |
| 1060 | |
# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern).
# A few patterns are order dependent, as they use patterns previously compiled.
# Every pattern below is compiled with `FLG_PSEUDO`; most also set `FLG_HTML`.

# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
    'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first, since the pattern uses `:checked`).
# `FLG_DEFAULT` marks the last selector in the list for special handling.
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`.
# `FLG_INDETERMINATE` marks the last selector in the list for special handling.
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled` (the pattern uses `:disabled`)
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
CSS_REQUIRED = CSSParser(
    'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`
CSS_OPTIONAL = CSSParser(
    'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`.
# `FLG_PLACEHOLDER_SHOWN` marks the last selector in the list for special handling.
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature (compiled without `FLG_HTML`)
CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only`
# (the pattern uses `:read-write`, so presumably CSS_READ_WRITE must be compiled first - confirm)
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`.
# The selector text is the same as for `:out-of-range`; only the `FLG_IN_RANGE` flag differs.
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`.
# The selector text is the same as for `:in-range`; only the `FLG_OUT_OF_RANGE` flag differs.
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
