Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/soupsieve/css_parser.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """CSS selector parser.""" | |
| 2 import re | |
| 3 from functools import lru_cache | |
| 4 from . import util | |
| 5 from . import css_match as cm | |
| 6 from . import css_types as ct | |
| 7 from .util import SelectorSyntaxError | |
| 8 | |
# U+FFFD REPLACEMENT CHARACTER
UNICODE_REPLACEMENT_CHAR = 0xFFFD

# Pseudo-classes written without parentheses that have matching logic
PSEUDO_SIMPLE = {
    ':any-link',
    ':checked',
    ':default',
    ':defined',
    ':disabled',
    ':empty',
    ':enabled',
    ':first-child',
    ':first-of-type',
    ':in-range',
    ':indeterminate',
    ':last-child',
    ':last-of-type',
    ':link',
    ':only-child',
    ':only-of-type',
    ':optional',
    ':out-of-range',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
    ':root',
    ':scope'
}

# Pseudo-classes written without parentheses that are accepted
# but can never match anything in the Soup Sieve environment
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited'
}

# Functional pseudo-classes whose argument is a selector list
PSEUDO_COMPLEX = {
    ':contains',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where'
}

# Functional pseudo-classes that are accepted but never match
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context'
}

# Functional pseudo-classes with their own argument grammar; each is parsed by a dedicated pattern
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type'
}

# Every pseudo-class name the parser recognizes, in any form
PSEUDO_SUPPORTED = (
    PSEUDO_SIMPLE |
    PSEUDO_SIMPLE_NO_MATCH |
    PSEUDO_COMPLEX |
    PSEUDO_COMPLEX_NO_MATCH |
    PSEUDO_SPECIAL
)
| 86 | |
# Sub-patterns parts
# These are regex source fragments (plain strings), not compiled patterns. The multi-line
# fragments contain literal newlines, and the hex classes are written lowercase only, so the
# patterns assembled from them are compiled with `re.X` (and `re.I` where case must be ignored).
# Whitespace
# A newline is `\r\n` taken as one unit, or a lone `\n`, `\f`, or `\r`.
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
# A single whitespace unit: space, tab, or a newline as defined above.
WS = r'(?:[ \t]|{})'.format(NEWLINE)
# Comments (`/* ... */`)
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
# CSS escapes: a backslash followed by 1-6 hex digits and one optional whitespace unit,
# or by any character that is not a newline character, or by the end of the input.
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
# Same as `CSS_ESCAPES`, but a backslash may also be followed by a newline.
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
# CSS Identifier
# Either `--`, or an optional leading `-` followed by one or more start characters/escapes;
# then any number of continuation characters (which additionally allow digits and `-`) or escapes.
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
# `nth` content
# Optional sign, then `<digits>`, `<digits>n`, or `n`; a signed `<digits>` offset may follow only after `n`.
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
# Value: quoted string or identifier
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
# Captures `cmp` (the operator), `value`, and an optional `case` flag (`i` or `s`); the whole
# comparison is optional, but the closing `]` is always required.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
| 113 | |
# Selector patterns
# Regex source for each kind of selector token. Named groups defined here
# (`tag_ns`, `tag_name`, `attr_ns`, `attr_name`, `name`, `open`, `values`, `dir`, `relation`, ...)
# are the ones the `CSSParser.parse_*` methods read from the match object.
# IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
# Prefix:Tag (`prefix|tag`)
# `tag_ns` includes the trailing `|`; the prefix itself may be an identifier, `*`, or empty.
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
# `open` is only set when an opening parenthesis follows the name.
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
# Unlike `PAT_PSEUDO_CLASS`, the opening parenthesis is mandatory here.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
# Built by prefixing the pseudo-class pattern with one more `:`.
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` looks unintended; as written this only matches
# at-rules whose name begins with `p`/`P` followed by an identifier - confirm before changing.
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# Ends either at the closing `)` or right after the `of` keyword (captured as `of`).
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
# Pseudo class language (`:lang("*-de", en)`)
# `values` holds the whole comma separated list; it is split later with `RE_VALUES`.
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
# Combining characters (`>`, `~`, ` `, `+`, `,`)
# A whitespace unit only counts as the relation when no explicit combinator follows it.
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
| 159 | |
# Regular expressions
# CSS escape pattern
# Group 1: hex escape (with optional trailing whitespace), group 2: escaped character,
# group 3: backslash at end of input. `css_unescape` relies on this group numbering.
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I)
# Same as above for quoted strings, plus group 4: an escaped newline (line continuation).
RE_CSS_STR_ESC = re.compile(
    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers
# `s1`/`a` are the sign and coefficient part, `s2`/`b` the sign and offset part.
RE_NTH = re.compile(
    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
    re.I
)
# Pattern to iterate multiple values.
# `re.I` is required here: the value lists are captured by token patterns compiled with `re.I`,
# and the hex escape class in `VALUE` is lowercase only. Without it an escape written with
# upper case hex digits (e.g. `\4A b`) would be tokenized differently than it was captured
# and split into two values.
RE_VALUES = re.compile(
    r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE),
    re.I | re.X
)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
# Full-string check for a custom pseudo-class name (`:--name`)
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
| 178 | |
# Constants
# Token that separates the entries of a selector list
COMMA_COMBINATOR = ','
# Token used for the descendant relation
WS_COMBINATOR = " "

# Parse flags: independent bits that can be OR-ed together
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9

# Upper bound on the number of compiled patterns kept in the cache
_MAXCACHE = 500
| 199 | |
| 200 | |
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags):
    """
    Compile a selector string into a `SoupSieve` object, memoizing the result.

    The arguments form the cache key, so they must all be hashable.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selectors = parser.process_selectors()
    return cm.SoupSieve(pattern, selectors, namespaces, custom, flags)
| 213 | |
| 214 | |
def _purge_cache():
    """Discard every entry held by the `_cached_css_compile` cache."""

    _cached_css_compile.cache_clear()
| 219 | |
| 220 | |
def process_custom(custom):
    """
    Validate and normalize a mapping of custom pseudo-class selectors.

    `custom` maps custom pseudo-class names (`:--name`) to their selectors, or is `None`.
    Each name is lower-cased, validated, and unescaped; the returned dictionary is keyed
    by the unescaped name.

    Raises `SelectorSyntaxError` if a name is not a valid custom pseudo-class name, and
    `KeyError` if two names normalize to the same custom pseudo-class.
    """

    custom_selectors = {}
    if custom is not None:
        for key, value in custom.items():
            name = util.lower(key)
            if RE_CUSTOM.match(name) is None:
                raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
            # Check for duplicates using the same (unescaped) form that is stored as the key;
            # otherwise two different spellings of one name would silently overwrite each other.
            unescaped = css_unescape(name)
            if unescaped in custom_selectors:
                raise KeyError("The custom selector '{}' has already been registered".format(name))
            custom_selectors[unescaped] = value
    return custom_selectors
| 234 | |
| 235 | |
def css_unescape(content, string=False):
    """
    Unescape CSS value.

    When `string` is true, `content` is treated as the inside of a quoted string:
    strings allow for spanning the value over multiple lines by escaping a new line,
    and such an escaped new line is removed from the result.

    Hex escapes that are zero, a surrogate, or greater than the largest Unicode
    code point are replaced with U+FFFD instead of being decoded.
    """

    def replace(m):
        """Replace with the appropriate substitute."""

        hex_escape = m.group(1)
        if hex_escape:
            # `int` tolerates the optional trailing whitespace captured with the digits
            codepoint = int(hex_escape[1:], 16)
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                # `chr` would raise for values above U+10FFFF; surrogates and NULL are
                # not valid escaped code points either.
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Escaped literal character: drop the backslash
            value = m.group(2)[1:]
        elif m.group(3):
            # Lone backslash at the end of the input
            value = '\ufffd'
        else:
            # Escaped new line inside a string: line continuation
            value = ''

        return value

    return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
| 261 | |
| 262 | |
def escape(ident):
    """
    Escape identifier.

    Returns `ident` rewritten so that it can be used as a CSS identifier:

    - NULL becomes U+FFFD.
    - Control characters (U+0001-U+001F, U+007F) become hex escapes followed by a space.
    - A digit in first position, or in second position after a leading `-`, becomes a hex escape.
    - A lone `-` is escaped with a backslash.
    - `-`, `_`, ASCII letters and digits, and anything at or above U+0080 are kept as is.
    - Every other character is prefixed with a backslash.
    """

    string = []
    length = len(ident)
    start_dash = length > 0 and ident[0] == '-'
    if length == 1 and start_dash:
        # Need to escape identifier that is a single `-` with no other characters
        string.append('\\{}'.format(ident))
    else:
        for index, c in enumerate(ident):
            codepoint = ord(c)
            if codepoint == 0x00:
                string.append('\ufffd')
            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
                string.append('\\{:x} '.format(codepoint))
            elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
                # An identifier cannot start with a digit, nor with `-` followed by a digit
                string.append('\\{:x} '.format(codepoint))
            elif (
                codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
                (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
            ):
                string.append(c)
            else:
                string.append('\\{}'.format(c))
    return ''.join(string)
| 289 | |
| 290 | |
class SelectorPattern(object):
    """A named regular expression used to recognize one kind of selector token."""

    def __init__(self, name, pattern):
        """Store the token name and compile `pattern` (ignore case, verbose, Unicode)."""

        compile_flags = re.I | re.X | re.U
        self.name = name
        self.re_pattern = re.compile(pattern, compile_flags)

    def get_name(self):
        """Return the token name given at construction."""

        return self.name

    def match(self, selector, index, flags):
        """
        Try to match the pattern in `selector` starting at `index`.

        Returns the match object or `None`. `flags` is accepted but not used here.
        """

        found = self.re_pattern.match(selector, index)
        return found
| 309 | |
| 310 | |
class SpecialPseudoPattern(SelectorPattern):
    """Dispatch to a dedicated pattern based on the pseudo-class name found in the selector."""

    def __init__(self, patterns):
        """
        Build the name-to-pattern lookup.

        Each entry of `patterns` is indexed as `(token name, pseudo-class names, regex, pattern class)`;
        one pattern object is created per entry and shared by all of its pseudo-class names.
        """

        self.patterns = {}
        for entry in patterns:
            compiled = entry[3](entry[0], entry[2])
            self.patterns.update(dict.fromkeys(entry[1], compiled))

        # Pattern object of the most recent successful match
        self.matched_name = None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self):
        """Return the token name of the pattern that matched last."""

        return self.matched_name.get_name()

    def match(self, selector, index, flags):
        """
        Match the selector.

        The pseudo-class name at `index` selects the dedicated pattern; that pattern is then
        matched at the same position. Returns its match object, or `None` if the name is
        unknown or the dedicated pattern does not match.
        """

        name_match = self.re_pseudo_name.match(selector, index)
        if not name_match:
            return None

        key = util.lower(css_unescape(name_match.group('name')))
        handler = self.patterns.get(key)
        if not handler:
            return None

        pseudo = handler.match(selector, index, flags)
        if pseudo:
            self.matched_name = handler
        return pseudo
| 346 | |
| 347 | |
| 348 class _Selector(object): | |
| 349 """ | |
| 350 Intermediate selector class. | |
| 351 | |
| 352 This stores selector data for a compound selector as we are acquiring them. | |
| 353 Once we are done collecting the data for a compound selector, we freeze | |
| 354 the data in an object that can be pickled and hashed. | |
| 355 """ | |
| 356 | |
| 357 def __init__(self, **kwargs): | |
| 358 """Initialize.""" | |
| 359 | |
| 360 self.tag = kwargs.get('tag', None) | |
| 361 self.ids = kwargs.get('ids', []) | |
| 362 self.classes = kwargs.get('classes', []) | |
| 363 self.attributes = kwargs.get('attributes', []) | |
| 364 self.nth = kwargs.get('nth', []) | |
| 365 self.selectors = kwargs.get('selectors', []) | |
| 366 self.relations = kwargs.get('relations', []) | |
| 367 self.rel_type = kwargs.get('rel_type', None) | |
| 368 self.contains = kwargs.get('contains', []) | |
| 369 self.lang = kwargs.get('lang', []) | |
| 370 self.flags = kwargs.get('flags', 0) | |
| 371 self.no_match = kwargs.get('no_match', False) | |
| 372 | |
| 373 def _freeze_relations(self, relations): | |
| 374 """Freeze relation.""" | |
| 375 | |
| 376 if relations: | |
| 377 sel = relations[0] | |
| 378 sel.relations.extend(relations[1:]) | |
| 379 return ct.SelectorList([sel.freeze()]) | |
| 380 else: | |
| 381 return ct.SelectorList() | |
| 382 | |
| 383 def freeze(self): | |
| 384 """Freeze self.""" | |
| 385 | |
| 386 if self.no_match: | |
| 387 return ct.SelectorNull() | |
| 388 else: | |
| 389 return ct.Selector( | |
| 390 self.tag, | |
| 391 tuple(self.ids), | |
| 392 tuple(self.classes), | |
| 393 tuple(self.attributes), | |
| 394 tuple(self.nth), | |
| 395 tuple(self.selectors), | |
| 396 self._freeze_relations(self.relations), | |
| 397 self.rel_type, | |
| 398 tuple(self.contains), | |
| 399 tuple(self.lang), | |
| 400 self.flags | |
| 401 ) | |
| 402 | |
| 403 def __str__(self): # pragma: no cover | |
| 404 """String representation.""" | |
| 405 | |
| 406 return ( | |
| 407 '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' | |
| 408 'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' | |
| 409 ).format( | |
| 410 self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, | |
| 411 self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match | |
| 412 ) | |
| 413 | |
| 414 __repr__ = __str__ | |
| 415 | |
| 416 | |
| 417 class CSSParser(object): | |
| 418 """Parse CSS selectors.""" | |
| 419 | |
# Token patterns available to the parser, one `SelectorPattern` per kind of token.
# The special pseudo-classes share a single `SpecialPseudoPattern`, which picks the
# dedicated pattern from the pseudo-class name.
# NOTE(review): presumably these are tried in the order listed (e.g. custom and special
# pseudo-classes before the generic `pseudo_class`) - confirm against the tokenizer loop,
# which is not visible here.
css_tokens = (
    SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
    SpecialPseudoPattern(
        (
            ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS, SelectorPattern),
            ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
            ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
            ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
            ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
        )
    ),
    SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
    SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
    SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
    SelectorPattern("at_rule", PAT_AT_RULE),
    SelectorPattern("id", PAT_ID),
    SelectorPattern("class", PAT_CLASS),
    SelectorPattern("tag", PAT_TAG),
    SelectorPattern("attribute", PAT_ATTR),
    SelectorPattern("combine", PAT_COMBINE)
)
| 441 | |
def __init__(self, selector, custom=None, flags=0):
    """
    Prepare a parser for `selector`.

    NULL characters in the selector are replaced with U+FFFD. `custom` is the mapping of
    custom pseudo-classes (an empty one is created when `None`), and `flags` are kept
    as given, with the debug bit extracted into `self.debug`.
    """

    self.pattern = selector.replace('\x00', '\ufffd')
    self.flags = flags
    self.debug = self.flags & util.DEBUG
    self.custom = custom if custom is not None else {}
| 449 | |
def parse_attribute_selector(self, sel, m, has_selector):
    """
    Create attribute selector from the returned regex match.

    `m` supplies the `attr_ns`, `attr_name`, `cmp`, `value`, and `case` groups. The
    resulting `ct.SelectorAttribute` is appended to `sel.attributes`, or, for the
    non-standard `!=` operator, nested under a `:not()` style selector list that is
    appended to `sel.selectors`.

    Value patterns are compiled with `re.DOTALL` so that `.` also spans new lines;
    otherwise operators such as `^=`, `$=`, `*=`, and `~=` fail on attribute values
    that contain a new line.

    Returns `True`; the incoming `has_selector` value is not consulted.
    """

    inverse = False
    op = m.group('cmp')
    case = util.lower(m.group('case')) if m.group('case') else None
    ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
    attr = css_unescape(m.group('attr_name'))
    is_type = False
    pattern2 = None

    if case:
        # Explicit `i` / `s` flag decides case sensitivity
        flags = (re.I if case == 'i' else 0) | re.DOTALL
    elif util.lower(attr) == 'type':
        # `type` is compared case insensitively by default; a second pattern is kept below
        flags = re.I | re.DOTALL
        is_type = True
    else:
        flags = re.DOTALL

    if op:
        if m.group('value').startswith(('"', "'")):
            # Quoted value: strip the quotes and unescape as a string
            value = css_unescape(m.group('value')[1:-1], True)
        else:
            value = css_unescape(m.group('value'))
    else:
        value = None
    if not op:
        # Attribute name
        pattern = None
    elif op.startswith('^'):
        # Value start with
        pattern = re.compile(r'^%s.*' % re.escape(value), flags)
    elif op.startswith('$'):
        # Value ends with
        pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
    elif op.startswith('*'):
        # Value contains
        pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
    elif op.startswith('~'):
        # Value contains word within space separated list
        # `~=` should match nothing if it is empty or contains whitespace,
        # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
        value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
        pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
    elif op.startswith('|'):
        # Value starts with word in dash separated list
        pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
    else:
        # Value matches
        pattern = re.compile(r'^%s$' % re.escape(value), flags)
        if op.startswith('!'):
            # Equivalent to `:not([attr=value])`
            inverse = True
    if is_type and pattern:
        # Case sensitive variant of the same expression for the `type` attribute
        # NOTE(review): presumably used where `type` must be compared case sensitively - confirm in `css_match`
        pattern2 = re.compile(pattern.pattern, re.DOTALL)

    # Append the attribute selector
    sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
    if inverse:
        # If we are using `!=`, we need to nest the pattern under a `:not()`.
        sub_sel = _Selector()
        sub_sel.attributes.append(sel_attr)
        not_list = ct.SelectorList([sub_sel.freeze()], True, False)
        sel.selectors.append(not_list)
    else:
        sel.attributes.append(sel_attr)

    has_selector = True
    return has_selector
| 519 | |
def parse_tag_pattern(self, sel, m, has_selector):
    """
    Set `sel.tag` from a tag match.

    The namespace prefix (without its trailing `|`) is unescaped when present, otherwise
    it is `None`. Always returns `True`.
    """

    raw_ns = m.group('tag_ns')
    prefix = css_unescape(raw_ns[:-1]) if raw_ns else None
    name = css_unescape(m.group('tag_name'))
    sel.tag = ct.SelectorTag(name, prefix)
    return True
| 528 | |
def parse_pseudo_class_custom(self, sel, m, has_selector):
    """
    Parse custom pseudo class alias.

    Compile custom selectors as we need them. When compiling a custom selector,
    set it to `None` in the dictionary so we can avoid an infinite loop: a custom
    selector that refers back to itself is then reported as undefined.

    The compiled selector list replaces the source string in `self.custom` and is
    appended to `sel.selectors`. Returns `True`.

    Raises `SelectorSyntaxError` if the custom pseudo-class is not defined.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    selector = self.custom.get(pseudo)
    if selector is None:
        raise SelectorSyntaxError(
            "Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
            self.pattern,
            m.end(0)
        )

    if not isinstance(selector, ct.SelectorList):
        # Not compiled yet: mark as in progress, compile, then cache the result
        self.custom[pseudo] = None
        selector = CSSParser(
            selector, custom=self.custom, flags=self.flags
        ).process_selectors(flags=FLG_PSEUDO)
        self.custom[pseudo] = selector

    sel.selectors.append(selector)
    has_selector = True
    return has_selector
| 556 | |
def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
    """
    Parse a pseudo-class token and record it on `sel`.

    Returns the tuple `(has_selector, is_html)`. `is_html` is passed through unchanged
    except for `:defined`, which sets it to `True`.

    Raises `SelectorSyntaxError` when a supported pseudo-class is used in the wrong form
    (with or without parentheses), and `NotImplementedError` for unknown pseudo-classes.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    # The `open` group is only set when an opening parenthesis follows the name
    complex_pseudo = bool(m.group('open'))

    if complex_pseudo and pseudo in PSEUDO_COMPLEX:
        has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
        return has_selector, is_html

    if not complex_pseudo and pseudo in PSEUDO_SIMPLE:
        # Pseudo-classes expressed as a flag on the selector
        flag_for = {
            ':root': ct.SEL_ROOT,
            ':defined': ct.SEL_DEFINED,
            ':scope': ct.SEL_SCOPE,
            ':empty': ct.SEL_EMPTY
        }
        # Pseudo-classes expressed as a predefined selector list
        selector_for = {
            ':link': CSS_LINK,
            ':any-link': CSS_LINK,
            ':checked': CSS_CHECKED,
            ':default': CSS_DEFAULT,
            ':indeterminate': CSS_INDETERMINATE,
            ":disabled": CSS_DISABLED,
            ":enabled": CSS_ENABLED,
            ":required": CSS_REQUIRED,
            ":optional": CSS_OPTIONAL,
            ":read-only": CSS_READ_ONLY,
            ":read-write": CSS_READ_WRITE,
            ":in-range": CSS_IN_RANGE,
            ":out-of-range": CSS_OUT_OF_RANGE,
            ":placeholder-shown": CSS_PLACEHOLDER_SHOWN
        }
        # Pseudo-classes expressed as fixed `nth` entries: (of_type, last) pairs
        nth_for = {
            ':first-child': ((False, False),),
            ':last-child': ((False, True),),
            ':first-of-type': ((True, False),),
            ':last-of-type': ((True, True),),
            ':only-child': ((False, False), (False, True)),
            ':only-of-type': ((True, False), (True, True))
        }

        if pseudo in flag_for:
            sel.flags |= flag_for[pseudo]
            if pseudo == ':defined':
                is_html = True
        elif pseudo in selector_for:
            sel.selectors.append(selector_for[pseudo])
        elif pseudo in nth_for:
            sel.nth.extend(
                ct.SelectorNth(1, False, 0, of_type, last, ct.SelectorList())
                for of_type, last in nth_for[pseudo]
            )
        return True, is_html

    if complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
        # Consume the arguments, but the selector itself can never match
        self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        sel.no_match = True
        return True, is_html

    if not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
        sel.no_match = True
        return True, is_html

    if pseudo in PSEUDO_SUPPORTED:
        # Known name, but used in the wrong form
        raise SelectorSyntaxError(
            "Invalid syntax for pseudo class '{}'".format(pseudo),
            self.pattern,
            m.start(0)
        )

    raise NotImplementedError(
        "'{}' pseudo-class is not implemented at this time".format(pseudo)
    )
| 644 | |
def parse_pseudo_nth(self, sel, m, has_selector, iselector):
    """
    Parse an `nth` style pseudo-class and append the result to `sel.nth`.

    The `an+b` expression (or `even` / `odd`) is reduced to a coefficient, an offset,
    and whether `n` is present. For the child variants an optional `of S` selector list
    is parsed from `iselector`. Always returns `True`.
    """

    mdict = m.groupdict()
    postfix = '_child' if mdict.get('pseudo_nth_child') else '_type'
    mdict['name'] = util.lower(css_unescape(mdict['name']))
    content = util.lower(mdict.get('nth' + postfix))

    if content == 'even':
        # 2n
        s1, s2, var = 2, 0, True
    elif content == 'odd':
        # 2n+1
        s1, s2, var = 2, 1, True
    else:
        nth_parts = RE_NTH.match(content)

        # Coefficient: a bare `n` means 1; otherwise the digits before any trailing `n`
        a = nth_parts.group('a')
        var = a.endswith('n')
        if a.startswith('n'):
            magnitude = '1'
        elif var:
            magnitude = a[:-1]
        else:
            magnitude = a
        sign = '-' if nth_parts.group('s1') == '-' else ''
        s1 = int(sign + magnitude, 10)

        # Offset: defaults to zero when absent
        b = nth_parts.group('b')
        if b:
            sign = '-' if nth_parts.group('s2') == '-' else ''
            s2 = int(sign + b, 10)
        else:
            s2 = int('0', 10)

    pseudo_sel = mdict['name']
    if postfix == '_child':
        if m.group('of'):
            # Parse the rest of `of S`.
            nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        else:
            # Use default `*|*` for `of S`.
            nth_sel = CSS_NTH_OF_S_DEFAULT
        last_for = {':nth-child': False, ':nth-last-child': True}
        if pseudo_sel in last_for:
            sel.nth.append(ct.SelectorNth(s1, var, s2, False, last_for[pseudo_sel], nth_sel))
    else:
        last_for = {':nth-of-type': False, ':nth-last-of-type': True}
        if pseudo_sel in last_for:
            sel.nth.append(ct.SelectorNth(s1, var, s2, True, last_for[pseudo_sel], ct.SelectorList()))
    return True
| 703 | |
def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
    """
    Parse the selector list inside a functional pseudo-class and append it to `sel.selectors`.

    `:not` adds the negation flag and `:has` the relative flag to the nested parse.
    Always returns `True`.
    """

    extra = {':not': FLG_NOT, ':has': FLG_RELATIVE}.get(name, 0)
    nested = self.parse_selectors(iselector, index, FLG_PSEUDO | FLG_OPEN | extra)
    sel.selectors.append(nested)
    return True
| 716 | |
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
    """
    Parse a combinator token inside a relative selector list (`:has()`).

    The finished compound selector `sel` is attached, with its leading combinator
    `rel_type`, to the relations of the last entry in `selectors`. A comma additionally
    starts a new entry and resets the leading combinator to the descendant relation.

    Returns `(has_selector, sel, rel_type)`: `has_selector` is reset to `False`, `sel`
    is a fresh `_Selector`, and `rel_type` is the leading combinator for what follows.

    Raises `SelectorSyntaxError` for a comma without a preceding selector, or for
    consecutive non-whitespace combinators.
    """

    combinator = m.group('relation').strip()
    if not combinator:
        # A relation made up of whitespace only is the descendant combinator
        combinator = WS_COMBINATOR
    if combinator == COMMA_COMBINATOR:
        if not has_selector:
            # If we've not captured any selector parts, the comma is either at the beginning of the pattern
            # or following another comma, both of which are unexpected. Commas must split selectors.
            raise SelectorSyntaxError(
                "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
                self.pattern,
                index
            )
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)
        rel_type = ":" + WS_COMBINATOR
        selectors.append(_Selector())
    else:
        if has_selector:
            # End the current selector and associate the leading combinator with this selector.
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        elif rel_type[1:] != WS_COMBINATOR:
            # It's impossible to have two whitespace combinators after each other as the patterns
            # will gobble up trailing whitespace. It is also impossible to have a whitespace
            # combinator after any other kind for the same reason. But we could have
            # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
            # then we've hit the multiple combinator case, so we should fail.
            raise SelectorSyntaxError(
                'The multiple combinators at position {}'.format(index),
                self.pattern,
                index
            )
        # Set the leading combinator for the next selector.
        rel_type = ':' + combinator
    sel = _Selector()

    has_selector = False
    return has_selector, sel, rel_type
| 758 | |
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
    """
    Parse a combinator token in a regular selector list.

    A comma completes the current complex selector: `sel` takes over the pending
    `relations` and is appended to `selectors`. Any other combinator makes `sel`
    (tagged with that combinator) the single pending relation for the next compound
    selector. `relations` is modified in place.

    Returns `(has_selector, sel)` with `has_selector` reset to `False` and a fresh `_Selector`.

    Raises `SelectorSyntaxError` if no selector precedes the combinator.
    """

    combinator = m.group('relation').strip()
    if not combinator:
        # A relation made up of whitespace only is the descendant combinator
        combinator = WS_COMBINATOR
    if not has_selector:
        raise SelectorSyntaxError(
            "The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
            self.pattern,
            index
        )

    if combinator == COMMA_COMBINATOR:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        sel.relations.extend(relations)
        selectors.append(sel)
        del relations[:]
    else:
        sel.relations.extend(relations)
        sel.rel_type = combinator
        del relations[:]
        relations.append(sel)
    sel = _Selector()

    has_selector = False
    return has_selector, sel
| 788 | |
def parse_class_id(self, sel, m, has_selector):
    """
    Record a class or id token on `sel`.

    A token starting with `.` goes to `sel.classes`, anything else to `sel.ids`;
    the leading character is dropped and the rest is unescaped. Always returns `True`.
    """

    token = m.group(0)
    name = token[1:]
    if token.startswith('.'):
        sel.classes.append(css_unescape(name))
    else:
        sel.ids.append(css_unescape(name))
    return True
| 799 | |
def parse_pseudo_contains(self, sel, m, has_selector):
    """
    Parse the value list of `:contains()` and append it to `sel.contains`.

    Quoted values lose their quotes and are unescaped as strings; unquoted values are
    unescaped as identifiers. Always returns `True`.
    """

    texts = []
    for token in RE_VALUES.finditer(m.group('values')):
        if token.group('split'):
            # Separator between values
            continue
        raw = token.group('value')
        is_quoted = raw.startswith(("'", '"'))
        texts.append(css_unescape(raw[1:-1], True) if is_quoted else css_unescape(raw))
    sel.contains.append(ct.SelectorContains(tuple(texts)))
    return True
| 817 | |
def parse_pseudo_lang(self, sel, m, has_selector):
    """
    Parse the language list of `:lang()` and append it to `sel.lang`.

    Quoted values lose their quotes and are unescaped as strings; unquoted values are
    unescaped as identifiers. Always returns `True`.
    """

    languages = []
    for token in RE_VALUES.finditer(m.group('values')):
        if token.group('split'):
            # Separator between values
            continue
        raw = token.group('value')
        is_quoted = raw.startswith(('"', "'"))
        languages.append(css_unescape(raw[1:-1], True) if is_quoted else css_unescape(raw))

    sel.lang.append(ct.SelectorLang(languages))
    return True
| 838 | |
def parse_pseudo_dir(self, sel, m, has_selector):
    """Set the direction flag for a direction pseudo-class on the selector.

    The lower-cased `dir` group selects `ct.SEL_DIR_LTR` when it equals
    `'ltr'` and `ct.SEL_DIR_RTL` otherwise; the chosen flag is OR-ed into
    `sel.flags`. Always returns `True`.
    """

    if util.lower(m.group('dir')) == 'ltr':
        direction = ct.SEL_DIR_LTR
    else:
        direction = ct.SEL_DIR_RTL
    sel.flags |= direction
    return True
| 846 | |
def parse_selectors(self, iselector, index=0, flags=0):
    """Parse selectors.

    Consumes `(key, match)` token pairs from `iselector` until the iterator
    is exhausted or, when `FLG_OPEN` is set, until a `pseudo_close` token is
    seen, and assembles the parts into a selector list.

    Args:
        iselector: Iterator yielding `(key, match)` pairs, where `key` names
            the token type and `match` is the regex match for that token.
        index: Pattern position used in error messages; it is advanced to
            the end of each token that has been handled.
        flags: Bitmask of `FLG_*` values selecting the parsing mode.

    Returns:
        A `ct.SelectorList` built from the frozen selectors together with
        the `is_not` and `is_html` states.

    Raises:
        SelectorSyntaxError: For an empty selector, a tag name that is not
            at the start of a compound selector, an unmatched closing
            parenthesis, or an unclosed pseudo-class.
        NotImplementedError: For at-rule and pseudo-element tokens.
    """

    sel = _Selector()
    selectors = []
    has_selector = False
    closed = False
    relations = []
    rel_type = ":" + WS_COMBINATOR
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

    if self.debug:  # pragma: no cover
        if is_pseudo:
            print(' is_pseudo: True')
        if is_open:
            print(' is_open: True')
        if is_relative:
            print(' is_relative: True')
        if is_not:
            print(' is_not: True')
        if is_html:
            print(' is_html: True')
        if is_default:
            print(' is_default: True')
        if is_indeterminate:
            print(' is_indeterminate: True')
        if is_in_range:
            print(' is_in_range: True')
        if is_out_of_range:
            print(' is_out_of_range: True')
        if is_placeholder_shown:
            print(' is_placeholder_shown: True')

    # Relative selectors hang their parsed parts off a leading placeholder.
    if is_relative:
        selectors.append(_Selector())

    try:
        while True:
            key, m = next(iselector)

            # Handle parts
            if key == "at_rule":
                raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    raise SelectorSyntaxError(
                        "Expected a selector at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                if is_open:
                    closed = True
                    break
                else:
                    raise SelectorSyntaxError(
                        "Unmatched pseudo-class close at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        "Tag name found at position {} instead of at the start".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            index = m.end(0)
    except StopIteration:
        # The token stream is exhausted; fall through to finish the selector.
        pass

    if is_open and not closed:
        raise SelectorSyntaxError(
            "Unclosed pseudo-class at position {}".format(index),
            self.pattern,
            index
        )

    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            del relations[:]
            selectors.append(sel)
    else:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required.
    # The flags are OR-ed in so that flags already present on the last selector
    # (for example a direction flag) are not discarded.
    if is_default:
        selectors[-1].flags |= ct.SEL_DEFAULT
    if is_indeterminate:
        selectors[-1].flags |= ct.SEL_INDETERMINATE
    if is_in_range:
        selectors[-1].flags |= ct.SEL_IN_RANGE
    if is_out_of_range:
        selectors[-1].flags |= ct.SEL_OUT_OF_RANGE
    if is_placeholder_shown:
        selectors[-1].flags |= ct.SEL_PLACEHOLDER_SHOWN

    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
| 998 | |
def selector_iter(self, pattern):
    """Iterate selector tokens.

    Scans `pattern` from the end of its leading whitespace/comments to the
    start of its trailing whitespace/comments, trying each entry of
    `self.css_tokens` in order at the current position.

    Args:
        pattern: The selector text to tokenize.

    Yields:
        `(name, match)` pairs, where `name` comes from the token pattern's
        `get_name()` and `match` is the match object it produced.

    Raises:
        SelectorSyntaxError: When no token pattern matches at the current
            position. The error is reported against `pattern`, the text
            that `index` actually refers to.
    """

    # Ignore whitespace and comments at start and end of pattern
    m = RE_WS_BEGIN.search(pattern)
    index = m.end(0) if m else 0
    m = RE_WS_END.search(pattern)
    end = (m.start(0) - 1) if m else (len(pattern) - 1)

    if self.debug:  # pragma: no cover
        print('## PARSING: {!r}'.format(pattern))
    while index <= end:
        for v in self.css_tokens:
            m = v.match(pattern, index, self.flags)
            if m:
                name = v.get_name()
                if self.debug:  # pragma: no cover
                    print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
                index = m.end(0)
                yield name, m
                break
        else:
            # No token pattern matched at `index`.
            c = pattern[index]
            # If the character represents the start of one of the known selector types,
            # throw an exception mentioning that the known selector type is in error;
            # otherwise, report the invalid character.
            if c == '[':
                msg = "Malformed attribute selector at position {}".format(index)
            elif c == '.':
                msg = "Malformed class selector at position {}".format(index)
            elif c == '#':
                msg = "Malformed id selector at position {}".format(index)
            elif c == ':':
                msg = "Malformed pseudo-class selector at position {}".format(index)
            else:
                msg = "Invalid character {!r} at position {}".format(c, index)
            raise SelectorSyntaxError(msg, pattern, index)
    if self.debug:  # pragma: no cover
        print('## END PARSING')
| 1039 | |
def process_selectors(self, index=0, flags=0):
    """Tokenize `self.pattern` and parse the tokens into a selector list.

    `index` and `flags` are forwarded unchanged to `parse_selectors`, whose
    result is returned.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)
| 1044 | |
| 1045 | |
# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
# A few patterns are order dependent as they use patterns previously compiled.
# Each list is built at import time by running a `CSSParser` over a fixed pattern.
# Patterns compiled with `FLG_DEFAULT`, `FLG_INDETERMINATE`, `FLG_IN_RANGE`,
# `FLG_OUT_OF_RANGE` or `FLG_PLACEHOLDER_SHOWN` get the matching `ct.SEL_*` flag
# set on their last selector by `parse_selectors`.

# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
    'html|*:is(a, area, link)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
    '''
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first)
# `FLG_DEFAULT` marks the last selector (the submit-button pattern) with `ct.SEL_DEFAULT`.
CSS_DEFAULT = CSSParser(
    '''
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`
# `FLG_INDETERMINATE` marks the last selector (named radio inputs) with `ct.SEL_INDETERMINATE`.
CSS_INDETERMINATE = CSSParser(
    '''
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name][name!='']:not([checked])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
# NOTE(review): `fieldset` appears twice in the first `:is()` list; the repeat looks redundant.
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`
# The pattern uses `:disabled`, so presumably CSS_DISABLED must be compiled first - TODO confirm.
# NOTE(review): `fieldset` appears twice in the `:is()` list; the repeat looks redundant.
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
CSS_REQUIRED = CSSParser(
    'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`
CSS_OPTIONAL = CSSParser(
    'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`
# `FLG_PLACEHOLDER_SHOWN` marks the last selector (the textarea pattern) with `ct.SEL_PLACEHOLDER_SHOWN`.
CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder][placeholder!='']:is(:not([value]), [value=""]),
    html|textarea[placeholder][placeholder!='']
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature
# Compiled without `FLG_HTML`, unlike the other patterns in this group.
CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
    '''
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only`
# The pattern uses `:read-write`, so presumably CSS_READ_WRITE must be compiled first - TODO confirm.
CSS_READ_ONLY = CSSParser(
    '''
    html|*:not(:read-write)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`
# `FLG_IN_RANGE` marks the (only) selector with `ct.SEL_IN_RANGE`; the pattern text is the
# same as the one used for `:out-of-range`, so only the flag distinguishes the two.
CSS_IN_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`
# `FLG_OUT_OF_RANGE` marks the (only) selector with `ct.SEL_OUT_OF_RANGE`; the pattern text is
# the same as the one used for `:in-range`, so only the flag distinguishes the two.
CSS_OUT_OF_RANGE = CSSParser(
    '''
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
