Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/soupsieve/css_parser.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """CSS selector parser.""" | |
2 import re | |
3 from functools import lru_cache | |
4 from . import util | |
5 from . import css_match as cm | |
6 from . import css_types as ct | |
7 from .util import SelectorSyntaxError | |
8 import warnings | |
9 | |
10 UNICODE_REPLACEMENT_CHAR = 0xFFFD | |
11 | |
# Pseudo-classes without arguments that Soup Sieve can evaluate
PSEUDO_SIMPLE = {
    # Structural and link related
    ':any-link',
    ':empty',
    ':first-child',
    ':first-of-type',
    ':last-child',
    ':last-of-type',
    ':link',
    ':only-child',
    ':only-of-type',
    ':root',
    ':scope',
    ':defined',
    # Form related
    ':checked',
    ':default',
    ':disabled',
    ':enabled',
    ':in-range',
    ':indeterminate',
    ':optional',
    ':out-of-range',
    ':placeholder-shown',
    ':read-only',
    ':read-write',
    ':required',
}

# Pseudo-classes without arguments that are accepted but never match anything
PSEUDO_SIMPLE_NO_MATCH = {
    ':active',
    ':current',
    ':focus',
    ':focus-visible',
    ':focus-within',
    ':future',
    ':host',
    ':hover',
    ':local-link',
    ':past',
    ':paused',
    ':playing',
    ':target',
    ':target-within',
    ':user-invalid',
    ':visited',
}

# Pseudo-classes whose argument is itself a selector list (or text list)
PSEUDO_COMPLEX = {
    ':-soup-contains',
    ':-soup-contains-own',
    ':contains',
    ':has',
    ':is',
    ':matches',
    ':not',
    ':where',
}

# Pseudo-classes with an argument that are accepted but never match anything
PSEUDO_COMPLEX_NO_MATCH = {
    ':current',
    ':host',
    ':host-context',
}

# Pseudo-classes whose argument has its own dedicated grammar
PSEUDO_SPECIAL = {
    ':dir',
    ':lang',
    ':nth-child',
    ':nth-last-child',
    ':nth-last-of-type',
    ':nth-of-type',
}

# Every pseudo-class name the parser recognizes in some form
PSEUDO_SUPPORTED = set().union(
    PSEUDO_SIMPLE,
    PSEUDO_SIMPLE_NO_MATCH,
    PSEUDO_COMPLEX,
    PSEUDO_COMPLEX_NO_MATCH,
    PSEUDO_SPECIAL,
)
89 | |
# Sub-patterns parts
# These are plain strings that are spliced into larger patterns with `str.format`,
# which is why a literal regex quantifier is written with doubled braces (`{{1,6}}`).
# The multi-line fragments contain raw newlines and therefore only work in patterns
# compiled with `re.X` (as `SelectorPattern`, `RE_VALUES` and `RE_CUSTOM` do).
# Whitespace
# A newline is `\r\n` taken as a unit, or a single `\n`, `\f` or `\r`.
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
# A space, a tab, or a newline.
WS = r'(?:[ \t]|{})'.format(NEWLINE)
# Comments
# A complete `/* ... */` block.
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
# CSS escapes
# A backslash followed by 1-6 hex digits plus one optional whitespace, by any single
# character that is not a newline character, or by the end of the input.
# The hex class is lower-case only; upper-case digits match only when the consuming
# regex is compiled with `re.I`.
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
# Same as above, but an escaped newline is also accepted (used for quoted strings).
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
# CSS Identifier
# Start: an optional `-` followed by one or more start characters or escapes, or a literal `--`.
# Start characters exclude ASCII controls, ASCII punctuation (except `_`), digits and \x7B-\x9f.
# Continuation: the same set, but digits and `-` are additionally allowed.
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
# `nth` content
# Optional sign, then `<digits>`, `<digits>n` or `n`; a signed offset may follow only after `n`.
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
# Value: quoted string or identifier
# Double-quoted string, single-quoted string, or one or more identifiers.
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
# Named groups: `cmp` (operator), `value`, and the optional `case` flag (`i` or `s`).
# The whole comparison is optional; the closing `]` is always required.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
116 | |
# Selector patterns
# Each pattern below is a string; they are compiled by `SelectorPattern`
# with `re.I | re.X | re.U`.
# IDs (`#id`)
# `#` is escaped because it would start a comment in verbose mode.
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
# Prefix:Tag (`prefix|tag`)
# Groups: `tag_ns` (optional namespace including the trailing `|`) and `tag_name`.
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
# Attributes (`[attr]`, `[attr=value]`, etc.)
# Groups: `attr_ns` (optional, including the trailing `|`), `attr_name`, plus those of `ATTR`.
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
# Group `open` is only set when an opening parenthesis follows the name.
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
# Unlike `PAT_PSEUDO_CLASS`, the opening parenthesis is mandatory here.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
# Custom pseudo class (`:--custom-pseudo`)
# The lookahead requires the identifier to begin with `--`.
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
# Built by putting one more `:` in front of the pseudo-class pattern.
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
# At rule (`@page`, etc.) (not supported)
# NOTE(review): the literal `P` after `@` looks like a typo. As written, only names whose
# first letter is `p`/`P` (e.g. `@page`) match this pattern; confirm whether `@{ident}`
# was intended before changing it, since that alters which error other at-rules raise.
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
# Ends either at the closing `)` or, when the `of` group matches, right after the
# ` of ` keyword so the selector list that follows can be parsed separately.
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
# No `of S` clause is accepted here; the closing `)` is part of the match.
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
# Pseudo class language (`:lang("*-de", en)`)
# Group `values` holds the raw comma separated list of values.
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
# Combining characters (`>`, `~`, ` `, `+`, `,`)
# Group `relation` is the combinator itself; plain whitespace only counts as a
# combinator when it is not followed by one of the explicit combinator characters.
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
# Extra: Contains (`:contains(text)`)
# Same shape as `PAT_PSEUDO_LANG`: group `values` holds the raw comma separated list.
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
    name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
162 | |
# Regular expressions
# CSS escape pattern
# Capture groups: 1 = hex escape (with optional trailing whitespace), 2 = escaped single
# character, 3 = backslash at the end of the input.
# NOTE(review): this uses `WSC` (whitespace or comment) after the hex digits, whereas
# `CSS_ESCAPES` and `RE_CSS_STR_ESC` use plain `WS` - confirm that is intentional.
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I)
# String variant: same groups 1-3, plus group 4 = escaped newline (line continuation).
RE_CSS_STR_ESC = re.compile(
    r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers
# Named groups: `s1` (sign of the coefficient), `a` (coefficient, possibly ending in `n`),
# `s2` (sign of the offset) and `b` (offset digits).
RE_NTH = re.compile(
    r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
    re.I
)
# Pattern to iterate multiple values.
# Each match is either a `value` or a `split` (the comma with surrounding whitespace).
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
# Whitespace checks
RE_WS = re.compile(WS)
# Leading / trailing runs of whitespace and comments.
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
# A complete custom pseudo-class name (`:--name`) and nothing else.
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
181 | |
# Constants
# Token that separates the entries of a selector list
COMMA_COMBINATOR = ','
# Token used for the descendant (whitespace) relation
WS_COMBINATOR = ' '

# Parse flags: independent bits that are OR-ed together
FLG_PSEUDO = 1 << 0
FLG_NOT = 1 << 1
FLG_RELATIVE = 1 << 2
FLG_DEFAULT = 1 << 3
FLG_HTML = 1 << 4
FLG_INDETERMINATE = 1 << 5
FLG_OPEN = 1 << 6
FLG_IN_RANGE = 1 << 7
FLG_OUT_OF_RANGE = 1 << 8
FLG_PLACEHOLDER_SHOWN = 1 << 9

# Upper bound on the number of compiled patterns kept in the cache
_MAXCACHE = 500
202 | |
203 | |
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags):
    """
    Compile a selector pattern, memoizing the result.

    The custom selectors are normalized first, the pattern is then parsed with them,
    and the parsed selectors are wrapped in a `SoupSieve` object together with the
    original arguments. Since results are cached by argument value, all arguments
    must be hashable.
    """

    parser = CSSParser(pattern, custom=process_custom(custom), flags=flags)
    selectors = parser.process_selectors()
    return cm.SoupSieve(pattern, selectors, namespaces, custom, flags)
216 | |
217 | |
def _purge_cache():
    """
    Purge the cache.

    Drops every entry held by the `lru_cache` wrapper around `_cached_css_compile`.
    """

    _cached_css_compile.cache_clear()
222 | |
223 | |
def process_custom(custom):
    """
    Normalize a mapping of custom pseudo-class names to selectors.

    Each key is lower-cased, validated against `RE_CUSTOM` and CSS-unescaped; the
    unescaped name becomes the key of the returned dictionary.

    Returns an empty dictionary when `custom` is `None`.

    Raises `SelectorSyntaxError` if a name is not a valid custom pseudo-class name,
    and `KeyError` if two names normalize to the same key.
    """

    custom_selectors = {}
    if custom is None:
        return custom_selectors

    for key, value in custom.items():
        name = util.lower(key)
        if RE_CUSTOM.match(name) is None:
            raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
        # The dictionary is keyed by the unescaped name, so the duplicate check has to use
        # the unescaped name as well. Otherwise two spellings of the same name (one of them
        # using escapes) would silently overwrite each other.
        unescaped = css_unescape(name)
        if unescaped in custom_selectors:
            raise KeyError("The custom selector '{}' has already been registered".format(name))
        custom_selectors[unescaped] = value
    return custom_selectors
237 | |
238 | |
def css_unescape(content, string=False):
    """
    Unescape CSS value.

    `content` is the text to unescape. When `string` is true the string variant of the
    escape pattern is used, which additionally treats a backslash followed by a newline
    as a line continuation and removes it.

    Returns the unescaped text.
    """

    def replace(m):
        """Replace with the appropriate substitute."""

        if m.group(1):
            # Hex escape: drop the backslash; `int` ignores the optional trailing whitespace.
            codepoint = int(m.group(1)[1:], 16)
            # Zero, surrogates, and values beyond the last Unicode code point are not
            # valid escaped code points and are replaced with U+FFFD. Without the upper
            # bound check `chr` raises `ValueError` for input such as `\110000`.
            if codepoint == 0 or codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
                codepoint = UNICODE_REPLACEMENT_CHAR
            value = chr(codepoint)
        elif m.group(2):
            # Escaped single character: keep the character, drop the backslash.
            value = m.group(2)[1:]
        elif m.group(3):
            # A backslash at the very end of the input.
            value = chr(UNICODE_REPLACEMENT_CHAR)
        else:
            # Escaped newline (string pattern only): the line continuation vanishes.
            value = ''

        return value

    return (RE_CSS_STR_ESC if string else RE_CSS_ESC).sub(replace, content)
264 | |
265 | |
def escape(ident):
    """
    Escape a string so that it can be used as a CSS identifier.

    NUL becomes U+FFFD, control characters and a leading digit (also directly after a
    leading dash) are written as hex escapes followed by a space, ASCII letters, digits,
    `-`, `_` and anything at or above U+0080 are kept, and every other character is
    prefixed with a backslash. A lone `-` is escaped as well.
    """

    if len(ident) == 1 and ident[0] == '-':
        # An identifier cannot consist of a single dash
        return '\\{}'.format(ident)

    leading_dash = len(ident) > 0 and ident[0] == '-'
    pieces = []
    for pos, char in enumerate(ident):
        cp = ord(char)
        is_digit = 0x30 <= cp <= 0x39
        at_start = pos == 0 or (leading_dash and pos == 1)

        if cp == 0x00:
            piece = '\ufffd'
        elif 0x01 <= cp <= 0x1F or cp == 0x7F:
            piece = '\\{:x} '.format(cp)
        elif at_start and is_digit:
            piece = '\\{:x} '.format(cp)
        elif (
            cp >= 0x80 or cp in (0x2D, 0x5F) or is_digit or
            0x41 <= cp <= 0x5A or 0x61 <= cp <= 0x7A
        ):
            piece = char
        else:
            piece = '\\{}'.format(char)
        pieces.append(piece)
    return ''.join(pieces)
292 | |
293 | |
class SelectorPattern:
    """A named token pattern compiled as a case-insensitive, verbose, Unicode regex."""

    def __init__(self, name, pattern):
        """Store the token name and compile the pattern text."""

        self.name = name
        self.re_pattern = re.compile(pattern, re.IGNORECASE | re.VERBOSE | re.UNICODE)

    def get_name(self):
        """Return the token name."""

        return self.name

    def match(self, selector, index, flags):
        """Try to match the pattern in `selector` starting at `index` (`flags` is not used)."""

        return self.re_pattern.match(selector, index)
312 | |
313 | |
class SpecialPseudoPattern(SelectorPattern):
    """Dispatch `:name(` tokens to the pattern registered for that pseudo-class name."""

    def __init__(self, patterns):
        """
        Build the name to pattern lookup.

        Every entry of `patterns` is indexed as `(token name, pseudo-class names,
        pattern text, pattern class)`; one pattern object is created per entry and
        shared by all of the entry's pseudo-class names.
        """

        self.patterns = {}
        for entry in patterns:
            token_name = entry[0]
            compiled = entry[3](token_name, entry[2])
            for pseudo_name in entry[1]:
                self.patterns[pseudo_name] = compiled

        # Pattern object of the most recent successful match
        self.matched_name = None
        self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

    def get_name(self):
        """Return the token name of the pattern that matched last."""

        return self.matched_name.get_name()

    def match(self, selector, index, flags):
        """
        Match a special pseudo-class at `index`.

        The pseudo-class name is read first; only when a pattern is registered for it
        is that pattern tried. Returns the match object, or `None`.
        """

        name_match = self.re_pseudo_name.match(selector, index)
        if not name_match:
            return None

        pattern = self.patterns.get(util.lower(css_unescape(name_match.group('name'))))
        if not pattern:
            return None

        pseudo = pattern.match(selector, index, flags)
        if pseudo:
            self.matched_name = pattern
        return pseudo
349 | |
350 | |
351 class _Selector(object): | |
352 """ | |
353 Intermediate selector class. | |
354 | |
355 This stores selector data for a compound selector as we are acquiring them. | |
356 Once we are done collecting the data for a compound selector, we freeze | |
357 the data in an object that can be pickled and hashed. | |
358 """ | |
359 | |
360 def __init__(self, **kwargs): | |
361 """Initialize.""" | |
362 | |
363 self.tag = kwargs.get('tag', None) | |
364 self.ids = kwargs.get('ids', []) | |
365 self.classes = kwargs.get('classes', []) | |
366 self.attributes = kwargs.get('attributes', []) | |
367 self.nth = kwargs.get('nth', []) | |
368 self.selectors = kwargs.get('selectors', []) | |
369 self.relations = kwargs.get('relations', []) | |
370 self.rel_type = kwargs.get('rel_type', None) | |
371 self.contains = kwargs.get('contains', []) | |
372 self.lang = kwargs.get('lang', []) | |
373 self.flags = kwargs.get('flags', 0) | |
374 self.no_match = kwargs.get('no_match', False) | |
375 | |
376 def _freeze_relations(self, relations): | |
377 """Freeze relation.""" | |
378 | |
379 if relations: | |
380 sel = relations[0] | |
381 sel.relations.extend(relations[1:]) | |
382 return ct.SelectorList([sel.freeze()]) | |
383 else: | |
384 return ct.SelectorList() | |
385 | |
386 def freeze(self): | |
387 """Freeze self.""" | |
388 | |
389 if self.no_match: | |
390 return ct.SelectorNull() | |
391 else: | |
392 return ct.Selector( | |
393 self.tag, | |
394 tuple(self.ids), | |
395 tuple(self.classes), | |
396 tuple(self.attributes), | |
397 tuple(self.nth), | |
398 tuple(self.selectors), | |
399 self._freeze_relations(self.relations), | |
400 self.rel_type, | |
401 tuple(self.contains), | |
402 tuple(self.lang), | |
403 self.flags | |
404 ) | |
405 | |
406 def __str__(self): # pragma: no cover | |
407 """String representation.""" | |
408 | |
409 return ( | |
410 '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' | |
411 'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' | |
412 ).format( | |
413 self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, | |
414 self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match | |
415 ) | |
416 | |
417 __repr__ = __str__ | |
418 | |
419 | |
420 class CSSParser(object): | |
421 """Parse CSS selectors.""" | |
422 | |
# Token patterns known to the parser. Each entry pairs a token name with its regex.
# The special pseudo-classes share one dispatcher entry that selects the concrete
# pattern by pseudo-class name.
# NOTE(review): the tokenizer loop is not visible here; presumably the entries are
# tried in this order, which would make the order significant - confirm before reordering.
css_tokens = (
    SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
    SpecialPseudoPattern(
        (
            (
                "pseudo_contains",
                (':contains', ':-soup-contains', ':-soup-contains-own'),
                PAT_PSEUDO_CONTAINS,
                SelectorPattern
            ),
            ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
            ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
            ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
            ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
        )
    ),
    SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
    SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
    SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
    SelectorPattern("at_rule", PAT_AT_RULE),
    SelectorPattern("id", PAT_ID),
    SelectorPattern("class", PAT_CLASS),
    SelectorPattern("tag", PAT_TAG),
    SelectorPattern("attribute", PAT_ATTR),
    SelectorPattern("combine", PAT_COMBINE)
)
449 | |
def __init__(self, selector, custom=None, flags=0):
    """
    Set up the parser for one selector string.

    NUL characters in `selector` are replaced with U+FFFD. `custom` is the mapping of
    custom selectors (an empty dictionary is used when it is `None`), and `flags` is
    kept as given; its `util.DEBUG` bit is stored separately as `self.debug`.
    """

    self.pattern = selector.replace('\x00', '\ufffd')
    self.flags = flags
    self.debug = flags & util.DEBUG
    self.custom = custom if custom is not None else {}
457 | |
def parse_attribute_selector(self, sel, m, has_selector):
    """
    Create attribute selector from the returned regex match.

    `m` is a match of the attribute pattern. Its `cmp`, `value`, `case`, `attr_ns` and
    `attr_name` groups are turned into a `SelectorAttribute` that is added to `sel`.
    For the non-standard `!=` operator the attribute is instead wrapped in a negated
    selector list and added to `sel.selectors`.

    Always returns `True` (a selector part has been captured).
    """

    op = m.group('cmp')
    case = util.lower(m.group('case')) if m.group('case') else None
    ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
    attr = css_unescape(m.group('attr_name'))
    inverse = False
    is_type = False
    pattern2 = None

    # `re.DOTALL` lets `.` cross newline characters inside an attribute value. Without it,
    # a pattern that has to start matching at the beginning of the value (as with
    # `re.match`) cannot reach text located after an embedded newline.
    if case:
        flags = (re.I if case == 'i' else 0) | re.DOTALL
    elif util.lower(attr) == 'type':
        # Without an explicit case flag, `type` is compared case-insensitively, and a
        # second, case-sensitive pattern is handed over as well (see `pattern2` below).
        flags = re.I | re.DOTALL
        is_type = True
    else:
        flags = re.DOTALL

    if not op:
        # Attribute name only (`[attr]`)
        pattern = None
    else:
        raw_value = m.group('value')
        if raw_value.startswith(('"', "'")):
            # Quoted: strip the quotes and unescape with string rules
            value = css_unescape(raw_value[1:-1], True)
        else:
            value = css_unescape(raw_value)

        if op.startswith('^'):
            # Value start with
            pattern = re.compile(r'^%s.*' % re.escape(value), flags)
        elif op.startswith('$'):
            # Value ends with
            pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
        elif op.startswith('*'):
            # Value contains
            pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
        elif op.startswith('~'):
            # Value contains word within space separated list
            # `~=` should match nothing if it is empty or contains whitespace,
            # so if either of these cases is present, use `[^\s\S]` which cannot be matched.
            value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
            pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
        elif op.startswith('|'):
            # Value starts with word in dash separated list
            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
        else:
            # Value matches (`=` and `!=`)
            pattern = re.compile(r'^%s$' % re.escape(value), flags)
            if op.startswith('!'):
                # Equivalent to `:not([attr=value])`
                inverse = True

    if is_type and pattern:
        # Same expression without the implicit case-insensitivity
        pattern2 = re.compile(pattern.pattern, re.DOTALL)

    # Append the attribute selector
    sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
    if inverse:
        # If we are using `!=`, we need to nest the pattern under a `:not()`.
        sub_sel = _Selector()
        sub_sel.attributes.append(sel_attr)
        not_list = ct.SelectorList([sub_sel.freeze()], True, False)
        sel.selectors.append(not_list)
    else:
        sel.attributes.append(sel_attr)

    return True
527 | |
def parse_tag_pattern(self, sel, m, has_selector):
    """
    Store the tag (type selector) described by the match on `sel`.

    The namespace prefix, when present, loses its trailing `|`; both parts are
    CSS-unescaped. Always returns `True`.
    """

    ns_token = m.group('tag_ns')
    prefix = css_unescape(ns_token[:-1]) if ns_token else None
    sel.tag = ct.SelectorTag(css_unescape(m.group('tag_name')), prefix)
    return True
536 | |
def parse_pseudo_class_custom(self, sel, m, has_selector):
    """
    Resolve a custom pseudo-class (`:--name`) and attach its selector list to `sel`.

    Custom selectors are compiled lazily on first use. While one is being compiled its
    entry is set to `None`, so a selector that refers back to itself is reported as
    undefined instead of recursing forever. The compiled result replaces the source
    text in `self.custom`.

    Raises `SelectorSyntaxError` when the name is unknown. Always returns `True` otherwise.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    selector = self.custom.get(pseudo)
    if selector is None:
        position = m.end(0)
        raise SelectorSyntaxError(
            "Undefined custom selector '{}' found at postion {}".format(pseudo, position),
            self.pattern,
            position
        )

    if not isinstance(selector, ct.SelectorList):
        # Still source text: mark as "in progress", compile, then cache the result
        self.custom[pseudo] = None
        nested = CSSParser(selector, custom=self.custom, flags=self.flags)
        selector = nested.process_selectors(flags=FLG_PSEUDO)
        self.custom[pseudo] = selector

    sel.selectors.append(selector)
    return True
564 | |
def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
    """
    Handle a generic pseudo-class token, with or without an opening parenthesis.

    Returns the tuple `(has_selector, is_html)`.

    Raises `SelectorSyntaxError` when a known pseudo-class is used in the wrong form
    (with or without arguments), and `NotImplementedError` for an unknown one.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    complex_pseudo = bool(m.group('open'))

    if complex_pseudo:
        if pseudo in PSEUDO_COMPLEX:
            has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
            return has_selector, is_html
        if pseudo in PSEUDO_COMPLEX_NO_MATCH:
            # The argument is parsed (and thereby consumed), but its result is dropped
            self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
            sel.no_match = True
            return True, is_html
    elif pseudo in PSEUDO_SIMPLE:
        # Pseudo-classes that just set a bit on the selector
        flag_bits = {
            ':root': ct.SEL_ROOT,
            ':defined': ct.SEL_DEFINED,
            ':scope': ct.SEL_SCOPE,
            ':empty': ct.SEL_EMPTY
        }
        # Pseudo-classes that stand for a predefined selector list
        aliases = {
            ':link': CSS_LINK,
            ':any-link': CSS_LINK,
            ':checked': CSS_CHECKED,
            ':default': CSS_DEFAULT,
            ':indeterminate': CSS_INDETERMINATE,
            ':disabled': CSS_DISABLED,
            ':enabled': CSS_ENABLED,
            ':required': CSS_REQUIRED,
            ':optional': CSS_OPTIONAL,
            ':read-only': CSS_READ_ONLY,
            ':read-write': CSS_READ_WRITE,
            ':in-range': CSS_IN_RANGE,
            ':out-of-range': CSS_OUT_OF_RANGE,
            ':placeholder-shown': CSS_PLACEHOLDER_SHOWN
        }
        # Pseudo-classes expressed as `nth` entries: (of_type, last) per entry
        nth_specs = {
            ':first-child': ((False, False),),
            ':last-child': ((False, True),),
            ':first-of-type': ((True, False),),
            ':last-of-type': ((True, True),),
            ':only-child': ((False, False), (False, True)),
            ':only-of-type': ((True, False), (True, True))
        }

        if pseudo in flag_bits:
            sel.flags |= flag_bits[pseudo]
            if pseudo == ':defined':
                is_html = True
        elif pseudo in aliases:
            sel.selectors.append(aliases[pseudo])
        elif pseudo in nth_specs:
            sel.nth.extend(
                ct.SelectorNth(1, False, 0, of_type, last, ct.SelectorList())
                for of_type, last in nth_specs[pseudo]
            )
        return True, is_html
    elif pseudo in PSEUDO_SIMPLE_NO_MATCH:
        sel.no_match = True
        return True, is_html

    if pseudo in PSEUDO_SUPPORTED:
        # Known name, but used in the wrong form
        raise SelectorSyntaxError(
            "Invalid syntax for pseudo class '{}'".format(pseudo),
            self.pattern,
            m.start(0)
        )

    raise NotImplementedError(
        "'{}' pseudo-class is not implemented at this time".format(pseudo)
    )
652 | |
def parse_pseudo_nth(self, sel, m, has_selector, iselector):
    """
    Handle `:nth-child`, `:nth-last-child`, `:nth-of-type` and `:nth-last-of-type`.

    The `an+b` expression (or `even` / `odd`) is converted to integers and stored on
    `sel.nth`. For the child variants an `of S` clause, when present, is parsed from
    `iselector`. Always returns `True`.
    """

    mdict = m.groupdict()
    is_child = bool(mdict.get('pseudo_nth_child'))
    pseudo_sel = util.lower(css_unescape(mdict['name']))
    content = util.lower(mdict.get('nth_child' if is_child else 'nth_type'))

    if content == 'even':
        # 2n
        step, offset, var = 2, 0, True
    elif content == 'odd':
        # 2n+1
        step, offset, var = 2, 1, True
    else:
        parts = RE_NTH.match(content)
        coefficient = parts.group('a')
        var = coefficient.endswith('n')
        if coefficient.startswith('n'):
            # A bare `n` means a coefficient of one
            digits = '1'
        elif var:
            digits = coefficient[:-1]
        else:
            digits = coefficient
        step = int(('-' if parts.group('s1') == '-' else '') + digits, 10)

        b_digits = parts.group('b')
        if b_digits:
            offset = int(('-' if parts.group('s2') == '-' else '') + b_digits, 10)
        else:
            offset = 0

    if is_child:
        if m.group('of'):
            # Parse the rest of `of S`.
            nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
        else:
            # Use default `*|*` for `of S`.
            nth_sel = CSS_NTH_OF_S_DEFAULT
        from_end = {':nth-child': False, ':nth-last-child': True}
        if pseudo_sel in from_end:
            sel.nth.append(ct.SelectorNth(step, var, offset, False, from_end[pseudo_sel], nth_sel))
    else:
        from_end = {':nth-of-type': False, ':nth-last-of-type': True}
        if pseudo_sel in from_end:
            sel.nth.append(ct.SelectorNth(step, var, offset, True, from_end[pseudo_sel], ct.SelectorList()))

    return True
711 | |
def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
    """
    Parse the selector list inside a pseudo-class that takes one, and attach it to `sel`.

    `:not` additionally sets the negation flag and `:has` the relative flag for the
    nested parse. Always returns `True`.
    """

    extra = {':not': FLG_NOT, ':has': FLG_RELATIVE}.get(name, 0)
    nested = self.parse_selectors(iselector, index, FLG_PSEUDO | FLG_OPEN | extra)
    sel.selectors.append(nested)
    return True
724 | |
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
    """
    Handle a combinator token while parsing a relative selector (inside `:has()`).

    Returns the tuple `(has_selector, sel, rel_type)`: `has_selector` is reset to
    `False`, `sel` is a fresh selector, and `rel_type` is the leading combinator for
    the selector that follows.

    Raises `SelectorSyntaxError` for a comma without a preceding selector and for two
    non-whitespace combinators in a row.
    """

    combinator = m.group('relation').strip() or WS_COMBINATOR
    is_comma = combinator == COMMA_COMBINATOR

    if not has_selector:
        if is_comma:
            # If we've not captured any selector parts, the comma is either at the beginning of the pattern
            # or following another comma, both of which are unexpected. Commas must split selectors.
            raise SelectorSyntaxError(
                "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
                self.pattern,
                index
            )
        if rel_type[1:] != WS_COMBINATOR:
            # Whitespace is swallowed by the surrounding patterns, so two whitespace
            # combinators (or whitespace after another combinator) cannot show up here.
            # A pending non-whitespace combinator therefore means two combinators in a row.
            raise SelectorSyntaxError(
                'The multiple combinators at position {}'.format(index),
                self.pattern,
                index
            )
    else:
        # End the current selector and associate the leading combinator with it.
        sel.rel_type = rel_type
        selectors[-1].relations.append(sel)

    if is_comma:
        # Start the next entry of the list; it begins with the default relation.
        rel_type = ":" + WS_COMBINATOR
        selectors.append(_Selector())
    else:
        # Set the leading combinator for the next selector.
        rel_type = ':' + combinator

    return False, _Selector(), rel_type
766 | |
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
    """
    Handle a combinator token in a regular (non-relative) selector.

    The pending `relations` are moved onto `sel`. A comma finishes the current complex
    selector and adds it to `selectors`; any other combinator turns `sel` into the
    pending relation of the selector that follows.

    Returns the tuple `(has_selector, sel)` with `has_selector` reset to `False` and a
    fresh selector. Raises `SelectorSyntaxError` when no selector precedes the combinator.
    """

    combinator = m.group('relation').strip() or WS_COMBINATOR
    if not has_selector:
        raise SelectorSyntaxError(
            "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
            self.pattern,
            index
        )

    sel.relations.extend(relations)
    del relations[:]

    if combinator == COMMA_COMBINATOR:
        if not (sel.tag or is_pseudo):
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        selectors.append(sel)
    else:
        sel.rel_type = combinator
        relations.append(sel)

    return False, _Selector()
796 | |
def parse_class_id(self, sel, m, has_selector):
    """
    Record a `.class` or `#id` token on `sel`.

    The leading character decides the target list; the rest of the token is
    CSS-unescaped before it is stored. Always returns `True`.
    """

    token = m.group(0)
    target = sel.classes if token.startswith('.') else sel.ids
    target.append(css_unescape(token[1:]))
    return True
807 | |
def parse_pseudo_contains(self, sel, m, has_selector):
    """
    Parse `:-soup-contains()`, `:-soup-contains-own()` and the deprecated `:contains()`.

    Each comma separated value is unquoted (when quoted) and CSS-unescaped, and the
    resulting tuple is stored on `sel.contains`. Using `:contains` emits a
    `FutureWarning`. Always returns `True`.
    """

    pseudo = util.lower(css_unescape(m.group('name')))
    if pseudo == ":contains":
        warnings.warn(
            "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
            FutureWarning
        )
    contains_own = pseudo == ":-soup-contains-own"

    # Tokenize the raw text and unescape each value exactly once. Unescaping the whole
    # list up front and then each token again would resolve escapes twice, so an escaped
    # quote or backslash would change where the values are split.
    patterns = []
    for token in RE_VALUES.finditer(m.group('values')):
        if token.group('split'):
            continue
        value = token.group('value')
        if value.startswith(("'", '"')):
            value = css_unescape(value[1:-1], True)
        else:
            value = css_unescape(value)
        patterns.append(value)

    sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
    return True
832 | |
def parse_pseudo_lang(self, sel, m, has_selector):
    """
    Parse the language ranges of `:lang()` and store them on `sel.lang`.

    Separator tokens are skipped; quoted values lose their quotes and are unescaped
    with string rules, unquoted ones with identifier rules. Always returns `True`.
    """

    patterns = []
    for token in RE_VALUES.finditer(m.group('values')):
        if not token.group('split'):
            raw = token.group('value')
            if raw.startswith(('"', "'")):
                patterns.append(css_unescape(raw[1:-1], True))
            else:
                patterns.append(css_unescape(raw))

    sel.lang.append(ct.SelectorLang(patterns))
    return True
853 | |
def parse_pseudo_dir(self, sel, m, has_selector):
    """
    Set the direction flag for `:dir()` on `sel`.

    `ltr` (compared case-insensitively) selects the left-to-right flag, anything
    else the right-to-left flag. Always returns `True`.
    """

    is_ltr = util.lower(m.group('dir')) == 'ltr'
    sel.flags |= ct.SEL_DIR_LTR if is_ltr else ct.SEL_DIR_RTL
    return True
861 | |
def parse_selectors(self, iselector, index=0, flags=0):
    """
    Parse selector tokens into a `ct.SelectorList`.

    Pulls `(key, match)` pairs from `iselector` and hands each one to the
    matching `parse_*` helper. Parsing stops when the iterator is exhausted
    or, when `FLG_OPEN` is set, when the closing token of the enclosing
    pseudo-class is reached.

    `index` is the pattern position used when reporting errors; it is moved
    to the end of every token that has been processed. `flags` is a bit mask
    of `FLG_*` values describing the context being parsed.

    Raises `SelectorSyntaxError` when a selector is missing, a tag name is
    not at the start of a compound selector, or a pseudo-class is unmatched
    or left unclosed. Raises `NotImplementedError` for at-rules and
    pseudo-elements.
    """

    sel = _Selector()
    selectors = []
    has_selector = False
    closed = False
    relations = []
    rel_type = ":" + WS_COMBINATOR
    is_open = bool(flags & FLG_OPEN)
    is_pseudo = bool(flags & FLG_PSEUDO)
    is_relative = bool(flags & FLG_RELATIVE)
    is_not = bool(flags & FLG_NOT)
    is_html = bool(flags & FLG_HTML)
    is_default = bool(flags & FLG_DEFAULT)
    is_indeterminate = bool(flags & FLG_INDETERMINATE)
    is_in_range = bool(flags & FLG_IN_RANGE)
    is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
    is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

    if self.debug:  # pragma: no cover
        # Report only the flags that are set, in a fixed order.
        for label, enabled in (
            ('is_pseudo', is_pseudo),
            ('is_open', is_open),
            ('is_relative', is_relative),
            ('is_not', is_not),
            ('is_html', is_html),
            ('is_default', is_default),
            ('is_indeterminate', is_indeterminate),
            ('is_in_range', is_in_range),
            ('is_out_of_range', is_out_of_range),
            ('is_placeholder_shown', is_placeholder_shown),
        ):
            if enabled:
                print(' {}: True'.format(label))

    if is_relative:
        # Seed the list with an empty selector; the final relative piece is
        # attached to `selectors[-1].relations` once parsing completes.
        selectors.append(_Selector())

    try:
        while True:
            key, m = next(iselector)

            # Handle parts
            if key == "at_rule":
                raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
            elif key == 'pseudo_class_custom':
                has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
            elif key == 'pseudo_class':
                has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
            elif key == 'pseudo_element':
                raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
            elif key == 'pseudo_contains':
                has_selector = self.parse_pseudo_contains(sel, m, has_selector)
            elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
                has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
            elif key == 'pseudo_lang':
                has_selector = self.parse_pseudo_lang(sel, m, has_selector)
            elif key == 'pseudo_dir':
                has_selector = self.parse_pseudo_dir(sel, m, has_selector)
                # Currently only supports HTML
                is_html = True
            elif key == 'pseudo_close':
                if not has_selector:
                    raise SelectorSyntaxError(
                        "Expected a selector at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                if is_open:
                    # Leave the loop without advancing `index`; the caller
                    # owns the tokens that follow the close.
                    closed = True
                    break
                else:
                    raise SelectorSyntaxError(
                        "Unmatched pseudo-class close at position {}".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
            elif key == 'combine':
                if is_relative:
                    has_selector, sel, rel_type = self.parse_has_combinator(
                        sel, m, has_selector, selectors, rel_type, index
                    )
                else:
                    has_selector, sel = self.parse_combinator(
                        sel, m, has_selector, selectors, relations, is_pseudo, index
                    )
            elif key == 'attribute':
                has_selector = self.parse_attribute_selector(sel, m, has_selector)
            elif key == 'tag':
                if has_selector:
                    raise SelectorSyntaxError(
                        "Tag name found at position {} instead of at the start".format(m.start(0)),
                        self.pattern,
                        m.start(0)
                    )
                has_selector = self.parse_tag_pattern(sel, m, has_selector)
            elif key in ('class', 'id'):
                has_selector = self.parse_class_id(sel, m, has_selector)

            index = m.end(0)
    except StopIteration:
        # The token stream is exhausted; finish up below.
        pass

    if is_open and not closed:
        raise SelectorSyntaxError(
            "Unclosed pseudo-class at position {}".format(index),
            self.pattern,
            index
        )

    # Finish the selector piece that was still being built.
    if has_selector:
        if not sel.tag and not is_pseudo:
            # Implied `*`
            sel.tag = ct.SelectorTag('*', None)
        if is_relative:
            sel.rel_type = rel_type
            selectors[-1].relations.append(sel)
        else:
            sel.relations.extend(relations)
            del relations[:]
            selectors.append(sel)
    else:
        # We will always need to finish a selector when `:has()` is used as it leads with combining.
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    # Some patterns require additional logic, such as default. We try to make these the
    # last pattern, and append the appropriate flag to that selector which communicates
    # to the matcher what additional logic is required.
    if is_default:
        selectors[-1].flags = ct.SEL_DEFAULT
    if is_indeterminate:
        selectors[-1].flags = ct.SEL_INDETERMINATE
    if is_in_range:
        selectors[-1].flags = ct.SEL_IN_RANGE
    if is_out_of_range:
        selectors[-1].flags = ct.SEL_OUT_OF_RANGE
    if is_placeholder_shown:
        selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

    return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
1013 | |
def selector_iter(self, pattern):
    """
    Yield `(name, match)` pairs for the selector tokens in `pattern`.

    Scanning starts after the span matched by `RE_WS_BEGIN` and stops before
    the span matched by `RE_WS_END`. At each position the entries of
    `self.css_tokens` are tried in order and the first one that matches is
    yielded. If none matches, `SelectorSyntaxError` is raised.
    """

    # Ignore whitespace and comments at start and end of pattern
    leading = RE_WS_BEGIN.search(pattern)
    index = leading.end(0) if leading else 0
    trailing = RE_WS_END.search(pattern)
    end = (trailing.start(0) if trailing else len(pattern)) - 1

    # First characters that identify a known selector type, used to word the
    # error when such a selector fails to tokenize.
    malformed_kinds = {
        '[': 'attribute',
        '.': 'class',
        '#': 'id',
        ':': 'pseudo-class',
    }

    if self.debug:  # pragma: no cover
        print('## PARSING: {!r}'.format(pattern))

    while index <= end:
        for token in self.css_tokens:
            m = token.match(pattern, index, self.flags)
            if m:
                name = token.get_name()
                if self.debug:  # pragma: no cover
                    print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
                index = m.end(0)
                yield name, m
                break
        else:
            # Nothing matched here: blame the known selector type when the
            # character starts one, otherwise report the character itself.
            c = pattern[index]
            if c in malformed_kinds:
                msg = "Malformed {} selector at position {}".format(malformed_kinds[c], index)
            else:
                msg = "Invalid character {!r} position {}".format(c, index)
            raise SelectorSyntaxError(msg, self.pattern, index)

    if self.debug:  # pragma: no cover
        print('## END PARSING')
1054 | |
def process_selectors(self, index=0, flags=0):
    """
    Tokenize `self.pattern` and parse the tokens into a selector list.

    `index` and `flags` are forwarded unchanged to `parse_selectors`.
    """

    tokens = self.selector_iter(self.pattern)
    return self.parse_selectors(tokens, index, flags)
1059 | |
1060 | |
# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern).
# A few patterns are order dependent as they use previously compiled patterns.
1063 | |
# Selector behind `:link` and `:any-link`: `a` or `area` elements in the
# `html` namespace that carry an `href` attribute.
CSS_LINK = CSSParser(
    "html|*:is(a, area)[href]"
).process_selectors(flags=FLG_HTML | FLG_PSEUDO)
# Selector behind `:checked`: checkbox/radio inputs with a `checked`
# attribute, or `option` elements with a `selected` attribute.
CSS_CHECKED = CSSParser(
    """
    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    """
).process_selectors(flags=FLG_HTML | FLG_PSEUDO)
# Selector behind `:default`. It refers to `:checked`, so CSS_CHECKED has to
# be compiled before this one. FLG_DEFAULT makes the parser mark the last
# selector of the list with `ct.SEL_DEFAULT`.
CSS_DEFAULT = CSSParser(
    """
    :checked,

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|form html|*:is(button, input)[type="submit"]
    """
).process_selectors(flags=FLG_DEFAULT | FLG_HTML | FLG_PSEUDO)
# Selector behind `:indeterminate`. FLG_INDETERMINATE makes the parser mark
# the last selector of the list with `ct.SEL_INDETERMINATE`.
CSS_INDETERMINATE = CSSParser(
    """
    html|input[type="checkbox"][indeterminate],
    html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
    html|progress:not([value]),

    /*
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
    html|input[type="radio"][name]:not([name='']):not([checked])
    """
).process_selectors(flags=FLG_INDETERMINATE | FLG_HTML | FLG_PSEUDO)
# CSS pattern for `:disabled`. It covers, in order:
#   1. form-related elements that carry a `disabled` attribute themselves,
#   2. `option` children of a disabled `optgroup`,
#   3. form controls that are direct children of a disabled `fieldset`,
#   4. form controls nested below any child of a disabled `fieldset` other
#      than its first `legend`.
# `fieldset` used to appear twice in the first `:is()` list; the repeated
# alternative could never match anything the first one did not, so it has
# been dropped.
CSS_DISABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option)[disabled],
    html|optgroup[disabled] > html|option,
    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`: form-related elements that do not match
# `:disabled`. `fieldset` used to appear twice in the `:is()` list; the
# repeated alternative was redundant and has been dropped.
CSS_ENABLED = CSSParser(
    '''
    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option):not(:disabled)
    '''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# Selector behind `:required`: `input`, `textarea` or `select` elements with
# a `required` attribute.
CSS_REQUIRED = CSSParser(
    "html|*:is(input, textarea, select)[required]"
).process_selectors(flags=FLG_HTML | FLG_PSEUDO)
# Selector behind `:optional`: `input`, `textarea` or `select` elements
# without a `required` attribute.
CSS_OPTIONAL = CSSParser(
    "html|*:is(input, textarea, select):not([required])"
).process_selectors(flags=FLG_HTML | FLG_PSEUDO)
# Selector behind `:placeholder-shown`: text-like inputs and textareas with a
# non-empty `placeholder`. FLG_PLACEHOLDER_SHOWN makes the parser mark the
# last selector of the list with `ct.SEL_PLACEHOLDER_SHOWN`.
CSS_PLACEHOLDER_SHOWN = CSSParser(
    """
    html|input:is(
        :not([type]),
        [type=""],
        [type=text],
        [type=search],
        [type=url],
        [type=tel],
        [type=email],
        [type=password],
        [type=number]
    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
    html|textarea[placeholder]:not([placeholder=''])
    """
).process_selectors(flags=FLG_PLACEHOLDER_SHOWN | FLG_HTML | FLG_PSEUDO)
# Default selector for the "of S" feature of `:nth-child`: `*|*`.
CSS_NTH_OF_S_DEFAULT = CSSParser(
    "*|*"
).process_selectors(flags=FLG_PSEUDO)
# Selector behind `:read-write`. It refers to `:disabled`, so CSS_DISABLED
# has to be compiled before this one.
CSS_READ_WRITE = CSSParser(
    """
    html|*:is(
        textarea,
        input:is(
            :not([type]),
            [type=""],
            [type=text],
            [type=search],
            [type=url],
            [type=tel],
            [type=email],
            [type=number],
            [type=password],
            [type=date],
            [type=datetime-local],
            [type=month],
            [type=time],
            [type=week]
        )
    ):not([readonly], :disabled),
    html|*:is([contenteditable=""], [contenteditable="true" i])
    """
).process_selectors(flags=FLG_HTML | FLG_PSEUDO)
# Selector behind `:read-only`: any element in the `html` namespace that does
# not match `:read-write`.
CSS_READ_ONLY = CSSParser(
    """
    html|*:not(:read-write)
    """
).process_selectors(flags=FLG_HTML | FLG_PSEUDO)
# Selector behind `:in-range`: date/time/number/range inputs that declare a
# `min` or `max` attribute. FLG_IN_RANGE makes the parser mark the last
# selector of the list with `ct.SEL_IN_RANGE`.
CSS_IN_RANGE = CSSParser(
    """
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    """
).process_selectors(flags=FLG_IN_RANGE | FLG_HTML | FLG_PSEUDO)
# Selector behind `:out-of-range`: date/time/number/range inputs that declare
# a `min` or `max` attribute. FLG_OUT_OF_RANGE makes the parser mark the last
# selector of the list with `ct.SEL_OUT_OF_RANGE`.
CSS_OUT_OF_RANGE = CSSParser(
    """
    html|input:is(
        [type="date"],
        [type="month"],
        [type="week"],
        [type="time"],
        [type="datetime-local"],
        [type="number"],
        [type="range"]
    ):is(
        [min],
        [max]
    )
    """
).process_selectors(flags=FLG_OUT_OF_RANGE | FLG_HTML | FLG_PSEUDO)