2
|
1 #!/usr/bin/env python2
|
|
2 #
|
|
3 # Copyright 2011-2016 Google Inc.
|
|
4 #
|
|
5 # Licensed under the Apache License, Version 2.0 (the "License");
|
|
6 # you may not use this file except in compliance with the License.
|
|
7 # You may obtain a copy of the License at
|
|
8 #
|
|
9 # http://www.apache.org/licenses/LICENSE-2.0
|
|
10 #
|
|
11 # Unless required by applicable law or agreed to in writing, software
|
|
12 # distributed under the License is distributed on an "AS IS" BASIS,
|
|
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14 # See the License for the specific language governing permissions and
|
|
15 # limitations under the License.
|
|
16 #
|
|
17 # vim: sw=2 sts=2 et
|
|
18
|
|
19 """This module can generate all strings that match a regular expression.
|
|
20
|
|
21 The regex is parsed using the SRE module that is standard in python,
|
|
22 then the data structure is executed to form a bunch of iterators.
|
|
23 """
|
|
24
|
|
25 __author__ = 'alexperry@google.com (Alex Perry)'
|
|
26 __all__ = ['Values', 'AllStrings', 'AllMatches', 'ParseError']
|
|
27
|
|
28
|
|
29 import bisect
|
|
30 import math
|
|
31 import re
|
|
32 import sre_constants
|
|
33 import sre_parse
|
|
34 import string
|
|
35 import sys
|
|
36 import types
|
|
37
|
|
38 import cachingseq
|
|
39 import fastdivmod
|
|
40
|
|
41 try:
|
|
42 xrange = xrange
|
|
43 except NameError:
|
|
44 xrange = range
|
|
45
|
|
46 _RE_METACHARS = r'$^{}*+\\'
|
|
47 _ESCAPED_METACHAR = r'\\[' + _RE_METACHARS + r']'
|
|
48 ESCAPED_METACHAR_RE = re.compile(_ESCAPED_METACHAR)
|
|
49 # ASCII by default, see https://github.com/google/sre_yield/issues/3
|
|
50 CHARSET = [chr(c) for c in range(256)]
|
|
51
|
|
52 WORD = string.ascii_letters + string.digits + '_'
|
|
53
|
|
54 try:
|
|
55 DEFAULT_RE_FLAGS = re.ASCII
|
|
56 except AttributeError:
|
|
57 DEFAULT_RE_FLAGS = 0
|
|
58
|
|
59 STATE_START, STATE_MIDDLE, STATE_END = list(range(3))
|
|
60
|
|
61 def Not(chars):
|
|
62 return ''.join(sorted(set(CHARSET) - set(chars)))
|
|
63
|
|
64
|
|
65 CATEGORIES = {
|
|
66 sre_constants.CATEGORY_WORD: WORD,
|
|
67 sre_constants.CATEGORY_NOT_WORD: Not(WORD),
|
|
68 sre_constants.CATEGORY_DIGIT: string.digits,
|
|
69 sre_constants.CATEGORY_NOT_DIGIT: Not(string.digits),
|
|
70 sre_constants.CATEGORY_SPACE: string.whitespace,
|
|
71 sre_constants.CATEGORY_NOT_SPACE: Not(string.whitespace),
|
|
72 }
|
|
73
|
|
74 # This constant varies between builds of Python; this is the lower value.
|
|
75 MAX_REPEAT_COUNT = 65535
|
|
76
|
|
77
|
|
78 class ParseError(Exception):
|
|
79 pass
|
|
80
|
|
81
|
|
82 def slice_indices(slice_obj, size):
|
|
83 """slice_obj.indices() except this one supports longs."""
|
|
84 # start stop step
|
|
85 start = slice_obj.start
|
|
86 stop = slice_obj.stop
|
|
87 step = slice_obj.step
|
|
88
|
|
89 # We don't always update a value for negative indices (if we wrote it here
|
|
90 # due to None).
|
|
91 if step is None:
|
|
92 step = 1
|
|
93 if start is None:
|
|
94 if step > 0:
|
|
95 start = 0
|
|
96 else:
|
|
97 start = size - 1
|
|
98 else:
|
|
99 start = _adjust_index(start, size)
|
|
100
|
|
101 if stop is None:
|
|
102 if step > 0:
|
|
103 stop = size
|
|
104 else:
|
|
105 stop = -1
|
|
106 else:
|
|
107 stop = _adjust_index(stop, size)
|
|
108
|
|
109 return (start, stop, step)
|
|
110
|
|
111
|
|
112 def _adjust_index(n, size):
|
|
113 if n < 0:
|
|
114 n += size
|
|
115
|
|
116 if n < 0:
|
|
117 raise IndexError("Out of range")
|
|
118 if n > size:
|
|
119 n = size
|
|
120 return n
|
|
121
|
|
122
|
|
123 def _xrange(*args):
|
|
124 """Because xrange doesn't support longs :("""
|
|
125 # prefer real xrange if it works
|
|
126 try:
|
|
127 return xrange(*args)
|
|
128 except OverflowError:
|
|
129 return _bigrange(*args)
|
|
130
|
|
131
|
|
132 def _bigrange(*args):
|
|
133 if len(args) == 1:
|
|
134 start = 0; stop = args[0]; step = 1
|
|
135 elif len(args) == 2:
|
|
136 start, stop = args
|
|
137 step = 1
|
|
138 elif len(args) == 3:
|
|
139 start, stop, step = args
|
|
140 else:
|
|
141 raise ValueError("Too many args for _bigrange")
|
|
142
|
|
143 i = start
|
|
144 while True:
|
|
145 yield i
|
|
146 i += step
|
|
147 if step < 0 and i <= stop:
|
|
148 break
|
|
149 if step > 0 and i >= stop:
|
|
150 break
|
|
151
|
|
152
|
|
153 class WrappedSequence(object):
|
|
154 """This wraps a sequence, purely as a base clase for the other uses."""
|
|
155
|
|
156 def __init__(self, raw):
|
|
157 # Derived classes will likely override this constructor
|
|
158 self.raw = raw
|
|
159 # Note that we can't use the function len() because it insists on trying
|
|
160 # to convert the returned number from a long-int to an ordinary int.
|
|
161 self.length = raw.__len__()
|
|
162
|
|
163 def get_item(self, i, d=None):
|
|
164 i = _adjust_index(i, self.length)
|
|
165 if hasattr(self.raw, 'get_item'):
|
|
166 return self.raw.get_item(i, d)
|
|
167 return self.raw[i]
|
|
168
|
|
169 def __len__(self):
|
|
170 return self.length
|
|
171
|
|
172 def __getitem__(self, i):
|
|
173 # If the user wanted a slice, we provide a wrapper
|
|
174 if isinstance(i, slice):
|
|
175 result = SlicedSequence(self, slicer=i)
|
|
176 if result.__len__() < 16:
|
|
177 # Short lists are unpacked
|
|
178 result = [item for item in result]
|
|
179 return result
|
|
180 i = _adjust_index(i, self.length)
|
|
181 # Usually we just call the user-provided function
|
|
182 return self.get_item(i)
|
|
183
|
|
184 def __iter__(self):
|
|
185 for i in _xrange(int(self.length)):
|
|
186 yield self.get_item(i)
|
|
187
|
|
188
|
|
189 def _sign(x):
|
|
190 if x > 0:
|
|
191 return 1
|
|
192 else:
|
|
193 return -1
|
|
194
|
|
195
|
|
196 class SlicedSequence(WrappedSequence):
|
|
197 """This is part of an immutable and potentially arbitrarily long list."""
|
|
198
|
|
199 def __init__(self, raw, slicer=None):
|
|
200 # Derived classes will likely override this constructor
|
|
201 self.raw = raw
|
|
202 if slicer is None:
|
|
203 self.start, self.stop, self.step = 0, raw.__len__(), 1
|
|
204 else:
|
|
205 self.start, self.stop, self.step = slice_indices(slicer, raw.__len__())
|
|
206
|
|
207 # Integer round up, depending on step direction
|
|
208 self.length = ((self.stop - self.start + self.step - _sign(self.step)) /
|
|
209 self.step)
|
|
210
|
|
211 def get_item(self, i, d=None):
|
|
212 j = i * self.step + self.start
|
|
213 return self.raw[j]
|
|
214
|
|
215
|
|
216 class ConcatenatedSequence(WrappedSequence):
|
|
217 """This is equivalent to using extend() but without unpacking the lists."""
|
|
218
|
|
219 def __init__(self, *alternatives):
|
|
220 self.list_lengths = [(a, a.__len__()) for a in alternatives]
|
|
221 self.length = sum(a_len for _, a_len in self.list_lengths)
|
|
222
|
|
223 def get_item(self, i, d=None):
|
|
224 for a, a_len in self.list_lengths:
|
|
225 if i < a_len:
|
|
226 return a[i]
|
|
227 i -= a_len
|
|
228 raise IndexError('Too Big')
|
|
229
|
|
230 def __contains__(self, item):
|
|
231 for a, _ in self.list_lengths:
|
|
232 if item in a:
|
|
233 return True
|
|
234 return False
|
|
235
|
|
236 def __repr__(self):
|
|
237 return '{concat ' + repr(self.list_lengths) + '}'
|
|
238
|
|
239
|
|
240 class CombinatoricsSequence(WrappedSequence):
|
|
241 """This uses all combinations of one item from each passed list."""
|
|
242
|
|
243 def __init__(self, *components):
|
|
244 self.list_lengths = [(a, a.__len__()) for a in components]
|
|
245 self.length = 1
|
|
246 for _, c_len in self.list_lengths:
|
|
247 self.length *= c_len
|
|
248
|
|
249 def get_item(self, i, d=None):
|
|
250 result = []
|
|
251 if i < 0:
|
|
252 i += self.length
|
|
253 if i < 0 or i >= self.length:
|
|
254 raise IndexError("Index %d out of bounds" % (i,))
|
|
255
|
|
256 if len(self.list_lengths) == 1:
|
|
257 # skip unnecessary ''.join -- big speedup
|
|
258 return self.list_lengths[0][0][i]
|
|
259
|
|
260 for c, c_len in self.list_lengths:
|
|
261 i, mod = divmod(i, c_len)
|
|
262 if hasattr(c, 'get_item'):
|
|
263 result.append(c.get_item(mod, d))
|
|
264 else:
|
|
265 result.append(c[mod])
|
|
266 return ''.join(result)
|
|
267
|
|
268 def __repr__(self):
|
|
269 return '{combin ' + repr(self.list_lengths) + '}'
|
|
270
|
|
271
|
|
272 class RepetitiveSequence(WrappedSequence):
|
|
273 """This chooses an entry from a list, many times, and concatenates."""
|
|
274
|
|
275 def __init__(self, content, lowest=1, highest=1):
|
|
276 self.content = content
|
|
277 self.content_length = content.__len__()
|
|
278 self.length = fastdivmod.powersum(self.content_length, lowest, highest)
|
|
279 self.lowest = lowest
|
|
280 self.highest = highest
|
|
281
|
|
282 def arbitrary_entry(i):
|
|
283 return (fastdivmod.powersum(self.content_length, lowest, i+lowest-1), i+lowest)
|
|
284
|
|
285 def entry_from_prev(i, prev):
|
|
286 return (prev[0] + (self.content_length ** prev[1]), prev[1] + 1)
|
|
287
|
|
288 self.offsets = cachingseq.CachingFuncSequence(
|
|
289 arbitrary_entry, highest - lowest+1, entry_from_prev)
|
|
290 # This needs to be a constant in order to reuse caclulations in future
|
|
291 # calls to bisect (a moving target will produce more misses).
|
|
292 if self.offsets[-1][0] > sys.maxsize:
|
|
293 i = 0
|
|
294 while i + 2 < len(self.offsets):
|
|
295 if self.offsets[i+1][0] > sys.maxsize:
|
|
296 self.index_of_offset = i
|
|
297 self.offset_break = self.offsets[i][0]
|
|
298 break
|
|
299 i += 1
|
|
300 else:
|
|
301 self.index_of_offset = len(self.offsets)
|
|
302 self.offset_break = sys.maxsize
|
|
303
|
|
304 def get_item(self, i, d=None):
|
|
305 """Finds out how many repeats this index implies, then picks strings."""
|
|
306 if i < self.offset_break:
|
|
307 by_bisect = bisect.bisect_left(self.offsets, (i, -1), hi=self.index_of_offset)
|
|
308 else:
|
|
309 by_bisect = bisect.bisect_left(self.offsets, (i, -1), lo=self.index_of_offset)
|
|
310
|
|
311 if by_bisect == len(self.offsets) or self.offsets[by_bisect][0] > i:
|
|
312 by_bisect -= 1
|
|
313
|
|
314 num = i - self.offsets[by_bisect][0]
|
|
315 count = self.offsets[by_bisect][1]
|
|
316
|
|
317 if count > 100 and self.content_length < 1000:
|
|
318 content = list(self.content)
|
|
319 else:
|
|
320 content = self.content
|
|
321
|
|
322 result = []
|
|
323
|
|
324 if count == 0:
|
|
325 return ''
|
|
326
|
|
327 for modulus in fastdivmod.divmod_iter(num, self.content_length):
|
|
328 result.append(content[modulus])
|
|
329
|
|
330 leftover = count - len(result)
|
|
331 if leftover:
|
|
332 assert leftover > 0
|
|
333 result.extend([content[0]] * leftover)
|
|
334
|
|
335 # smallest place value ends up on the right
|
|
336 return ''.join(result[::-1])
|
|
337
|
|
338 def __repr__(self):
|
|
339 return '{repeat base=%d low=%d high=%d}' % (self.content_length, self.lowest, self.highest)
|
|
340
|
|
341
|
|
342 class SaveCaptureGroup(WrappedSequence):
|
|
343 def __init__(self, parsed, key):
|
|
344 self.key = key
|
|
345 super(SaveCaptureGroup, self).__init__(parsed)
|
|
346
|
|
347 def get_item(self, n, d=None):
|
|
348 rv = super(SaveCaptureGroup, self).get_item(n, d)
|
|
349 if d is not None:
|
|
350 d[self.key] = rv
|
|
351 return rv
|
|
352
|
|
353
|
|
354 class ReadCaptureGroup(WrappedSequence):
|
|
355 def __init__(self, n):
|
|
356 self.num = n
|
|
357 self.length = 1
|
|
358
|
|
359 def get_item(self, i, d=None):
|
|
360 if i != 0:
|
|
361 raise IndexError(i)
|
|
362 if d is None:
|
|
363 raise ValueError('ReadCaptureGroup with no dict')
|
|
364 return d.get(self.num, "fail")
|
|
365
|
|
366
|
|
367 class RegexMembershipSequence(WrappedSequence):
|
|
368 """Creates a sequence from the regex, knows how to test membership."""
|
|
369
|
|
370 def empty_list(self, *_):
|
|
371 return []
|
|
372
|
|
373 def nothing_added(self, *_):
|
|
374 return ['']
|
|
375
|
|
376 def branch_values(self, _, items):
|
|
377 """Converts SRE parser data into literals and merges those lists."""
|
|
378 return ConcatenatedSequence(
|
|
379 *[self.sub_values(parsed) for parsed in items])
|
|
380
|
|
381 def max_repeat_values(self, min_count, max_count, items):
|
|
382 """Sequential expansion of the count to be combinatorics."""
|
|
383 max_count = min(max_count, self.max_count)
|
|
384 return RepetitiveSequence(
|
|
385 self.sub_values(items), min_count, max_count)
|
|
386
|
|
387 def in_values(self, items):
|
|
388 # Special case which distinguishes branch from charset operator
|
|
389 if items and items[0][0] == sre_constants.NEGATE:
|
|
390 items = self.branch_values(None, items[1:])
|
|
391 return [item for item in self.charset if item not in items]
|
|
392 return self.branch_values(None, items)
|
|
393
|
|
394 def not_literal(self, y):
|
|
395 return self.in_values(((sre_constants.NEGATE,),
|
|
396 (sre_constants.LITERAL, y),))
|
|
397
|
|
398 def category(self, y):
|
|
399 return CATEGORIES[y]
|
|
400
|
|
401 def groupref(self, n):
|
|
402 self.has_groupref = True
|
|
403 return ReadCaptureGroup(n)
|
|
404
|
|
405 def get_item(self, i, d=None):
|
|
406 """Typically only pass i. d is an internal detail, for consistency with other classes.
|
|
407
|
|
408 If you care about the capture groups, you should use
|
|
409 RegexMembershipSequenceMatches instead, which returns a Match object
|
|
410 instead of a string."""
|
|
411 if self.has_groupref or d is not None:
|
|
412 if d is None:
|
|
413 d = {}
|
|
414 return super(RegexMembershipSequence, self).get_item(i, d)
|
|
415 else:
|
|
416 return super(RegexMembershipSequence, self).get_item(i)
|
|
417
|
|
418 def sub_values(self, parsed):
|
|
419 """This knows how to convert one piece of parsed pattern."""
|
|
420 # If this is a subpattern object, we just want its data
|
|
421 if isinstance(parsed, sre_parse.SubPattern):
|
|
422 parsed = parsed.data
|
|
423 # A list indicates sequential elements of a string
|
|
424 if isinstance(parsed, list):
|
|
425 elements = [self.sub_values(p) for p in parsed]
|
|
426 return CombinatoricsSequence(*elements)
|
|
427 # If not a list, a tuple represents a specific match type
|
|
428 if isinstance(parsed, tuple) and parsed:
|
|
429 matcher, arguments = parsed
|
|
430 if not isinstance(arguments, tuple):
|
|
431 arguments = (arguments,)
|
|
432 if matcher in self.backends:
|
|
433 self.check_anchor_state(matcher, arguments)
|
|
434 return self.backends[matcher](*arguments)
|
|
435 # No idea what to do here
|
|
436 raise ParseError(repr(parsed))
|
|
437
|
|
438 def maybe_save(self, *args):
|
|
439 # Python 3.6 has group, add_flags, del_flags, parsed
|
|
440 # while earlier versions just have group, parsed
|
|
441 group = args[0]
|
|
442 parsed = args[-1]
|
|
443 rv = self.sub_values(parsed)
|
|
444 if group is not None:
|
|
445 rv = SaveCaptureGroup(rv, group)
|
|
446 return rv
|
|
447
|
|
448 def check_anchor_state(self, matcher, arguments):
|
|
449 # A bit of a hack to support zero-width leading anchors. The goal is
|
|
450 # that /^(a|b)$/ will match properly, and that /a^b/ or /a\bb/ throws
|
|
451 # an error. (It's unfortunate that I couldn't easily handle /$^/ which
|
|
452 # matches the empty string; I went for the common case.)
|
|
453 #
|
|
454 # There are three states, for example:
|
|
455 # / STATE_START
|
|
456 # | / STATE_START (^ causes no transition here, but is illegal at STATE_MIDDLE or STATE_END)
|
|
457 # | | / STATE_START (\b causes no transition here, but advances MIDDLE to END)
|
|
458 # | | | / (same as above for ^)
|
|
459 # | | | | / STATE_MIDDLE (anything besides ^ and \b advances START to MIDDLE)
|
|
460 # | | | | | / still STATE_MIDDLE
|
|
461 # . . . . . . / advances MIDDLE to END
|
|
462 # ^ \b ^ X Y \b $
|
|
463 old_state = self.state
|
|
464 if self.state == STATE_START:
|
|
465 if matcher == sre_constants.AT:
|
|
466 if arguments[0] in (sre_constants.AT_END, sre_constants.AT_END_STRING):
|
|
467 self.state = STATE_END
|
|
468 elif arguments[0] == sre_constants.AT_NON_BOUNDARY:
|
|
469 # This is nonsensical at beginning of string
|
|
470 raise ParseError('Anchor %r found at START state' % (arguments[0],))
|
|
471 # All others (AT_BEGINNING, AT_BEGINNING_STRING, and AT_BOUNDARY) remain in START.
|
|
472 elif matcher != sre_constants.SUBPATTERN:
|
|
473 self.state = STATE_MIDDLE
|
|
474 # subpattern remains in START
|
|
475 elif self.state == STATE_END:
|
|
476 if matcher == sre_constants.AT:
|
|
477 if arguments[0] not in (
|
|
478 sre_constants.AT_END, sre_constants.AT_END_STRING,
|
|
479 sre_constants.AT_BOUNDARY):
|
|
480 raise ParseError('Anchor %r found at END state' % (arguments[0],))
|
|
481 # those three remain in END
|
|
482 elif matcher != sre_constants.SUBPATTERN:
|
|
483 raise ParseError('Non-end-anchor %r found at END state' % (arguments[0],))
|
|
484 # subpattern remains in END
|
|
485 else: # self.state == STATE_MIDDLE
|
|
486 if matcher == sre_constants.AT:
|
|
487 if arguments[0] not in (
|
|
488 sre_constants.AT_END, sre_constants.AT_END_STRING,
|
|
489 sre_constants.AT_BOUNDARY):
|
|
490 raise ParseError('Anchor %r found at MIDDLE state' % (arguments[0],))
|
|
491 # All others (AT_END, AT_END_STRING, AT_BOUNDARY) advance to END.
|
|
492 self.state = STATE_END
|
|
493
|
|
494 def __init__(self, pattern, flags=0, charset=CHARSET, max_count=None):
|
|
495 # If the RE module cannot compile it, we give up quickly
|
|
496 self.matcher = re.compile(r'(?:%s)\Z' % pattern, flags)
|
|
497 if not flags & re.DOTALL:
|
|
498 charset = ''.join(c for c in charset if c != '\n')
|
|
499 self.charset = charset
|
|
500
|
|
501 self.named_group_lookup = self.matcher.groupindex
|
|
502
|
|
503 flags |= DEFAULT_RE_FLAGS # https://github.com/google/sre_yield/issues/3
|
|
504 if flags & re.IGNORECASE:
|
|
505 raise ParseError('Flag "i" not supported. https://github.com/google/sre_yield/issues/4')
|
|
506 elif flags & re.UNICODE:
|
|
507 raise ParseError('Flag "u" not supported. https://github.com/google/sre_yield/issues/3')
|
|
508 elif flags & re.LOCALE:
|
|
509 raise ParseError('Flag "l" not supported. https://github.com/google/sre_yield/issues/5')
|
|
510
|
|
511 if max_count is None:
|
|
512 self.max_count = MAX_REPEAT_COUNT
|
|
513 else:
|
|
514 self.max_count = max_count
|
|
515
|
|
516 self.has_groupref = False
|
|
517
|
|
518 # Configure the parser backends
|
|
519 self.backends = {
|
|
520 sre_constants.LITERAL: lambda y: [chr(y)],
|
|
521 sre_constants.RANGE: lambda l, h: [chr(c) for c in range(l, h+1)],
|
|
522 sre_constants.SUBPATTERN: self.maybe_save,
|
|
523 sre_constants.BRANCH: self.branch_values,
|
|
524 sre_constants.MIN_REPEAT: self.max_repeat_values,
|
|
525 sre_constants.MAX_REPEAT: self.max_repeat_values,
|
|
526 sre_constants.AT: self.nothing_added,
|
|
527 sre_constants.ASSERT: self.empty_list,
|
|
528 sre_constants.ASSERT_NOT: self.empty_list,
|
|
529 sre_constants.ANY:
|
|
530 lambda _: self.in_values(((sre_constants.NEGATE,),)),
|
|
531 sre_constants.IN: self.in_values,
|
|
532 sre_constants.NOT_LITERAL: self.not_literal,
|
|
533 sre_constants.CATEGORY: self.category,
|
|
534 sre_constants.GROUPREF: self.groupref,
|
|
535 }
|
|
536 self.state = STATE_START
|
|
537 # Now build a generator that knows all possible patterns
|
|
538 self.raw = self.sub_values(sre_parse.parse(pattern, flags))
|
|
539 # Configure this class instance to know about that result
|
|
540 self.length = self.raw.__len__()
|
|
541
|
|
542 def __contains__(self, item):
|
|
543 # Since we have a regex, we can search the list really cheaply
|
|
544 return self.matcher.match(item) is not None
|
|
545
|
|
546
|
|
547 class RegexMembershipSequenceMatches(RegexMembershipSequence):
|
|
548 def __getitem__(self, i):
|
|
549 if isinstance(i, slice):
|
|
550 result = SlicedSequence(self, slicer=i)
|
|
551 if result.__len__() < 16:
|
|
552 # Short lists are unpacked
|
|
553 result = [item for item in result]
|
|
554 return result
|
|
555
|
|
556 d = {}
|
|
557 s = super(RegexMembershipSequenceMatches, self).get_item(i, d)
|
|
558 return Match(s, d, self.named_group_lookup)
|
|
559
|
|
560
|
|
561 def AllStrings(regex, flags=0, charset=CHARSET, max_count=None):
|
|
562 """Constructs an object that will generate all matching strings."""
|
|
563 return RegexMembershipSequence(regex, flags, charset, max_count=max_count)
|
|
564
|
|
565 Values = AllStrings
|
|
566
|
|
567
|
|
568 class Match(object):
|
|
569 def __init__(self, string, groups, named_groups):
|
|
570 # TODO keep group(0) only, and spans for the rest.
|
|
571 self._string = string
|
|
572 self._groups = groups
|
|
573 self._named_groups = named_groups
|
|
574 self.lastindex = len(groups) + 1
|
|
575
|
|
576 def group(self, n=0):
|
|
577 if n == 0:
|
|
578 return self._string
|
|
579 if not isinstance(n, int):
|
|
580 n = self._named_groups[n]
|
|
581 return self._groups[n]
|
|
582
|
|
583 def groups(self):
|
|
584 return tuple(self._groups[i] for i in range(1, self.lastindex))
|
|
585
|
|
586 def groupdict(self):
|
|
587 d = {}
|
|
588 for k, v in self._named_groups.items():
|
|
589 d[k] = self._groups[v]
|
|
590 return d
|
|
591
|
|
592 def span(self, n=0):
|
|
593 raise NotImplementedError()
|
|
594
|
|
595
|
|
596 def AllMatches(regex, flags=0, charset=CHARSET, max_count=None):
|
|
597 """Constructs an object that will generate all matching strings."""
|
|
598 return RegexMembershipSequenceMatches(regex, flags, charset, max_count=max_count)
|
|
599
|
|
600
|
|
601 def main(argv=None):
|
|
602 """This module can be executed on the command line for testing."""
|
|
603 if argv is None:
|
|
604 argv = sys.argv
|
|
605 for arg in argv[1:]:
|
|
606 for i in AllStrings(arg):
|
|
607 print(i)
|
|
608
|
|
609
|
|
610 if __name__ == '__main__':
|
|
611 main()
|