comparison env/lib/python3.9/site-packages/docutils/statemachine.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # $Id: statemachine.py 8435 2019-12-12 13:04:57Z milde $
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
4
5 """
6 A finite state machine specialized for regular-expression-based text filters,
7 this module defines the following classes:
8
9 - `StateMachine`, a state machine
10 - `State`, a state superclass
11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12 - `StateWS`, a state superclass for use with `StateMachineWS`
13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15 - `ViewList`, extends standard Python lists.
16 - `StringList`, string-specific ViewList.
17
18 Exception classes:
19
20 - `StateMachineError`
21 - `UnknownStateError`
22 - `DuplicateStateError`
23 - `UnknownTransitionError`
24 - `DuplicateTransitionError`
25 - `TransitionPatternNotFound`
26 - `TransitionMethodNotFound`
27 - `UnexpectedIndentationError`
28 - `TransitionCorrection`: Raised to switch to another transition.
29 - `StateCorrection`: Raised to switch to another state & transition.
30
31 Functions:
32
33 - `string2lines()`: split a multi-line string into a list of one-line strings
34
35
36 How To Use This Module
37 ======================
38 (See the individual classes, methods, and attributes for details.)
39
40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
42
43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
44 machine::
45
46 class MyState(statemachine.State):
47
48 Within the state's class definition:
49
50 a) Include a pattern for each transition, in `State.patterns`::
51
52 patterns = {'atransition': r'pattern', ...}
53
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
56
57 initial_transitions = ['atransition', ...]
58
59 c) Define a method for each transition, with the same name as the
60 transition pattern::
61
62 def atransition(self, match, context, next_state):
63 # do something
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
67
68 Transition methods may raise an `EOFError` to cut processing short.
69
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
72
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
75
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
78
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
85
86 3. Create a state machine object::
87
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
90
91 4. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
93 'inputfile'::
94
95 input_string = open('inputfile').read()
96 input_lines = statemachine.string2lines(input_string)
97
98 5. Run the state machine on the input text and collect the results, a list::
99
100 results = sm.run(input_lines)
101
102 6. Remove any lingering circular references::
103
104 sm.unlink()
105 """
106 from __future__ import print_function
107
108 __docformat__ = 'restructuredtext'
109
110 import sys
111 import re
112 import unicodedata
113 from docutils import utils
114 from docutils.utils.error_reporting import ErrorOutput
115
116 if sys.version_info >= (3, 0):
117 unicode = str # noqa
118
119
120 class StateMachine(object):
121
122 """
123 A finite state machine for text filters using regular expressions.
124
125 The input is provided in the form of a list of one-line strings (no
126 newlines). States are subclasses of the `State` class. Transitions consist
127 of regular expression patterns and transition methods, and are defined in
128 each state.
129
130 The state machine is started with the `run()` method, which returns the
131 results of processing in a list.
132 """
133
def __init__(self, state_classes, initial_state, debug=False):
    """
    Set up a new `StateMachine` and instantiate its states.

    Parameters:

    - `state_classes`: a list of `State` (sub)classes.
    - `initial_state`: a string, the class name of the initial state.
    - `debug`: a boolean; produce verbose output if true (nonzero).
    """
    # `StringList` of input lines (without newlines); filled by `run()`.
    self.input_lines = None

    # Offset of `self.input_lines` from the beginning of the file.
    self.input_offset = 0

    # Current input line.
    self.line = None

    # Offset of the current line from the start of `self.input_lines`.
    self.line_offset = -1

    # Debugging mode on/off.
    self.debug = debug

    # Name of the initial state (key to `self.states`).
    self.initial_state = initial_state

    # Name of the current state (key to `self.states`).
    self.current_state = initial_state

    # Mapping of {state_name: State_object}; populated by `add_states()`.
    self.states = {}
    self.add_states(state_classes)

    # Callables invoked whenever the current line changes.  Each one is
    # called with the source and offset of the current line (see
    # `notify_observers()`).  Reset at the end of a successful `run()`.
    self.observers = []

    # Wrapper around sys.stderr catching en-/decoding errors.
    self._stderr = ErrorOutput()
179
180
def unlink(self):
    """Remove circular references to objects no longer required.

    Every registered state is unlinked and `self.states` is set to
    ``None``.  Calling this method again afterwards is a no-op instead of
    failing on the already cleared mapping.
    """
    if self.states is None:
        # Already unlinked; nothing left to release.
        return
    for state in self.states.values():
        state.unlink()
    self.states = None
186
def run(self, input_lines, input_offset=0, context=None,
        input_source=None, initial_state=None):
    """
    Run the state machine on `input_lines`. Return results (a list).

    Reset `self.line_offset` and `self.current_state`. Run the
    beginning-of-file transition. Input one line at a time and check for a
    matching transition. If a match is found, call the transition method
    and possibly change the state. Store the context returned by the
    transition method to be passed on to the next transition matched.
    Accumulate the results returned by the transition methods in a list.
    Run the end-of-file transition. Finally, return the accumulated
    results.

    Parameters:

    - `input_lines`: a list of strings without newlines, or `StringList`.
    - `input_offset`: the line offset of `input_lines` from the beginning
      of the file.
    - `context`: application-specific storage.
    - `input_source`: name or path of source of `input_lines`.
    - `initial_state`: name of initial state.

    Any exception escaping the processing loop is re-raised unchanged; in
    debug mode its details are reported through `self.error()` first.
    """
    self.runtime_init()
    if isinstance(input_lines, StringList):
        self.input_lines = input_lines
    else:
        self.input_lines = StringList(input_lines, source=input_source)
    self.input_offset = input_offset
    self.line_offset = -1
    self.current_state = initial_state or self.initial_state
    if self.debug:
        print((
            u'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
            % (self.line_offset, u'\n| '.join(self.input_lines))),
            file=self._stderr)
    transitions = None
    results = []
    state = self.get_state()
    try:
        if self.debug:
            print('\nStateMachine.run: bof transition', file=self._stderr)
        context, result = state.bof(context)
        results.extend(result)
        while True:
            try:
                try:
                    self.next_line()
                    if self.debug:
                        source, offset = self.input_lines.info(
                            self.line_offset)
                        print((
                            u'\nStateMachine.run: line (source=%r, '
                            u'offset=%r):\n| %s'
                            % (source, offset, self.line)),
                            file=self._stderr)
                    context, next_state, result = self.check_line(
                        context, state, transitions)
                except EOFError:
                    # Raised by `next_line()` at end of input, or by a
                    # transition method to cut processing short.
                    if self.debug:
                        print((
                            '\nStateMachine.run: %s.eof transition'
                            % state.__class__.__name__),
                            file=self._stderr)
                    result = state.eof(context)
                    results.extend(result)
                    break
                else:
                    results.extend(result)
            except TransitionCorrection as exception:
                self.previous_line()  # back up for another try
                transitions = (exception.args[0],)
                if self.debug:
                    print((
                        '\nStateMachine.run: TransitionCorrection to '
                        'state "%s", transition %s.'
                        % (state.__class__.__name__, transitions[0])),
                        file=self._stderr)
                # Retry the same line in the same state.
                continue
            except StateCorrection as exception:
                self.previous_line()  # back up for another try
                next_state = exception.args[0]
                if len(exception.args) == 1:
                    transitions = None
                else:
                    transitions = (exception.args[1],)
                if self.debug:
                    # `transitions` is None when only a state was given;
                    # indexing it unconditionally would raise TypeError.
                    if transitions is None:
                        transition_name = None
                    else:
                        transition_name = transitions[0]
                    print((
                        '\nStateMachine.run: StateCorrection to state '
                        '"%s", transition %s.'
                        % (next_state, transition_name)),
                        file=self._stderr)
            else:
                transitions = None
            state = self.get_state(next_state)
    except:
        # Deliberately catches everything: report (debug only), re-raise.
        if self.debug:
            self.error()
        raise
    self.observers = []
    return results
283
def get_state(self, next_state=None):
    """
    Return the current state object, switching to `next_state` first if
    one is given.

    Parameter `next_state`: a string, the name of the next state.  A false
    value (``None``, empty string) leaves the current state untouched.

    Exception: `UnknownStateError` raised if the resulting current state
    name is not a key of `self.states`.
    """
    if next_state:
        if self.debug:
            if next_state != self.current_state:
                print((
                    '\nStateMachine.get_state: Changing state from '
                    '"%s" to "%s" (input line %s).'
                    % (self.current_state, next_state,
                       self.abs_line_number())), file=self._stderr)
        self.current_state = next_state
    state_name = self.current_state
    try:
        state = self.states[state_name]
    except KeyError:
        raise UnknownStateError(state_name)
    return state
304
def next_line(self, n=1):
    """Advance `n` lines, load that line into `self.line` and return it.

    Raises `EOFError` (after setting `self.line` to ``None``) when the new
    offset is past the end of `self.input_lines`.  Observers are notified
    whether or not the move succeeded.
    """
    try:
        self.line_offset += n
        self.line = self.input_lines[self.line_offset]
    except IndexError:
        self.line = None
        raise EOFError
    else:
        return self.line
    finally:
        self.notify_observers()
317
def is_next_line_blank(self):
    """Return True if the next line is blank or non-existent.

    A line counts as blank when it is empty after stripping whitespace.
    The result is always a `bool`.
    """
    try:
        return not self.input_lines[self.line_offset + 1].strip()
    except IndexError:
        # No next line at all: treated the same as a blank one.
        return True
324
def at_eof(self):
    """Return true if the current line is the last input line or beyond."""
    last_offset = len(self.input_lines) - 1
    return not (self.line_offset < last_offset)
328
def at_bof(self):
    """Return true if the current line is the first input line or before."""
    return not (self.line_offset > 0)
332
def previous_line(self, n=1):
    """Step back `n` lines, load that line into `self.line` and return it.

    When the new offset is negative (before the first line), `self.line`
    becomes ``None``.  Observers are notified after the move.
    """
    self.line_offset -= n
    offset = self.line_offset
    self.line = None if offset < 0 else self.input_lines[offset]
    self.notify_observers()
    return self.line
342
def goto_line(self, line_offset):
    """Jump to absolute line offset `line_offset`, load and return it.

    The absolute offset is converted to an index into `self.input_lines`
    by subtracting `self.input_offset`.  Raises `EOFError` (after setting
    `self.line` to ``None``) if that index is out of range.  Observers are
    notified in either case.
    """
    try:
        self.line_offset = line_offset - self.input_offset
        self.line = self.input_lines[self.line_offset]
    except IndexError:
        self.line = None
        raise EOFError
    else:
        return self.line
    finally:
        self.notify_observers()
355
def get_source(self, line_offset):
    """Return source of line at absolute line offset `line_offset`."""
    relative_offset = line_offset - self.input_offset
    return self.input_lines.source(relative_offset)
359
def abs_line_offset(self):
    """Return the current line's offset from the beginning of the file."""
    return self.input_offset + self.line_offset
363
def abs_line_number(self):
    """Return the 1-based line number of the current line in the file."""
    return 1 + self.input_offset + self.line_offset
367
def get_source_and_line(self, lineno=None):
    """Return (source, line) tuple for current or given line number.

    The pair is looked up in the `self.input_lines` StringList so that
    lines coming from included source files are reported correctly.

    If the optional argument `lineno` is given, it is treated as an
    absolute line number and converted to the corresponding
    (source, line) pair; otherwise the current line is used.

    Returns ``(None, None)`` when the offset is outside the list.
    """
    if lineno is None:
        offset = self.line_offset
    else:
        offset = lineno - self.input_offset - 1
    try:
        src, srcoffset = self.input_lines.info(offset)
        srcline = srcoffset + 1
    except TypeError:
        # The offset reported for the index "just past the end" is None;
        # fall back to the preceding line and add one to its number.
        src, srcline = self.get_source_and_line(offset + self.input_offset)
        return src, srcline + 1
    except IndexError:
        # `offset` is off the list
        return (None, None)
    return (src, srcline)
394
def insert_input(self, input_lines, source):
    """Insert `input_lines` directly after the current line.

    The inserted lines are wrapped in a `StringList` attributed to
    `source` and are surrounded by two empty padding lines, one before
    and one after.
    """
    insert_at = self.line_offset + 1
    # The "after" padding goes in first; the "before" padding is then
    # inserted at the same index, which pushes the "after" line down.
    self.input_lines.insert(insert_at, '',
                            source='internal padding after '+source,
                            offset=len(input_lines))
    self.input_lines.insert(insert_at, '',
                            source='internal padding before '+source,
                            offset=-1)
    # The new lines land between the two padding lines.
    self.input_lines.insert(insert_at + 1,
                            StringList(input_lines, source))
404
def get_text_block(self, flush_left=False):
    """
    Return a contiguous block of text starting at the current line.

    The current line is advanced to the last line of the block.

    If `flush_left` is true, raise `UnexpectedIndentationError` if an
    indented line is encountered before the text block ends (with a blank
    line).  The machine is still advanced to the end of the partial block
    carried by the exception before it is re-raised.
    """
    try:
        text_block = self.input_lines.get_text_block(self.line_offset,
                                                     flush_left)
        steps = len(text_block) - 1
        self.next_line(steps)
        return text_block
    except UnexpectedIndentationError as err:
        partial_block = err.args[0]
        # advance to last line of block
        self.next_line(len(partial_block) - 1)
        raise
422
def check_line(self, context, state, transitions=None):
    """
    Examine one line of input for a transition match & execute its method.

    Parameters:

    - `context`: application-dependent storage.
    - `state`: a `State` object, the current state.
    - `transitions`: an optional ordered list of transition names to try,
      instead of ``state.transition_order``.

    Return the values returned by the transition method:

    - context: possibly modified from the parameter `context`;
    - next state name (`State` subclass name);
    - the result output of the transition, a list.

    When there is no match, ``state.no_match()`` is called and its return
    value is returned.
    """
    if transitions is None:
        transitions = state.transition_order
    if self.debug:
        print((
            '\nStateMachine.check_line: state="%s", transitions=%r.'
            % (state.__class__.__name__, transitions)), file=self._stderr)
    # Transitions are tried in order; the first pattern that matches the
    # current line wins.
    for name in transitions:
        pattern, method, next_state = state.transitions[name]
        match = pattern.match(self.line)
        if match:
            if self.debug:
                print((
                    '\nStateMachine.check_line: Matched transition '
                    '"%s" in state "%s".'
                    % (name, state.__class__.__name__)), file=self._stderr)
            return method(match, context, next_state)
    if self.debug:
        print((
            '\nStateMachine.check_line: No match in state "%s".'
            % state.__class__.__name__), file=self._stderr)
    return state.no_match(context, transitions)
466
def add_state(self, state_class):
    """
    Instantiate `state_class` (a `State` subclass) and register it.

    The new state object is stored in `self.states` under the class name.

    Exception: `DuplicateStateError` raised if a state with that class
    name was already added.
    """
    key = state_class.__name__
    if key in self.states:
        raise DuplicateStateError(key)
    new_state = state_class(self, self.debug)
    self.states[key] = new_state
478
def add_states(self, state_classes):
    """
    Register every class in `state_classes` (a list of `State`
    subclasses) via `add_state()`, in order.
    """
    register = self.add_state
    for cls in state_classes:
        register(cls)
485
def runtime_init(self):
    """
    Prepare all registered states for a run by calling `runtime_init()`
    on each object in `self.states`.
    """
    for registered_state in self.states.values():
        registered_state.runtime_init()
492
def error(self):
    """Report details of the current exception to `self._stderr`.

    Prints the exception type and value, the absolute input line number,
    and the module, line and function as supplied by `_exception_data()`.
    """
    exc_type, exc_value, module, line, function = _exception_data()
    stream = self._stderr
    print(u'%s: %s' % (exc_type, exc_value), file=stream)
    print('input line %s' % (self.abs_line_number()), file=stream)
    location = u'module %s, line %s, function %s' % (module, line, function)
    print(location, file=stream)
500
def attach_observer(self, observer):
    """
    Register `observer`, a function or bound method taking two arguments:
    the source and offset of the current line.
    """
    registered = self.observers
    registered.append(observer)
507
def detach_observer(self, observer):
    """Unregister `observer` (its first occurrence in `self.observers`)."""
    registered = self.observers
    registered.remove(observer)
510
def notify_observers(self):
    """Call every observer with the current line's source and offset.

    If the current offset is outside `self.input_lines`, observers receive
    ``(None, None)`` instead.
    """
    for callback in self.observers:
        try:
            source_and_offset = self.input_lines.info(self.line_offset)
        except IndexError:
            source_and_offset = (None, None)
        callback(*source_and_offset)
518
519
520 class State(object):
521
522 """
523 State superclass. Contains a list of transitions, and transition methods.
524
525 Transition methods all have the same signature. They take 3 parameters:
526
527 - An `re` match object. ``match.string`` contains the matched input line,
528 ``match.start()`` gives the start index of the match, and
529 ``match.end()`` gives the end index.
530 - A context object, whose meaning is application-defined (initial value
531 ``None``). It can be used to store any information required by the state
532 machine, and the returned context is passed on to the next transition
533 method unchanged.
534 - The name of the next state, a string, taken from the transitions list;
535 normally it is returned unchanged, but it may be altered by the
536 transition method if necessary.
537
538 Transition methods all return a 3-tuple:
539
540 - A context object, as (potentially) modified by the transition method.
541 - The next state name (a return value of ``None`` means no state change).
542 - The processing result, a list, which is accumulated by the state
543 machine.
544
545 Transition methods may raise an `EOFError` to cut processing short.
546
547 There are two implicit transitions, and corresponding transition methods
548 are defined: `bof()` handles the beginning-of-file, and `eof()` handles
549 the end-of-file. These methods have non-standard signatures and return
550 values. `bof()` returns the initial context and results, and may be used
551 to return a header string, or do any other processing needed. `eof()`
552 should handle any remaining context and wrap things up; it returns the
553 final processing result.
554
555 Typical applications need only subclass `State` (or a subclass), set the
556 `patterns` and `initial_transitions` class attributes, and provide
557 corresponding transition methods. The default object initialization will
558 take care of constructing the list of transitions.
559 """
560
561 patterns = None
562 """
563 {Name: pattern} mapping, used by `make_transition()`. Each pattern may
564 be a string or a compiled `re` pattern. Override in subclasses.
565 """
566
567 initial_transitions = None
568 """
569 A list of transitions to initialize when a `State` is instantiated.
570 Each entry is either a transition name string, or a (transition name, next
571 state name) pair. See `make_transitions()`. Override in subclasses.
572 """
573
574 nested_sm = None
575 """
576 The `StateMachine` class for handling nested processing.
577
578 If left as ``None``, `nested_sm` defaults to the class of the state's
579 controlling state machine. Override it in subclasses to avoid the default.
580 """
581
582 nested_sm_kwargs = None
583 """
584 Keyword arguments dictionary, passed to the `nested_sm` constructor.
585
586 Two keys must have entries in the dictionary:
587
588 - Key 'state_classes' must be set to a list of `State` classes.
589 - Key 'initial_state' must be set to the name of the initial state class.
590
591 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
592 class of the current state, and 'initial_state' defaults to the name of
593 the class of the current state. Override in subclasses to avoid the
594 defaults.
595 """
596
def __init__(self, state_machine, debug=False):
    """
    Set up a `State` object and build its initial transitions.

    Parameters:

    - `state_machine`: the controlling `StateMachine` object.
    - `debug`: a boolean; produce verbose output if true.
    """
    # Transition names in search order.
    self.transition_order = []

    # Mapping of transition names to 3-tuples
    # (compiled_pattern, transition_method, next_state_name).  Built per
    # instance (instead of as a class attribute) because it may make
    # forward references to patterns and methods in this or other classes.
    self.transitions = {}

    self.add_initial_transitions()

    # Reference to the controlling `StateMachine` object.
    self.state_machine = state_machine

    # Debugging mode on/off.
    self.debug = debug

    # Fill in defaults for nested processing when the class does not
    # override them: same machine class, this state class only.
    if self.nested_sm is None:
        self.nested_sm = self.state_machine.__class__
    if self.nested_sm_kwargs is None:
        own_class = self.__class__
        self.nested_sm_kwargs = {'state_classes': [own_class],
                                 'initial_state': own_class.__name__}
632
def runtime_init(self):
    """
    Hook for per-run initialization of this `State`; called from
    `self.state_machine.run()`.  Does nothing by default.
    """
    return None
639
def unlink(self):
    """Break the circular reference to the controlling state machine."""
    setattr(self, 'state_machine', None)
643
def add_initial_transitions(self):
    """Make and add transitions listed in `self.initial_transitions`.

    Does nothing when `self.initial_transitions` is empty or ``None``.
    """
    if not self.initial_transitions:
        return
    names, transitions = self.make_transitions(self.initial_transitions)
    self.add_transitions(names, transitions)
650
def add_transitions(self, names, transitions):
    """
    Add a list of transitions to the start of the transition list.

    Parameters:

    - `names`: a list of transition names.
    - `transitions`: a mapping of names to transition tuples.

    All names are validated before anything is modified.

    Exceptions: `DuplicateTransitionError` if a name is already
    registered, `UnknownTransitionError` if a name is missing from
    `transitions`.
    """
    known = self.transitions
    for candidate in names:
        if candidate in known:
            raise DuplicateTransitionError(candidate)
        if candidate not in transitions:
            raise UnknownTransitionError(candidate)
    # Prepend, keeping the relative order of `names`.
    self.transition_order[:0] = names
    known.update(transitions)
669
def add_transition(self, name, transition):
    """
    Add a single transition to the start of the transition list.

    Parameter `transition`: a ready-made transition 3-tuple.

    Exception: `DuplicateTransitionError` if `name` is already registered.
    """
    if name in self.transitions:
        raise DuplicateTransitionError(name)
    self.transition_order.insert(0, name)
    self.transitions[name] = transition
682
def remove_transition(self, name):
    """
    Remove a transition by `name`.

    The transition is deleted from `self.transitions` and its name is
    removed from `self.transition_order`.

    Exception: `UnknownTransitionError` if `name` is not registered.
    Unrelated exceptions (e.g. `KeyboardInterrupt`) propagate unchanged.
    """
    try:
        del self.transitions[name]
        self.transition_order.remove(name)
    except (KeyError, ValueError):
        # KeyError: not in the mapping; ValueError: not in the order list.
        raise UnknownTransitionError(name)
694
def make_transition(self, name, next_state=None):
    """
    Build and return a transition 3-tuple for `name`.

    This is a convenience function to simplify transition creation.

    Parameters:

    - `name`: a string, the name of the transition pattern & method. This
      `State` object must have a method called '`name`', and a dictionary
      `self.patterns` containing a key '`name`'.
    - `next_state`: a string, the name of the next `State` object for this
      transition. A value of ``None`` (or absent) implies no state change
      (i.e., continue with the same state).

    Returns ``(compiled_pattern, method, next_state)``.  A pattern given
    as a string is compiled and the compiled object is stored back into
    `self.patterns`.

    Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
    """
    own_name = self.__class__.__name__
    if next_state is None:
        next_state = own_name
    try:
        pattern = self.patterns[name]
        if not hasattr(pattern, 'match'):
            # Not compiled yet: compile once and cache the result.
            compiled = re.compile(pattern)
            self.patterns[name] = compiled
            pattern = compiled
    except KeyError:
        raise TransitionPatternNotFound(
            '%s.patterns[%r]' % (own_name, name))
    try:
        method = getattr(self, name)
    except AttributeError:
        raise TransitionMethodNotFound(
            '%s.%s' % (own_name, name))
    return (pattern, method, next_state)
727
def make_transitions(self, name_list):
    """
    Return a list of transition names and a transition mapping.

    Parameter `name_list`: a list, where each entry is either a transition
    name string, or a 1- or 2-tuple (transition name, optional next state
    name).
    """
    ordered_names = []
    mapping = {}
    for entry in name_list:
        if isinstance(entry, str):
            transition = self.make_transition(entry)
            key = entry
        else:
            # Tuple form: unpack into (name[, next_state]).
            transition = self.make_transition(*entry)
            key = entry[0]
        mapping[key] = transition
        ordered_names.append(key)
    return ordered_names, mapping
747
def no_match(self, context, transitions):
    """
    Fallback used by `StateMachine.check_line()` when no transition
    pattern matched the current line.

    Returns a tuple shaped like a transition method's result:

    - context: unchanged;
    - next state name: ``None``;
    - empty result list.

    Override in subclasses to catch this event.
    """
    next_state = None
    result = []
    return context, next_state, result
761
def bof(self, context):
    """
    Beginning-of-file hook. Returns `context` unchanged together with an
    empty result list.

    Override in subclasses.

    Parameter `context`: application-defined storage.
    """
    result = []
    return context, result
771
def eof(self, context):
    """
    End-of-file hook. Returns an empty result list.

    Override in subclasses.

    Parameter `context`: application-defined storage.
    """
    return list()
781
def nop(self, match, context, next_state):
    """
    Transition method that performs no action.

    Returns `context` and `next_state` unchanged with an empty result
    list. Useful for simple state changes (actionless transitions).
    """
    result = []
    return context, next_state, result
790
791
792 class StateMachineWS(StateMachine):
793
794 """
795 `StateMachine` subclass specialized for whitespace recognition.
796
797 There are three methods provided for extracting indented text blocks:
798
799 - `get_indented()`: use when the indent is unknown.
800 - `get_known_indented()`: use when the indent is known for all lines.
801 - `get_first_known_indented()`: use when only the first line's indent is
802 known.
803 """
804
def get_indented(self, until_blank=False, strip_indent=True):
    """
    Return a block of indented lines of text, and info.

    Extract an indented block where the indent is unknown for all lines.
    The current line is advanced to the last line of the block; leading
    blank lines are trimmed from the returned block.

    :Parameters:
        - `until_blank`: Stop collecting at the first blank line if true.
        - `strip_indent`: Strip common leading indent if true (default).

    :Return:
        - the indented block (a list of lines of text),
        - its indent,
        - its first line offset from BOF, and
        - whether or not it finished with a blank line.
    """
    first_offset = self.abs_line_offset()
    block, indent, blank_finish = self.input_lines.get_indented(
        self.line_offset, until_blank, strip_indent)
    if block:
        # advance to last indented line
        self.next_line(len(block) - 1)
    # Drop leading blank lines, moving the reported start offset along.
    while block:
        if block[0].strip():
            break
        block.trim_start()
        first_offset += 1
    return block, indent, first_offset, blank_finish
830
def get_known_indented(self, indent, until_blank=False, strip_indent=True):
    """
    Return an indented block and info.

    Extract an indented block where the indent is known for all lines.
    Starting with the current line, extract the entire text block with at
    least `indent` indentation (which must be whitespace, except for the
    first line).  The current line is advanced to the last line of the
    block; leading blank lines are trimmed from the returned block.

    :Parameters:
        - `indent`: The number of indent columns/characters.
        - `until_blank`: Stop collecting at the first blank line if true.
        - `strip_indent`: Strip `indent` characters of indentation if true
          (default).

    :Return:
        - the indented block,
        - its first line offset from BOF, and
        - whether or not it finished with a blank line.
    """
    first_offset = self.abs_line_offset()
    block, indent, blank_finish = self.input_lines.get_indented(
        self.line_offset, until_blank, strip_indent,
        block_indent=indent)
    # advance to last indented line
    self.next_line(len(block) - 1)
    # Drop leading blank lines, moving the reported start offset along.
    while block:
        if block[0].strip():
            break
        block.trim_start()
        first_offset += 1
    return block, first_offset, blank_finish
860
def get_first_known_indented(self, indent, until_blank=False,
                             strip_indent=True, strip_top=True):
    """
    Return an indented block and info.

    Extract an indented block where the indent is known for the first line
    and unknown for all other lines.  The current line is advanced to the
    last line of the block.

    :Parameters:
        - `indent`: The first line's indent (# of columns/characters).
        - `until_blank`: Stop collecting at the first blank line if true
          (1).
        - `strip_indent`: Strip `indent` characters of indentation if true
          (1, default).
        - `strip_top`: Strip blank lines from the beginning of the block.

    :Return:
        - the indented block,
        - its indent,
        - its first line offset from BOF, and
        - whether or not it finished with a blank line.
    """
    first_offset = self.abs_line_offset()
    block, indent, blank_finish = self.input_lines.get_indented(
        self.line_offset, until_blank, strip_indent,
        first_indent=indent)
    # advance to last indented line
    self.next_line(len(block) - 1)
    if strip_top:
        # Drop leading blank lines, moving the start offset along.
        while block:
            if block[0].strip():
                break
            block.trim_start()
            first_offset += 1
    return block, indent, first_offset, blank_finish
893
894
class StateWS(State):

    """
    State superclass specialized for whitespace (blank lines & indents).

    Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
    lines) and 'indent' (for indented text blocks) are added automatically,
    before any other transitions.  The transition method `blank()` handles
    blank lines and `indent()` handles nested indented blocks.  Indented
    blocks trigger a new state machine to be created by `indent()` and run.
    The class of the state machine to be created is in `indent_sm`, and the
    constructor keyword arguments are in the dictionary `indent_sm_kwargs`.

    The methods `known_indent()` and `first_known_indent()` are provided for
    indented blocks where the indent (all lines' and first line's only,
    respectively) is known to the transition method, along with the attributes
    `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
    is triggered automatically.
    """

    indent_sm = None
    """
    The `StateMachine` class handling indented text blocks.

    If left as ``None``, `indent_sm` defaults to the value of
    `State.nested_sm`.  Override it in subclasses to avoid the default.
    """

    indent_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `indent_sm` constructor.

    If left as ``None``, `indent_sm_kwargs` defaults to the value of
    `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
    """

    known_indent_sm = None
    """
    The `StateMachine` class handling known-indented text blocks.

    If left as ``None``, `known_indent_sm` defaults to the value of
    `indent_sm`.  Override it in subclasses to avoid the default.
    """

    known_indent_sm_kwargs = None
    """
    Keyword arguments dictionary, passed to the `known_indent_sm` constructor.

    If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
    `indent_sm_kwargs`. Override it in subclasses to avoid the default.
    """

    ws_patterns = {'blank': re.compile(' *$'),
                   'indent': re.compile(' +')}
    """Patterns for default whitespace transitions.  May be overridden in
    subclasses."""

    ws_initial_transitions = ('blank', 'indent')
    """Default initial whitespace transitions, added before those listed in
    `State.initial_transitions`.  May be overridden in subclasses."""

    def __init__(self, state_machine, debug=False):
        """
        Initialize a `StateWS` object; extends `State.__init__()`.

        Check for indent state machine attributes, set defaults if not set.
        """
        State.__init__(self, state_machine, debug)
        # Order matters: the known-indent defaults are taken from the
        # (possibly just defaulted) indent attributes.
        if self.indent_sm is None:
            self.indent_sm = self.nested_sm
        if self.indent_sm_kwargs is None:
            self.indent_sm_kwargs = self.nested_sm_kwargs
        if self.known_indent_sm is None:
            self.known_indent_sm = self.indent_sm
        if self.known_indent_sm_kwargs is None:
            self.known_indent_sm_kwargs = self.indent_sm_kwargs

    def add_initial_transitions(self):
        """
        Add whitespace-specific transitions before those defined in subclass.

        Extends `State.add_initial_transitions()`.
        """
        State.add_initial_transitions(self)
        if self.patterns is None:
            self.patterns = {}
        self.patterns.update(self.ws_patterns)
        names, transitions = self.make_transitions(
            self.ws_initial_transitions)
        self.add_transitions(names, transitions)

    def blank(self, match, context, next_state):
        """Handle blank lines. Does nothing. Override in subclasses."""
        return self.nop(match, context, next_state)

    def indent(self, match, context, next_state):
        """
        Handle an indented text block. Extend or override in subclasses.

        Recursively run the registered state machine for indented blocks
        (`self.indent_sm`).

        Return a ``(context, next_state, results)`` tuple, where `results`
        is whatever the nested state machine's `run()` returned.
        """
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_indented()
        sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
        results = sm.run(indented, input_offset=line_offset)
        return context, next_state, results

    def known_indent(self, match, context, next_state):
        """
        Handle a known-indent text block. Extend or override in subclasses.

        Recursively run the registered state machine for known-indent indented
        blocks (`self.known_indent_sm`). The indent is the length of the
        match, ``match.end()``.

        Return a ``(context, next_state, results)`` tuple.
        """
        indented, line_offset, blank_finish = \
            self.state_machine.get_known_indented(match.end())
        sm = self.known_indent_sm(debug=self.debug,
                                  **self.known_indent_sm_kwargs)
        results = sm.run(indented, input_offset=line_offset)
        return context, next_state, results

    def first_known_indent(self, match, context, next_state):
        """
        Handle an indented text block (first line's indent known).

        Extend or override in subclasses.

        Recursively run the registered state machine for known-indent indented
        blocks (`self.known_indent_sm`). The indent is the length of the
        match, ``match.end()``.

        Return a ``(context, next_state, results)`` tuple.
        """
        # `get_first_known_indented()` returns four values (block, indent,
        # offset, blank_finish); unpacking only three raised ValueError.
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_first_known_indented(match.end())
        sm = self.known_indent_sm(debug=self.debug,
                                  **self.known_indent_sm_kwargs)
        results = sm.run(indented, input_offset=line_offset)
        return context, next_state, results
1034
1035
1036 class _SearchOverride(object):
1037
1038 """
1039 Mix-in class to override `StateMachine` regular expression behavior.
1040
1041 Changes regular expression matching, from the default `re.match()`
1042 (succeeds only if the pattern matches at the start of `self.line`) to
1043 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1044 When subclassing a `StateMachine`, list this class **first** in the
1045 inheritance list of the class definition.
1046 """
1047
1048 def match(self, pattern):
1049 """
1050 Return the result of a regular expression search.
1051
1052 Overrides `StateMachine.match()`.
1053
1054 Parameter `pattern`: `re` compiled regular expression.
1055 """
1056 return pattern.search(self.line)
1057
1058
class SearchStateMachine(_SearchOverride, StateMachine):

    """
    A `StateMachine` whose patterns are applied with `re.search()` rather
    than `re.match()` (behaviour supplied by the `_SearchOverride` mix-in).
    """
1062
1063
class SearchStateMachineWS(_SearchOverride, StateMachineWS):

    """
    A `StateMachineWS` whose patterns are applied with `re.search()` rather
    than `re.match()` (behaviour supplied by the `_SearchOverride` mix-in).
    """
1067
1068
class ViewList(object):

    """
    List with extended functionality: slices of ViewList objects are child
    lists, linked to their parents. Changes made to a child list also affect
    the parent list.  A child list is effectively a "view" (in the SQL sense)
    of the parent list.  Changes to parent lists, however, do *not* affect
    active child lists.  If a parent list is changed, any active child lists
    should be recreated.

    The start and end of the slice can be trimmed using the `trim_start()` and
    `trim_end()` methods, without affecting the parent list.  The link between
    child and parent lists can be broken by calling `disconnect()` on the
    child list.

    Also, ViewList objects keep track of the source & offset of each item.
    This information is accessible via the `source()`, `offset()`, and
    `info()` methods.

    Invariant: `self.data` and `self.items` always have the same length.
    """

    def __init__(self, initlist=None, source=None, items=None,
                 parent=None, parent_offset=None):
        self.data = []
        """The actual list of data, flattened from various sources."""

        self.items = []
        """A list of (source, offset) pairs, same length as `self.data`: the
        source of each line and the offset of each line from the beginning of
        its source."""

        self.parent = parent
        """The parent list."""

        self.parent_offset = parent_offset
        """Offset of this list from the beginning of the parent list."""

        if isinstance(initlist, ViewList):
            self.data = initlist.data[:]
            self.items = initlist.items[:]
        elif initlist is not None:
            self.data = list(initlist)
            if items:
                self.items = items
            else:
                # Use the materialized list so that unsized iterables work.
                self.items = [(source, i) for i in range(len(self.data))]
        assert len(self.data) == len(self.items), 'data mismatch'

    def __str__(self):
        return str(self.data)

    def __repr__(self):
        return '%s(%s, items=%s)' % (self.__class__.__name__,
                                     self.data, self.items)

    def __lt__(self, other):
        return self.data < self.__cast(other)

    def __le__(self, other):
        return self.data <= self.__cast(other)

    def __eq__(self, other):
        return self.data == self.__cast(other)

    def __ne__(self, other):
        return self.data != self.__cast(other)

    def __gt__(self, other):
        return self.data > self.__cast(other)

    def __ge__(self, other):
        return self.data >= self.__cast(other)

    def __cmp__(self, other):
        """Three-way comparison: return -1, 0 or 1."""
        # from https://docs.python.org/3.0/whatsnew/3.0.html
        mine = self.data
        yours = self.__cast(other)
        return (mine > yours) - (mine < yours)

    def __cast(self, other):
        """Return the plain data of `other` if it is a ViewList."""
        if isinstance(other, ViewList):
            return other.data
        else:
            return other

    def __contains__(self, item):
        return item in self.data

    def __len__(self):
        return len(self.data)

    def _slice_bounds(self, index):
        """
        Return absolute ``(start, stop)`` bounds for the slice `index`.

        Negative and omitted bounds are resolved against the current length;
        `stop` is never smaller than `start` (as in list slicing).
        """
        start, stop, _ = index.indices(len(self.data))
        return start, max(start, stop)

    # The __getitem__()/__setitem__() methods check whether the index
    # is a slice first, since indexing a native list with a slice object
    # just works.

    def __getitem__(self, i):
        """Return the item at `i`, or a linked child list for a slice."""
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            return self.__class__(self.data[i.start:i.stop],
                                  items=self.items[i.start:i.stop],
                                  parent=self, parent_offset=i.start or 0)
        else:
            return self.data[i]

    def __setitem__(self, i, item):
        """Assign `item` at `i` (slices require a ViewList); update parent."""
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            if not isinstance(item, ViewList):
                raise TypeError('assigning non-ViewList to ViewList slice')
            # Resolve the bounds *before* mutating: the parent range must
            # cover the span that is being replaced, not the new length.
            start, stop = self._slice_bounds(i)
            self.data[start:stop] = item.data
            self.items[start:stop] = item.items
            assert len(self.data) == len(self.items), 'data mismatch'
            if self.parent:
                self.parent[start + self.parent_offset
                            : stop + self.parent_offset] = item
        else:
            self.data[i] = item
            if self.parent:
                # A negative index is relative to this list, not the parent.
                index = i + len(self.data) if i < 0 else i
                self.parent[index + self.parent_offset] = item

    def __delitem__(self, i):
        """Delete the item or slice at `i`, here and in the parent list."""
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            # Dispatch on the index type: a native list accepts slice
            # objects, so relying on TypeError deleted the range twice.
            start, stop = self._slice_bounds(i)
            del self.data[start:stop]
            del self.items[start:stop]
            if self.parent:
                del self.parent[start + self.parent_offset
                                : stop + self.parent_offset]
        else:
            size = len(self.data)
            del self.data[i]
            del self.items[i]
            if self.parent:
                index = i + size if i < 0 else i
                del self.parent[index + self.parent_offset]

    def __add__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(self.data + other.data,
                                  items=(self.items + other.items))
        else:
            raise TypeError('adding non-ViewList to a ViewList')

    def __radd__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(other.data + self.data,
                                  items=(other.items + self.items))
        else:
            raise TypeError('adding ViewList to a non-ViewList')

    def __iadd__(self, other):
        if isinstance(other, ViewList):
            self.data += other.data
            # Keep the (source, offset) records in step with the data.
            self.items += other.items
        else:
            raise TypeError('argument to += must be a ViewList')
        return self

    def __mul__(self, n):
        return self.__class__(self.data * n, items=(self.items * n))

    __rmul__ = __mul__

    def __imul__(self, n):
        self.data *= n
        self.items *= n
        return self

    def extend(self, other):
        """Append the contents of ViewList `other`, here and in the parent."""
        if not isinstance(other, ViewList):
            raise TypeError('extending a ViewList with a non-ViewList')
        if self.parent:
            self.parent.insert(len(self.data) + self.parent_offset, other)
        self.data.extend(other.data)
        self.items.extend(other.items)

    def append(self, item, source=None, offset=0):
        """
        Append `item` with its `source` and `offset`.

        Without a `source`, `item` must be a ViewList and is passed to
        `extend()`.
        """
        if source is None:
            self.extend(item)
        else:
            if self.parent:
                self.parent.insert(len(self.data) + self.parent_offset, item,
                                   source, offset)
            self.data.append(item)
            self.items.append((source, offset))

    def insert(self, i, item, source=None, offset=0):
        """
        Insert `item` before index `i`, here and in the parent list.

        Without a `source`, `item` must be a ViewList whose contents (and
        source information) are spliced in.
        """
        if source is None:
            if not isinstance(item, ViewList):
                raise TypeError('inserting non-ViewList with no source given')
            self.data[i:i] = item.data
            self.items[i:i] = item.items
            if self.parent:
                index = (len(self.data) + i) % len(self.data)
                self.parent.insert(index + self.parent_offset, item)
        else:
            self.data.insert(i, item)
            self.items.insert(i, (source, offset))
            if self.parent:
                index = (len(self.data) + i) % len(self.data)
                self.parent.insert(index + self.parent_offset, item,
                                   source, offset)

    def pop(self, i=-1):
        """Remove and return the item at `i` (default: the last item)."""
        if self.parent:
            index = (len(self.data) + i) % len(self.data)
            self.parent.pop(index + self.parent_offset)
        self.items.pop(i)
        return self.data.pop(i)

    def trim_start(self, n=1):
        """
        Remove items from the start of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        del self.data[:n]
        del self.items[:n]
        if self.parent:
            self.parent_offset += n

    def trim_end(self, n=1):
        """
        Remove items from the end of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        # Do not slice with ``-n``: for n == 0 that is ``[0:]``, which would
        # delete the whole list instead of nothing.
        keep = len(self.data) - n
        del self.data[keep:]
        del self.items[keep:]

    def remove(self, item):
        """Delete the first occurrence of `item`."""
        index = self.index(item)
        del self[index]

    def count(self, item):
        return self.data.count(item)

    def index(self, item):
        return self.data.index(item)

    def reverse(self):
        """Reverse in place; this breaks the link to the parent list."""
        self.data.reverse()
        self.items.reverse()
        self.parent = None

    def sort(self, *args):
        """Sort data (with its items); breaks the link to the parent list."""
        tmp = sorted(zip(self.data, self.items), *args)
        self.data = [entry[0] for entry in tmp]
        self.items = [entry[1] for entry in tmp]
        self.parent = None

    def info(self, i):
        """Return source & offset for index `i`."""
        try:
            return self.items[i]
        except IndexError:
            if i == len(self.data):     # Just past the end
                return self.items[i - 1][0], None
            else:
                raise

    def source(self, i):
        """Return source for index `i`."""
        return self.info(i)[0]

    def offset(self, i):
        """Return offset for index `i`."""
        return self.info(i)[1]

    def disconnect(self):
        """Break link between this list and parent list."""
        self.parent = None

    def xitems(self):
        """Return iterator yielding (source, offset, value) tuples."""
        for (value, (source, offset)) in zip(self.data, self.items):
            yield (source, offset, value)

    def pprint(self):
        """Print the list in `grep` format (`source:offset:value` lines)"""
        for line in self.xitems():
            print("%s:%d:%s" % line)
1336
1337
class StringList(ViewList):

    """A `ViewList` with string-specific methods."""

    def trim_left(self, length, start=0, end=sys.maxsize):
        """
        Cut the first `length` characters from every item with an index in
        ``start:end``.  The list is changed in place; the removed text is not
        checked for being whitespace, and the slice parent is not affected.
        """
        shortened = [text[length:] for text in self.data[start:end]]
        self.data[start:end] = shortened

    def get_text_block(self, start, flush_left=False):
        """
        Return the contiguous block of non-blank lines beginning at `start`.

        The block ends before the first blank line or at the end of the list.
        If `flush_left` is true, raise `UnexpectedIndentationError` when a
        line starting with a space is met before the block ends.
        """
        stop = start
        total = len(self.data)
        while stop < total:
            text = self.data[stop]
            if not text.strip():
                break
            # `text` is non-blank here, so it has a first character.
            if flush_left and text.startswith(' '):
                source, offset = self.info(stop)
                raise UnexpectedIndentationError(self[start:stop], source,
                                                 offset + 1)
            stop += 1
        return self[start:stop]

    def get_indented(self, start=0, until_blank=False, strip_indent=True,
                     block_indent=None, first_indent=None):
        """
        Extract and return a StringList of indented lines of text.

        Lines are collected from `start` up to, but not including, the first
        unindented (or insufficiently indented) line.  Unless `strip_indent`
        is false, the smallest indentation found (or the known one) is
        removed from the collected lines.

        :Parameters:
          - `start`: The index of the first line to examine.
          - `until_blank`: Stop collecting at the first blank line if true.
          - `strip_indent`: Strip common leading indent if true (default).
          - `block_indent`: The indent of the entire block, if known.
          - `first_indent`: The indent of the first line, if known.

        :Return:
          - a StringList of indented lines with minimum indent removed;
          - the amount of the indent;
          - a boolean: did the indented block finish with a blank line or EOF?
        """
        indent = block_indent   # stays None until an indent is measured
        stop = start
        if block_indent is not None and first_indent is None:
            first_indent = block_indent
        if first_indent is not None:
            # The first line is accepted as is; scanning starts after it.
            stop += 1
        total = len(self.data)
        while stop < total:
            text = self.data[stop]
            if text and (not text.startswith(' ')
                         or (block_indent is not None
                             and text[:block_indent].strip())):
                # Unindented or under-indented line: the block is over.  It
                # finished properly iff the line before this one is blank.
                blank_finish = ((stop > start)
                                and not self.data[stop - 1].strip())
                break
            content = text.lstrip()
            if not content:
                # blank line
                if until_blank:
                    blank_finish = 1
                    break
            elif block_indent is None:
                width = len(text) - len(content)
                indent = width if indent is None else min(indent, width)
            stop += 1
        else:
            # ran off the end of the lines
            blank_finish = 1
        block = self[start:stop]
        if first_indent is not None and block:
            block.data[0] = block.data[0][first_indent:]
        if indent and strip_indent:
            # The first line was already handled when its indent was known.
            block.trim_left(indent, start=(first_indent is not None))
        return block, indent or 0, blank_finish

    def get_2D_block(self, top, left, bottom, right, strip_indent=True):
        """
        Return the rectangular block of rows ``top:bottom``, columns
        ``left:right``, with trailing whitespace removed from each row.

        If `strip_indent` is true, the indentation common to the non-empty
        rows is removed as well.
        """
        block = self[top:bottom]
        indent = right
        for row, text in enumerate(block.data):
            # get slice from line, care for combining characters
            ci = utils.column_indices(text)
            # NOTE(review): `left` and `right` are rebound below and carried
            # over into the next row -- confirm that this is intended.
            try:
                left = ci[left]
            except IndexError:
                left += len(text) - len(ci)
            try:
                right = ci[right]
            except IndexError:
                right += len(text) - len(ci)
            line = text[left:right].rstrip()
            block.data[row] = line
            if line:
                indent = min(indent, len(line) - len(line.lstrip()))
        if strip_indent and 0 < indent < right:
            block.data = [line[indent:] for line in block.data]
        return block

    def pad_double_width(self, pad_char):
        """
        Pad all double-width characters in self by appending `pad_char` to each.
        For East Asian language support.
        """
        east_asian_width = unicodedata.east_asian_width
        for row, text in enumerate(self.data):
            if not isinstance(text, unicode):
                continue
            # 'W'ide & 'F'ull-width characters get a pad character appended
            self.data[row] = ''.join(
                char + pad_char if east_asian_width(char) in 'WF' else char
                for char in text)

    def replace(self, old, new):
        """Replace all occurrences of substring `old` with `new`."""
        for row, text in enumerate(self.data):
            self.data[row] = text.replace(old, new)
1473
1474
class StateMachineError(Exception):
    """Common base class of the state machine error classes below."""


class UnknownStateError(StateMachineError):
    pass


class DuplicateStateError(StateMachineError):
    pass


class UnknownTransitionError(StateMachineError):
    pass


class DuplicateTransitionError(StateMachineError):
    pass


class TransitionPatternNotFound(StateMachineError):
    pass


class TransitionMethodNotFound(StateMachineError):
    pass


class UnexpectedIndentationError(StateMachineError):
    """
    Raised by `StringList.get_text_block()` when `flush_left` is true and an
    indented line is found inside the text block.  The arguments are the
    block collected so far, the source, and the line number.
    """
1483
1484
class TransitionCorrection(Exception):

    """
    Exception used by a transition method to switch to another transition.

    It takes a single argument: the name of the new transition.
    """
1492
1493
class StateCorrection(Exception):

    """
    Exception used by a transition method to switch to another state.

    It takes one or two arguments: the name of the new state and,
    optionally, the name of a new transition.
    """
1502
def string2lines(astring, tab_width=8, convert_whitespace=False,
                 whitespace=re.compile('[\v\f]')):
    """
    Split `astring` into a list of one-line strings without newlines, with
    tabs expanded and trailing whitespace removed.

    A tab is replaced by 1 to `tab_width` spaces, such that the following
    character lands on an index that is a multiple of `tab_width` (8 by
    default).

    Parameters:

    - `astring`: a multi-line string.
    - `tab_width`: the number of columns between tab stops.
    - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
    - `whitespace`: compiled pattern of the characters replaced by a space
      when `convert_whitespace` is true.
    """
    text = whitespace.sub(' ', astring) if convert_whitespace else astring
    # TODO: add a test for too long lines (max_line_length = 1000, say)?
    # See bug #381.
    lines = []
    for raw_line in text.splitlines():
        lines.append(raw_line.expandtabs(tab_width).rstrip())
    return lines
1523
1524 def _exception_data():
1525 """
1526 Return exception information:
1527
1528 - the exception's class name;
1529 - the exception object;
1530 - the name of the file containing the offending code;
1531 - the line number of the offending code;
1532 - the function name of the offending code.
1533 """
1534 type, value, traceback = sys.exc_info()
1535 while traceback.tb_next:
1536 traceback = traceback.tb_next
1537 code = traceback.tb_frame.f_code
1538 return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1539 code.co_name)