Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/docutils/parsers/rst/__init__.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # $Id: __init__.py 8344 2019-08-26 12:10:43Z milde $ | |
2 # Author: David Goodger <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`, | |
7 the reStructuredText parser. | |
8 | |
9 | |
10 Usage | |
11 ===== | |
12 | |
13 1. Create a parser:: | |
14 | |
15 parser = docutils.parsers.rst.Parser() | |
16 | |
17 Several optional arguments may be passed to modify the parser's behavior. | |
18 Please see `Customizing the Parser`_ below for details. | |
19 | |
20 2. Gather input (a multi-line string), by reading a file or the standard | |
21 input:: | |
22 | |
23 input = sys.stdin.read() | |
24 | |
25 3. Create a new empty `docutils.nodes.document` tree:: | |
26 | |
27 document = docutils.utils.new_document(source, settings) | |
28 | |
29 See `docutils.utils.new_document()` for parameter details. | |
30 | |
31 4. Run the parser, populating the document tree:: | |
32 | |
33 parser.parse(input, document) | |
34 | |
35 | |
36 Parser Overview | |
37 =============== | |
38 | |
39 The reStructuredText parser is implemented as a state machine, examining its | |
40 input one line at a time. To understand how the parser works, please first | |
41 become familiar with the `docutils.statemachine` module, then see the | |
42 `states` module. | |
43 | |
44 | |
45 Customizing the Parser | |
46 ---------------------- | |
47 | |
48 Anything that isn't already customizable is that way simply because that type | |
49 of customizability hasn't been implemented yet. Patches welcome! | |
50 | |
51 When instantiating an object of the `Parser` class, two parameters may be | |
52 passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=True`` to enable an | |
53 initial RFC-2822 style header block, parsed as a "field_list" element (with | |
54 "class" attribute set to "rfc2822"). Currently this is the only body-level | |
55 element which is customizable without subclassing. (Tip: subclass `Parser` | |
56 and change its "state_classes" and "initial_state" attributes to refer to new | |
57 classes. Contact the author if you need more details.) | |
58 | |
59 The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass. | |
60 It handles inline markup recognition. A common extension is the addition of | |
61 further implicit hyperlinks, like "RFC 2822". This can be done by subclassing | |
62 `states.Inliner`, adding a new method for the implicit markup, and adding a | |
63 ``(pattern, method)`` pair to the "implicit_dispatch" attribute of the | |
64 subclass. See `states.Inliner.implicit_inline()` for details. Explicit | |
65 inline markup can be customized in a `states.Inliner` subclass via the | |
66 ``patterns.initial`` and ``dispatch`` attributes (and new methods as | |
67 appropriate). | |
68 """ | |
69 | |
70 __docformat__ = 'reStructuredText' | |
71 | |
72 | |
73 import docutils.parsers | |
74 import docutils.statemachine | |
75 from docutils.parsers.rst import roles, states | |
76 from docutils import frontend, nodes, Component | |
77 from docutils.transforms import universal | |
78 | |
79 | |
class Parser(docutils.parsers.Parser):

    """
    The reStructuredText parser.

    `parse()` splits the input string into lines and runs a
    `states.RSTStateMachine` over them to populate a document tree.
    """

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    # Option group: (title, description, option tuples).  Each option tuple
    # is (help text, list of option strings, dict of option keyword args).
    settings_spec = (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/dev/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/dev/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://tools.ietf.org/html/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://tools.ietf.org/html/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Disable directives that insert the contents of external file '
          '("include" & "raw"); replaced with a "warning" system message.',
          ['--no-file-insertion'],
          {'action': 'store_false', 'default': 1,
           'dest': 'file_insertion_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable directives that insert the contents of external file '
          '("include" & "raw"). Enabled by default.',
          ['--file-insertion-enabled'],
          {'action': 'store_true'}),
         ('Disable the "raw" directives; replaced with a "warning" '
          'system message.',
          ['--no-raw'],
          {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable the "raw" directive. Enabled by default.',
          ['--raw-enabled'],
          {'action': 'store_true'}),
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none (no parsing)". Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'metavar': '<yes/no/alt>',
           'validator': frontend.validate_ternary}),
         ('Characters to use as "smart quotes" for <language>. ',
          ['--smartquotes-locales'],
          {'metavar': '<language:quotes[,language:quotes,...]>',
           'action': 'append',
           'validator': frontend.validate_smartquotes_locales}),
         ('Inline markup recognized at word boundaries only '
          '(adjacent to punctuation or whitespace). '
          'Force character-level inline markup recognition with '
          '"\\ " (backslash + space). Default.',
          ['--word-level-inline-markup'],
          {'action': 'store_false', 'dest': 'character_level_inline_markup'}),
         ('Inline markup recognized anywhere, regardless of surrounding '
          'characters. Backslash-escapes must be used to avoid unwanted '
          'markup recognition. Useful for East Asian languages. '
          'Experimental.',
          ['--character-level-inline-markup'],
          {'action': 'store_true', 'default': False,
           'dest': 'character_level_inline_markup'}),
         ))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None):
        """
        Configure the initial state and the inliner.

        `rfc2822`: if true, parsing starts in the 'RFC2822Body' state
        instead of the 'Body' state.

        `inliner`: stored and handed to the state machine's ``run()`` by
        `parse()`; ``None`` by default.
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def get_transforms(self):
        """
        Return the transforms of `Component.get_transforms()` followed by
        `universal.SmartQuotes`.
        """
        return Component.get_transforms(self) + [
            universal.SmartQuotes]

    def parse(self, inputstring, document):
        """
        Parse `inputstring` and populate `document`, a document tree.

        The '' entry of ``roles._roles`` is removed once the state machine
        has run, whether the run completed or raised an exception.
        """
        self.setup_parse(inputstring, document)
        self.statemachine = states.RSTStateMachine(
              state_classes=self.state_classes,
              initial_state=self.initial_state,
              debug=document.reporter.debug_flag)
        inputlines = docutils.statemachine.string2lines(
              inputstring, tab_width=document.settings.tab_width,
              convert_whitespace=True)
        try:
            self.statemachine.run(inputlines, document, inliner=self.inliner)
        finally:
            # restore the "default" default role after parsing a document;
            # done in `finally` because `roles._roles` lives on the `roles`
            # module, so a leftover entry would affect every later parse
            if '' in roles._roles:
                del roles._roles['']
        self.finish_parse()
196 | |
197 | |
class DirectiveError(Exception):

    """
    Store a message and a system message level.

    To be thrown from inside directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message):
        """
        Set error `message` and `level`.

        Both values are also forwarded to `Exception.__init__()` so that
        ``args`` mirrors the constructor signature; copying and pickling
        rebuild an exception by calling its class with ``args``.
        """
        Exception.__init__(self, level, message)
        self.level = level
        self.msg = message

    def __str__(self):
        """Return the error message rather than the raw ``args`` tuple."""
        return str(self.msg)
214 | |
215 | |
class Directive(object):

    """
    Base class for reStructuredText directives.

    The following attributes may be set by subclasses.  They are
    interpreted by the directive parser (which runs the directive
    class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: None, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    When a directive implementation is being run, the directive class
    is instantiated, and the `run()` method is executed.  During
    instantiation, the following instance variables are set:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``content_offset`` is the line offset of the first line of the
      content from the beginning of the current input.  Used when
      initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state
      which called the directive function.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = state_machine.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <http://docutils.sf.net/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """Store all constructor arguments as same-named attributes."""
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine

    def run(self):
        """
        Execute the directive; must be overridden by subclasses.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Call "raise self.directive_error(level, message)" from within
        a directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 `DirectiveError` carrying `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 `DirectiveError` carrying `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 `DirectiveError` carrying `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 `DirectiveError` carrying `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 `DirectiveError` carrying `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if not self.content:
            raise self.error('Content block expected for the "%s" directive; '
                             'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        Also normalize the name string and register it as explicit target.
        The 'name' entry is removed from `self.options` in the process.
        """
        if 'name' in self.options:
            name = nodes.fully_normalize_name(self.options.pop('name'))
            # drop a 'name' entry on the node itself before the normalized
            # name is recorded under 'names'
            if 'name' in node:
                del node['name']
            node['names'].append(name)
            self.state.document.note_explicit_target(node, node)
392 | |
393 | |
def convert_directive_function(directive_fn):
    """
    Wrap the old-style directive function `directive_fn` in a new
    `Directive` subclass and return that class.

    The class settings are read from optional attributes of `directive_fn`:
    ``options``, ``content`` and ``arguments`` (a 3-tuple of required
    argument count, optional argument count and final-argument-whitespace
    flag).
    """

    class FunctionalDirective(Directive):

        option_spec = getattr(directive_fn, 'options', None)
        has_content = getattr(directive_fn, 'content', False)
        _argument_spec = getattr(directive_fn, 'arguments', (0, 0, False))
        (required_arguments,
         optional_arguments,
         final_argument_whitespace) = _argument_spec

        def run(self):
            # Pass the instance state on positionally, in the order of the
            # old functional interface.
            call_args = (
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)
            return directive_fn(*call_args)

    # Return new-style directive.
    return FunctionalDirective