comparison env/lib/python3.9/site-packages/docutils/writers/docutils_xml.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # $Id: docutils_xml.py 8368 2019-08-27 12:10:14Z milde $
2 # Author: David Goodger, Paul Tremblay, Guenter Milde
3 # Maintainer: docutils-develop@lists.sourceforge.net
4 # Copyright: This module has been placed in the public domain.
5
6 """
7 Simple document tree Writer, writes Docutils XML according to
8 http://docutils.sourceforge.net/docs/ref/docutils.dtd.
9 """
10
11 __docformat__ = 'reStructuredText'
12
13 import sys
14 import xml.sax.saxutils
15
16 import docutils
17 from docutils import frontend, writers, nodes
18
19 if sys.version_info >= (3, 0):
20 from io import StringIO # noqa
21 else:
22 from StringIO import StringIO # noqa
23
24
# Python 3 has no separate ``unicode`` type: alias the name to ``str`` so
# that ``isinstance(..., unicode)`` checks in this module work unchanged.
if sys.version_info[0] >= 3:
    unicode = str  # noqa
27
28
class RawXmlError(docutils.ApplicationError):
    """Application error type for raw XML content.

    NOTE(review): nothing in this module raises it; presumably kept for
    external callers -- confirm before removing.
    """
30
31
class Writer(writers.Writer):
    """Writer that renders the document tree as Docutils XML.

    The actual serialisation is delegated to `translator_class`
    (`XMLTranslator` unless replaced on the instance).
    """

    # Formats this writer supports.
    supported = ('xml',)

    # Command-line/config options: (title, description, option tuples).
    settings_spec = (
        '"Docutils XML" Writer Options',
        None,
        (
            (
                'Generate XML with newlines before and after tags.',
                ['--newlines'],
                {'action': 'store_true',
                 'validator': frontend.validate_boolean},
            ),
            (
                'Generate XML with indents and newlines.',
                ['--indents'],  #@ TODO use integer value for number of spaces?
                {'action': 'store_true',
                 'validator': frontend.validate_boolean},
            ),
            (
                'Omit the XML declaration. Use with caution.',
                ['--no-xml-declaration'],
                {'dest': 'xml_declaration',
                 'default': 1,
                 'action': 'store_false',
                 'validator': frontend.validate_boolean},
            ),
            (
                'Omit the DOCTYPE declaration.',
                ['--no-doctype'],
                {'dest': 'doctype_declaration',
                 'default': 1,
                 'action': 'store_false',
                 'validator': frontend.validate_boolean},
            ),
        ),
    )

    # Characters the output encoding cannot represent are written as
    # XML character references.
    settings_defaults = {
        'output_encoding_error_handler': 'xmlcharrefreplace',
    }

    config_section = 'docutils_xml writer'
    config_section_dependencies = ('writers',)

    # Final translated form of `document`.
    output = None

    def __init__(self):
        """Initialise the base writer and select the default translator."""
        writers.Writer.__init__(self)
        self.translator_class = XMLTranslator

    def translate(self):
        """Walk `self.document` and store the resulting XML in `self.output`.

        The translator instance is kept in `self.visitor`.
        """
        translator = self.translator_class(self.document)
        self.visitor = translator
        self.document.walkabout(translator)
        fragments = translator.output
        self.output = ''.join(fragments)
71
72
class XMLTranslator(nodes.GenericNodeVisitor):
    """Node visitor that serialises a document tree as Docutils XML.

    Output fragments are collected in the list `self.output`; the
    caller joins them into the final string.
    """

    xml_declaration = '<?xml version="1.0" encoding="%s"?>\n'
    # TODO: add stylesheet options similar to HTML and LaTeX writers?
    #xml_stylesheet = '<?xml-stylesheet type="text/xsl" href="%s"?>\n'
    doctype = (
        '<!DOCTYPE document PUBLIC'
        ' "+//IDN docutils.sourceforge.net//DTD Docutils Generic//EN//XML"'
        ' "http://docutils.sourceforge.net/docs/ref/docutils.dtd">\n')
    generator = '<!-- Generated by Docutils %s -->\n'

    xmlparser = xml.sax.make_parser()
    """SAX parser instance to check/extract raw XML."""
    # NOTE(review): this enables resolution of external general entities
    # for raw XML found in documents.  If untrusted input can reach this
    # writer, confirm that this is intended (XXE exposure).
    xmlparser.setFeature(
        "http://xml.org/sax/features/external-general-entities", True)

    def __init__(self, document):
        """Set up reporter shortcuts, layout settings and the output list.

        `document` is the document tree to translate; its `settings`
        decide whether newlines/indentation are emitted and whether the
        XML and DOCTYPE declarations are written.
        """
        nodes.NodeVisitor.__init__(self, document)

        # Reporter
        self.warn = self.document.reporter.warning
        self.error = self.document.reporter.error

        # Settings
        self.settings = settings = document.settings
        self.indent = self.newline = ''
        if settings.newlines:
            self.newline = '\n'
        if settings.indents:
            self.newline = '\n'
            # NOTE(review): confirm the indent width against upstream;
            # this listing shows a single space.
            self.indent = ' '  #@ TODO make this configurable?
        self.level = 0  # indentation level
        self.in_simple = 0  # level of nesting inside mixed-content elements
        self.fixed_text = 0  # level of nesting inside FixedText elements

        # Output
        self.output = []
        if settings.xml_declaration:
            self.output.append(
                self.xml_declaration % settings.output_encoding)
        if settings.doctype_declaration:
            self.output.append(self.doctype)
        self.output.append(self.generator % docutils.__version__)

        # initialize XML parser
        self.the_handle = TestXml()
        self.xmlparser.setContentHandler(self.the_handle)

    # generic visit and depart methods
    # --------------------------------

    simple_nodes = (nodes.TextElement,
                    nodes.image, nodes.colspec, nodes.transition)  # empty elements

    def default_visit(self, node):
        """Default node visit method.

        Write the start tag of `node`, indented unless inside a
        mixed-content element, and update the nesting counters.
        """
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.starttag(xml.sax.saxutils.quoteattr))
        self.level += 1
        # @@ make nodes.literal an instance of FixedTextElement?
        if isinstance(node, (nodes.FixedTextElement, nodes.literal)):
            self.fixed_text += 1
        if isinstance(node, self.simple_nodes):
            self.in_simple += 1
        if not self.in_simple:
            self.output.append(self.newline)

    def default_departure(self, node):
        """Default node depart method.

        Write the end tag of `node` and undo the counter changes made
        by `default_visit`.
        """
        self.level -= 1
        if not self.in_simple:
            self.output.append(self.indent*self.level)
        self.output.append(node.endtag())
        if isinstance(node, (nodes.FixedTextElement, nodes.literal)):
            self.fixed_text -= 1
        if isinstance(node, self.simple_nodes):
            self.in_simple -= 1
        if not self.in_simple:
            self.output.append(self.newline)

    # specific visit and depart methods
    # ---------------------------------

    def visit_Text(self, node):
        """Append the XML-escaped text of `node` to the output."""
        text = xml.sax.saxutils.escape(node.astext())
        # indent text if we are not in a FixedText element:
        if not self.fixed_text:
            text = text.replace('\n', '\n'+self.indent*self.level)
        self.output.append(text)

    def depart_Text(self, node):
        """Nothing to close for text nodes."""
        pass

    def visit_raw(self, node):
        """Write a raw node; raw XML is inserted verbatim and checked.

        Raw content whose format list lacks "xml" is handled like any
        other element.  Raw XML is wrapped in a <raw> element, written
        unescaped, and then parsed; a parse error is reported as a
        warning (the content stays in the output).

        Raises `nodes.SkipNode` after raw XML has been written, since
        the content is already processed.
        """
        if 'xml' not in node.get('format', '').split():
            # skip other raw content?
            # raise nodes.SkipNode
            self.default_visit(node)
            return
        # wrap in <raw> element
        self.default_visit(node)  # or not?
        xml_string = node.astext()
        self.output.append(xml_string)
        self.default_departure(node)  # or not?
        # Check validity of raw XML:
        if isinstance(xml_string, unicode) and sys.version_info < (3, 0):
            xml_string = xml_string.encode('utf8')
        # The parser is shared by all instances of this class: attach our
        # handler again so that the locator read below belongs to this
        # parse and not to a translator that was created later.
        self.xmlparser.setContentHandler(self.the_handle)
        try:
            self.xmlparser.parse(StringIO(xml_string))
        except xml.sax.SAXParseException:
            col_num = self.the_handle.locator.getColumnNumber()
            line_num = self.the_handle.locator.getLineNumber()
            msg = 'Invalid raw XML in column %d, line offset %d:\n%s' % (
                col_num, line_num, node.astext())
            location = {'source': node.source}
            srcline = node.line
            # `node.line` may be None; then no source line can be
            # computed and the warning is issued without one.
            if srcline is not None:
                if not isinstance(node.parent, nodes.TextElement):
                    srcline += 2  # directive content start line
                location['line'] = srcline + line_num - 1
            self.warn(msg, **location)
        raise nodes.SkipNode  # content already processed
194
195
class TestXml(xml.sax.handler.ContentHandler):
    """Content handler that only remembers the parser's locator.

    The stored locator is used to query line and column numbers when
    parsing raw XML fails.
    """

    def setDocumentLocator(self, locator):
        """Store `locator` on the instance as `self.locator`."""
        self.locator = locator