Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac |
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children |
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/docutils/transforms/frontmatter.py Thu May 14 16:47:39 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,548 +0,0 @@ -# $Id: frontmatter.py 8389 2019-09-11 11:39:13Z milde $ -# Author: David Goodger, Ueli Schlaepfer <goodger@python.org> -# Copyright: This module has been placed in the public domain. - -""" -Transforms related to the front matter of a document or a section -(information found before the main text): - -- `DocTitle`: Used to transform a lone top level section's title to - the document title, promote a remaining lone top-level section's - title to the document subtitle, and determine the document's title - metadata (document['title']) based on the document title and/or the - "title" setting. - -- `SectionSubTitle`: Used to transform a lone subsection into a - subtitle. - -- `DocInfo`: Used to transform a bibliographic field list into docinfo - elements. -""" - -__docformat__ = 'reStructuredText' - -import re -import sys - -from docutils import nodes, utils -from docutils.transforms import TransformError, Transform - - -if sys.version_info >= (3, 0): - unicode = str # noqa - - -class TitlePromoter(Transform): - - """ - Abstract base class for DocTitle and SectionSubTitle transforms. - """ - - def promote_title(self, node): - """ - Transform the following tree:: - - <node> - <section> - <title> - ... - - into :: - - <node> - <title> - ... - - `node` is normally a document. - """ - # Type check - if not isinstance(node, nodes.Element): - raise TypeError('node must be of Element-derived type.') - - # `node` must not have a title yet. - assert not (len(node) and isinstance(node[0], nodes.title)) - section, index = self.candidate_index(node) - if index is None: - return False - - # Transfer the section's attributes to the node: - # NOTE: Change `replace` to False to NOT replace attributes that - # already exist in node with those in section. - # NOTE: Remove `and_source` to NOT copy the 'source' - # attribute from section - node.update_all_atts_concatenating(section, replace=True, and_source=True) - - # setup_child is called automatically for all nodes. - node[:] = (section[:1] # section title - + node[:index] # everything that was in the - # node before the section - + section[1:]) # everything that was in the section - assert isinstance(node[0], nodes.title) - return True - - def promote_subtitle(self, node): - """ - Transform the following node tree:: - - <node> - <title> - <section> - <title> - ... - - into :: - - <node> - <title> - <subtitle> - ... - """ - # Type check - if not isinstance(node, nodes.Element): - raise TypeError('node must be of Element-derived type.') - - subsection, index = self.candidate_index(node) - if index is None: - return False - subtitle = nodes.subtitle() - - # Transfer the subsection's attributes to the new subtitle - # NOTE: Change `replace` to False to NOT replace attributes - # that already exist in node with those in section. - # NOTE: Remove `and_source` to NOT copy the 'source' - # attribute from section. - subtitle.update_all_atts_concatenating(subsection, replace=True, and_source=True) - - # Transfer the contents of the subsection's title to the - # subtitle: - subtitle[:] = subsection[0][:] - node[:] = (node[:1] # title - + [subtitle] - # everything that was before the section: - + node[1:index] - # everything that was in the subsection: - + subsection[1:]) - return True - - def candidate_index(self, node): - """ - Find and return the promotion candidate and its index. - - Return (None, None) if no valid candidate was found. - """ - index = node.first_child_not_matching_class( - nodes.PreBibliographic) - if (index is None or len(node) > (index + 1) - or not isinstance(node[index], nodes.section)): - return None, None - else: - return node[index], index - - -class DocTitle(TitlePromoter): - - """ - In reStructuredText_, there is no way to specify a document title - and subtitle explicitly. Instead, we can supply the document title - (and possibly the subtitle as well) implicitly, and use this - two-step transform to "raise" or "promote" the title(s) (and their - corresponding section contents) to the document level. - - 1. If the document contains a single top-level section as its - first non-comment element, the top-level section's title - becomes the document's title, and the top-level section's - contents become the document's immediate contents. The lone - top-level section header must be the first non-comment element - in the document. - - For example, take this input text:: - - ================= - Top-Level Title - ================= - - A paragraph. - - Once parsed, it looks like this:: - - <document> - <section names="top-level title"> - <title> - Top-Level Title - <paragraph> - A paragraph. - - After running the DocTitle transform, we have:: - - <document names="top-level title"> - <title> - Top-Level Title - <paragraph> - A paragraph. - - 2. If step 1 successfully determines the document title, we - continue by checking for a subtitle. - - If the lone top-level section itself contains a single - second-level section as its first non-comment element, that - section's title is promoted to the document's subtitle, and - that section's contents become the document's immediate - contents. Given this input text:: - - ================= - Top-Level Title - ================= - - Second-Level Title - ~~~~~~~~~~~~~~~~~~ - - A paragraph. - - After parsing and running the Section Promotion transform, the - result is:: - - <document names="top-level title"> - <title> - Top-Level Title - <subtitle names="second-level title"> - Second-Level Title - <paragraph> - A paragraph. - - (Note that the implicit hyperlink target generated by the - "Second-Level Title" is preserved on the "subtitle" element - itself.) - - Any comment elements occurring before the document title or - subtitle are accumulated and inserted as the first body elements - after the title(s). - - This transform also sets the document's metadata title - (document['title']). - - .. _reStructuredText: http://docutils.sf.net/rst.html - """ - - default_priority = 320 - - def set_metadata(self): - """ - Set document['title'] metadata title from the following - sources, listed in order of priority: - - * Existing document['title'] attribute. - * "title" setting. - * Document title node (as promoted by promote_title). - """ - if not self.document.hasattr('title'): - if self.document.settings.title is not None: - self.document['title'] = self.document.settings.title - elif len(self.document) and isinstance(self.document[0], nodes.title): - self.document['title'] = self.document[0].astext() - - def apply(self): - if getattr(self.document.settings, 'doctitle_xform', 1): - # promote_(sub)title defined in TitlePromoter base class. - if self.promote_title(self.document): - # If a title has been promoted, also try to promote a - # subtitle. - self.promote_subtitle(self.document) - # Set document['title']. - self.set_metadata() - - -class SectionSubTitle(TitlePromoter): - - """ - This works like document subtitles, but for sections. For example, :: - - <section> - <title> - Title - <section> - <title> - Subtitle - ... - - is transformed into :: - - <section> - <title> - Title - <subtitle> - Subtitle - ... - - For details refer to the docstring of DocTitle. - """ - - default_priority = 350 - - def apply(self): - if not getattr(self.document.settings, 'sectsubtitle_xform', 1): - return - for section in self.document._traverse(nodes.section): - # On our way through the node tree, we are modifying it - # but only the not-yet-visited part, so that the iterator - # returned by _traverse() is not corrupted. - self.promote_subtitle(section) - - -class DocInfo(Transform): - - """ - This transform is specific to the reStructuredText_ markup syntax; - see "Bibliographic Fields" in the `reStructuredText Markup - Specification`_ for a high-level description. This transform - should be run *after* the `DocTitle` transform. - - Given a field list as the first non-comment element after the - document title and subtitle (if present), registered bibliographic - field names are transformed to the corresponding DTD elements, - becoming child elements of the "docinfo" element (except for a - dedication and/or an abstract, which become "topic" elements after - "docinfo"). - - For example, given this document fragment after parsing:: - - <document> - <title> - Document Title - <field_list> - <field> - <field_name> - Author - <field_body> - <paragraph> - A. Name - <field> - <field_name> - Status - <field_body> - <paragraph> - $RCSfile$ - ... - - After running the bibliographic field list transform, the - resulting document tree would look like this:: - - <document> - <title> - Document Title - <docinfo> - <author> - A. Name - <status> - frontmatter.py - ... - - The "Status" field contained an expanded RCS keyword, which is - normally (but optionally) cleaned up by the transform. The sole - contents of the field body must be a paragraph containing an - expanded RCS keyword of the form "$keyword: expansion text $". Any - RCS keyword can be processed in any bibliographic field. The - dollar signs and leading RCS keyword name are removed. Extra - processing is done for the following RCS keywords: - - - "RCSfile" expands to the name of the file in the RCS or CVS - repository, which is the name of the source file with a ",v" - suffix appended. The transform will remove the ",v" suffix. - - - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC - time zone). The RCS Keywords transform will extract just the - date itself and transform it to an ISO 8601 format date, as in - "2000-12-31". - - (Since the source file for this text is itself stored under CVS, - we can't show an example of the "Date" RCS keyword because we - can't prevent any RCS keywords used in this explanation from - being expanded. Only the "RCSfile" keyword is stable; its - expansion text changes only if the file name changes.) - - .. _reStructuredText: http://docutils.sf.net/rst.html - .. _reStructuredText Markup Specification: - http://docutils.sf.net/docs/ref/rst/restructuredtext.html - """ - - default_priority = 340 - - biblio_nodes = { - 'author': nodes.author, - 'authors': nodes.authors, - 'organization': nodes.organization, - 'address': nodes.address, - 'contact': nodes.contact, - 'version': nodes.version, - 'revision': nodes.revision, - 'status': nodes.status, - 'date': nodes.date, - 'copyright': nodes.copyright, - 'dedication': nodes.topic, - 'abstract': nodes.topic} - """Canonical field name (lowcased) to node class name mapping for - bibliographic fields (field_list).""" - - def apply(self): - if not getattr(self.document.settings, 'docinfo_xform', 1): - return - document = self.document - index = document.first_child_not_matching_class( - nodes.PreBibliographic) - if index is None: - return - candidate = document[index] - if isinstance(candidate, nodes.field_list): - biblioindex = document.first_child_not_matching_class( - (nodes.Titular, nodes.Decorative)) - nodelist = self.extract_bibliographic(candidate) - del document[index] # untransformed field list (candidate) - document[biblioindex:biblioindex] = nodelist - - def extract_bibliographic(self, field_list): - docinfo = nodes.docinfo() - bibliofields = self.language.bibliographic_fields - labels = self.language.labels - topics = {'dedication': None, 'abstract': None} - for field in field_list: - try: - name = field[0][0].astext() - normedname = nodes.fully_normalize_name(name) - if not (len(field) == 2 and normedname in bibliofields - and self.check_empty_biblio_field(field, name)): - raise TransformError - canonical = bibliofields[normedname] - biblioclass = self.biblio_nodes[canonical] - if issubclass(biblioclass, nodes.TextElement): - if not self.check_compound_biblio_field(field, name): - raise TransformError - utils.clean_rcs_keywords( - field[1][0], self.rcs_keyword_substitutions) - docinfo.append(biblioclass('', '', *field[1][0])) - elif issubclass(biblioclass, nodes.authors): - self.extract_authors(field, name, docinfo) - elif issubclass(biblioclass, nodes.topic): - if topics[canonical]: - field[-1] += self.document.reporter.warning( - 'There can only be one "%s" field.' % name, - base_node=field) - raise TransformError - title = nodes.title(name, labels[canonical]) - title[0].rawsource = labels[canonical] - topics[canonical] = biblioclass( - '', title, classes=[canonical], *field[1].children) - else: - docinfo.append(biblioclass('', *field[1].children)) - except TransformError: - if len(field[-1]) == 1 \ - and isinstance(field[-1][0], nodes.paragraph): - utils.clean_rcs_keywords( - field[-1][0], self.rcs_keyword_substitutions) - # if normedname not in bibliofields: - classvalue = nodes.make_id(normedname) - if classvalue: - field['classes'].append(classvalue) - docinfo.append(field) - nodelist = [] - if len(docinfo) != 0: - nodelist.append(docinfo) - for name in ('dedication', 'abstract'): - if topics[name]: - nodelist.append(topics[name]) - return nodelist - - def check_empty_biblio_field(self, field, name): - if len(field[-1]) < 1: - field[-1] += self.document.reporter.warning( - 'Cannot extract empty bibliographic field "%s".' % name, - base_node=field) - return None - return 1 - - def check_compound_biblio_field(self, field, name): - if len(field[-1]) > 1: - field[-1] += self.document.reporter.warning( - 'Cannot extract compound bibliographic field "%s".' % name, - base_node=field) - return None - if not isinstance(field[-1][0], nodes.paragraph): - field[-1] += self.document.reporter.warning( - 'Cannot extract bibliographic field "%s" containing ' - 'anything other than a single paragraph.' % name, - base_node=field) - return None - return 1 - - rcs_keyword_substitutions = [ - (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+' - r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'), - (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'), - (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),] - - def extract_authors(self, field, name, docinfo): - try: - if len(field[1]) == 1: - if isinstance(field[1][0], nodes.paragraph): - authors = self.authors_from_one_paragraph(field) - elif isinstance(field[1][0], nodes.bullet_list): - authors = self.authors_from_bullet_list(field) - else: - raise TransformError - else: - authors = self.authors_from_paragraphs(field) - authornodes = [nodes.author('', '', *author) - for author in authors if author] - if len(authornodes) >= 1: - docinfo.append(nodes.authors('', *authornodes)) - else: - raise TransformError - except TransformError: - field[-1] += self.document.reporter.warning( - 'Bibliographic field "%s" incompatible with extraction: ' - 'it must contain either a single paragraph (with authors ' - 'separated by one of "%s"), multiple paragraphs (one per ' - 'author), or a bullet list with one paragraph (one author) ' - 'per item.' - % (name, ''.join(self.language.author_separators)), - base_node=field) - raise - - def authors_from_one_paragraph(self, field): - """Return list of Text nodes for authornames. - - The set of separators is locale dependent (default: ";"- or ","). - """ - # @@ keep original formatting? (e.g. ``:authors: A. Test, *et-al*``) - text = ''.join(unicode(node) - for node in field[1].traverse(nodes.Text)) - if not text: - raise TransformError - for authorsep in self.language.author_separators: - # don't split at escaped `authorsep`: - pattern = '(?<!\x00)%s' % authorsep - authornames = re.split(pattern, text) - if len(authornames) > 1: - break - authornames = (name.strip() for name in authornames) - authors = [[nodes.Text(name, utils.unescape(name, True))] - for name in authornames if name] - return authors - - def authors_from_bullet_list(self, field): - authors = [] - for item in field[1][0]: - if isinstance(item, nodes.comment): - continue - if len(item) != 1 or not isinstance(item[0], nodes.paragraph): - raise TransformError - authors.append(item[0].children) - if not authors: - raise TransformError - return authors - - def authors_from_paragraphs(self, field): - for item in field[1]: - if not isinstance(item, (nodes.paragraph, nodes.comment)): - raise TransformError - authors = [item.children for item in field[1] - if not isinstance(item, nodes.comment)] - return authors
