# HG changeset patch # User saskia-hiltemann # Date 1412685690 14400 # Node ID a4813532bbc6ff3d6d2b3be91fca96a2e1513c7e # Parent 4a6ebda2a3ae27f91048b8c992b4b8505d738210 Added MarkDown support diff -r 4a6ebda2a3ae -r a4813532bbc6 Markdown/markdown2.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Markdown/markdown2.py Tue Oct 07 08:41:30 2014 -0400 @@ -0,0 +1,2440 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Trent Mick. +# Copyright (c) 2007-2008 ActiveState Corp. +# License: MIT (http://www.opensource.org/licenses/mit-license.php) + +from __future__ import generators + +r"""A fast and complete Python implementation of Markdown. + +[from http://daringfireball.net/projects/markdown/] +> Markdown is a text-to-HTML filter; it translates an easy-to-read / +> easy-to-write structured text format into HTML. Markdown's text +> format is most similar to that of plain text email, and supports +> features such as headers, *emphasis*, code blocks, blockquotes, and +> links. +> +> Markdown's syntax is designed not as a generic markup language, but +> specifically to serve as a front-end to (X)HTML. You can use span-level +> HTML tags anywhere in a Markdown document, and you can use block level +> HTML tags (like
tags.
+ """
+ yield 0, ""
+ for tup in inner:
+ yield tup
+ yield 0, "
"
+
+ def wrap(self, source, outfile):
+ """Return the source with a code, pre, and div."""
+ return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
+
+ formatter_opts.setdefault("cssclass", "codehilite")
+ formatter = HtmlCodeFormatter(**formatter_opts)
+ return pygments.highlight(codeblock, lexer, formatter)
+
+ def _code_block_sub(self, match, is_fenced_code_block=False):
+ lexer_name = None
+ if is_fenced_code_block:
+ lexer_name = match.group(1)
+ if lexer_name:
+ formatter_opts = self.extras['fenced-code-blocks'] or {}
+ codeblock = match.group(2)
+ codeblock = codeblock[:-1] # drop one trailing newline
+ else:
+ codeblock = match.group(1)
+ codeblock = self._outdent(codeblock)
+ codeblock = self._detab(codeblock)
+ codeblock = codeblock.lstrip('\n') # trim leading newlines
+ codeblock = codeblock.rstrip() # trim trailing whitespace
+
+ # Note: "code-color" extra is DEPRECATED.
+ if "code-color" in self.extras and codeblock.startswith(":::"):
+ lexer_name, rest = codeblock.split('\n', 1)
+ lexer_name = lexer_name[3:].strip()
+ codeblock = rest.lstrip("\n") # Remove lexer declaration line.
+ formatter_opts = self.extras['code-color'] or {}
+
+ if lexer_name:
+ def unhash_code( codeblock ):
+ for key, sanitized in list(self.html_spans.items()):
+ codeblock = codeblock.replace(key, sanitized)
+ replacements = [
+ ("&", "&"),
+ ("<", "<"),
+ (">", ">")
+ ]
+ for old, new in replacements:
+ codeblock = codeblock.replace(old, new)
+ return codeblock
+ lexer = self._get_pygments_lexer(lexer_name)
+ if lexer:
+ codeblock = unhash_code( codeblock )
+ colored = self._color_with_pygments(codeblock, lexer,
+ **formatter_opts)
+ return "\n\n%s\n\n" % colored
+
+ codeblock = self._encode_code(codeblock)
+ pre_class_str = self._html_class_str_from_tag("pre")
+ code_class_str = self._html_class_str_from_tag("code")
+ return "\n\n%s\n
\n\n" % (
+ pre_class_str, code_class_str, codeblock)
+
+ def _html_class_str_from_tag(self, tag):
+ """Get the appropriate ' class="..."' string (note the leading
+ space), if any, for the given tag.
+ """
+ if "html-classes" not in self.extras:
+ return ""
+ try:
+ html_classes_from_tag = self.extras["html-classes"]
+ except TypeError:
+ return ""
+ else:
+ if tag in html_classes_from_tag:
+ return ' class="%s"' % html_classes_from_tag[tag]
+ return ""
+
+ def _do_code_blocks(self, text):
+ """Process Markdown `` blocks."""
+ code_block_re = re.compile(r'''
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ # Lookahead to make sure this block isn't already in a code block.
+ # Needed when syntax highlighting is being used.
+ (?![^<]*\
)
+ ''' % (self.tab_width, self.tab_width),
+ re.M | re.X)
+ return code_block_re.sub(self._code_block_sub, text)
+
+ _fenced_code_block_re = re.compile(r'''
+ (?:\n\n|\A\n?)
+ ^```([\w+-]+)?[ \t]*\n # opening fence, $1 = optional lang
+ (.*?) # $2 = code block content
+ ^```[ \t]*\n # closing fence
+ ''', re.M | re.X | re.S)
+
+ def _fenced_code_block_sub(self, match):
+ return self._code_block_sub(match, is_fenced_code_block=True);
+
+ def _do_fenced_code_blocks(self, text):
+ """Process ```-fenced unindented code blocks ('fenced-code-blocks' extra)."""
+ return self._fenced_code_block_re.sub(self._fenced_code_block_sub, text)
+
+ # Rules for a code span:
+ # - backslash escapes are not interpreted in a code span
+ # - to include one or or a run of more backticks the delimiters must
+ # be a longer run of backticks
+ # - cannot start or end a code span with a backtick; pad with a
+ # space and that space will be removed in the emitted HTML
+ # See `test/tm-cases/escapes.text` for a number of edge-case
+ # examples.
+ _code_span_re = re.compile(r'''
+ (?%s
" % c
+
+ def _do_code_spans(self, text):
+ # * Backtick quotes are used for
spans.
+ #
+ # * You can use multiple backticks as the delimiters if you want to
+ # include literal backticks in the code span. So, this input:
+ #
+ # Just type ``foo `bar` baz`` at the prompt.
+ #
+ # Will translate to:
+ #
+ # Just type foo `bar` baz
at the prompt.
`bar`
...
+ return self._code_span_re.sub(self._code_span_sub, text)
+
+ def _encode_code(self, text):
+ """Encode/escape certain characters inside Markdown code runs.
+ The point is that in code, these characters are literals,
+ and lose their special Markdown meanings.
+ """
+ replacements = [
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ ('&', '&'),
+ # Do the angle bracket song and dance:
+ ('<', '<'),
+ ('>', '>'),
+ ]
+ for before, after in replacements:
+ text = text.replace(before, after)
+ hashed = _hash_text(text)
+ self._escape_table[text] = hashed
+ return hashed
+
+ _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
+ _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
+ _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
+ _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
+ def _do_italics_and_bold(self, text):
+ # must go first:
+ if "code-friendly" in self.extras:
+ text = self._code_friendly_strong_re.sub(r"\1", text)
+ text = self._code_friendly_em_re.sub(r"\1", text)
+ else:
+ text = self._strong_re.sub(r"\2", text)
+ text = self._em_re.sub(r"\2", text)
+ return text
+
+ # "smarty-pants" extra: Very liberal in interpreting a single prime as an
+ # apostrophe; e.g. ignores the fact that "round", "bout", "twer", and
+ # "twixt" can be written without an initial apostrophe. This is fine because
+ # using scare quotes (single quotation marks) is rare.
+ _apostrophe_year_re = re.compile(r"'(\d\d)(?=(\s|,|;|\.|\?|!|$))")
+ _contractions = ["tis", "twas", "twer", "neath", "o", "n",
+ "round", "bout", "twixt", "nuff", "fraid", "sup"]
+ def _do_smart_contractions(self, text):
+ text = self._apostrophe_year_re.sub(r"’\1", text)
+ for c in self._contractions:
+ text = text.replace("'%s" % c, "’%s" % c)
+ text = text.replace("'%s" % c.capitalize(),
+ "’%s" % c.capitalize())
+ return text
+
+ # Substitute double-quotes before single-quotes.
+ _opening_single_quote_re = re.compile(r"(?
+ See "test/tm-cases/smarty_pants.text" for a full discussion of the
+ support here and
+ .+?)', re.S) + def _dedent_two_spaces_sub(self, match): + return re.sub(r'(?m)^ ', '', match.group(1)) + + def _block_quote_sub(self, match): + bq = match.group(1) + bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + bq = self._run_block_gamut(bq) # recurse + + bq = re.sub('(?m)^', ' ', bq) + # These leading spaces screw with
content, so we need to fix that: + bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) + + return "\n%s\n\n\n" % bq + + def _do_block_quotes(self, text): + if '>' not in text: + return text + return self._block_quote_re.sub(self._block_quote_sub, text) + + def _form_paragraphs(self, text): + # Strip leading and trailing lines: + text = text.strip('\n') + + # Wraptags. + grafs = [] + for i, graf in enumerate(re.split(r"\n{2,}", text)): + if graf in self.html_blocks: + # Unhashify HTML blocks + grafs.append(self.html_blocks[graf]) + else: + cuddled_list = None + if "cuddled-lists" in self.extras: + # Need to put back trailing '\n' for `_list_item_re` + # match at the end of the paragraph. + li = self._list_item_re.search(graf + '\n') + # Two of the same list marker in this paragraph: a likely + # candidate for a list cuddled to preceding paragraph + # text (issue 33). Note the `[-1]` is a quick way to + # consider numeric bullets (e.g. "1." and "2.") to be + # equal. + if (li and len(li.group(2)) <= 3 and li.group("next_marker") + and li.group("marker")[-1] == li.group("next_marker")[-1]): + start = li.start() + cuddled_list = self._do_lists(graf[start:]).rstrip("\n") + assert cuddled_list.startswith("
tags. + graf = self._run_span_gamut(graf) + grafs.append("
" + graf.lstrip(" \t") + "
") + + if cuddled_list: + grafs.append(cuddled_list) + + return "\n\n".join(grafs) + + def _add_footnotes(self, text): + if self.footnotes: + footer = [ + '%s
" % backlink) + footer.append('