Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/docutils/utils/math/latex2mathml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 # :Id: $Id: latex2mathml.py 8366 2019-08-27 12:09:19Z milde $ | |
| 5 # :Copyright: © 2010 Günter Milde. | |
| 6 # Based on rst2mathml.py from the latex_math sandbox project | |
| 7 # © 2005 Jens Jørgen Mortensen | |
| 8 # :License: Released under the terms of the `2-Clause BSD license`_, in short: | |
| 9 # | |
| 10 # Copying and distribution of this file, with or without modification, | |
| 11 # are permitted in any medium without royalty provided the copyright | |
| 12 # notice and this notice are preserved. | |
| 13 # This file is offered as-is, without any warranty. | |
| 14 # | |
| 15 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause | |
| 16 | |
| 17 | |
| 18 """Convert LaTex math code into presentational MathML""" | |
| 19 | |
| 20 # Based on the `latex_math` sandbox project by Jens Jørgen Mortensen | |
| 21 | |
| 22 import docutils.utils.math.tex2unichar as tex2unichar | |
| 23 | |
# Accent characters placed over a base by the TeX accent macros.
# The value is the spacing form of the accent; the trailing comment on a
# line records the combining form of the same accent.
over = {
    'acute': u'\u00B4',  # u'\u0301'
    'bar': u'\u00AF',  # u'\u0304'
    'breve': u'\u02D8',  # u'\u0306'
    'check': u'\u02C7',  # u'\u030C'
    'dot': u'\u02D9',  # u'\u0307'
    'ddot': u'\u00A8',  # u'\u0308'
    'dddot': u'\u20DB',
    'grave': u'`',  # u'\u0300'
    'hat': u'^',  # u'\u0302'
    'mathring': u'\u02DA',  # u'\u030A'
    'overleftrightarrow': u'\u20e1',
    # 'overline' (u'\u0305') is intentionally not mapped here.
    'tilde': u'\u02DC',  # u'\u0303'
    'vec': u'\u20D7',
}
| 39 | |
# Capital Greek letters (set upright in TeX style), keyed by macro name.
Greek = {
    'Gamma': u'\u0393',
    'Delta': u'\u0394',
    'Theta': u'\u0398',
    'Lambda': u'\u039b',
    'Xi': u'\u039e',
    'Pi': u'\u03a0',
    'Sigma': u'\u03a3',
    'Upsilon': u'\u03d2',
    'Phi': u'\u03a6',
    'Psi': u'\u03a8',
    'Omega': u'\u03a9',
}
| 45 | |
# Symbols of the TeX "mathalpha" class; emitted as <mi> by handle_keyword().
letters = tex2unichar.mathalpha

# Operator-like symbols merged from several TeX math classes; emitted as <mo>.
# Start from a *copy*: updating ``tex2unichar.mathbin`` itself would leak the
# merged entries into the imported module for every other user of that table.
special = dict(tex2unichar.mathbin)      # Binary symbols
special.update(tex2unichar.mathrel)      # Relation symbols, arrow symbols
special.update(tex2unichar.mathord)      # Miscellaneous symbols
special.update(tex2unichar.mathop)       # Variable-sized symbols
special.update(tex2unichar.mathopen)     # Braces
special.update(tex2unichar.mathclose)    # Braces
special.update(tex2unichar.mathfence)

# Characters of the large operators whose "_" / "^" arguments are set as
# under-/overscripts (munder, mover, munderover) instead of sub-/superscripts.
sumintprod = ''.join([special[symbol] for symbol in
                      ['sum', 'int', 'oint', 'prod']])
| 58 | |
# Names of LaTeX function macros (``\sin``, ``\lim``, ...).  handle_keyword()
# emits the bare name as an operator for each of these.
functions = """
    arccos arcsin arctan arg cos cosh
    cot coth csc deg det dim
    exp gcd hom inf ker lg
    lim liminf limsup ln log max
    min Pr sec sin sinh sup
    tan tanh
    injlim varinjlim varlimsup
    projlim varliminf varprojlim
""".split()
| 67 | |
| 68 | |
# Double-struck ("blackboard bold") capitals for ``\mathbb{X}``.
# Most letters are encoded above U+FFFF; C, H, N, P, Q, R and Z use
# four-digit code points instead.
mathbb = {
    'A': u'\U0001D538', 'B': u'\U0001D539',
    'C': u'\u2102',
    'D': u'\U0001D53B', 'E': u'\U0001D53C',
    'F': u'\U0001D53D', 'G': u'\U0001D53E',
    'H': u'\u210D',
    'I': u'\U0001D540', 'J': u'\U0001D541',
    'K': u'\U0001D542', 'L': u'\U0001D543',
    'M': u'\U0001D544',
    'N': u'\u2115',
    'O': u'\U0001D546',
    'P': u'\u2119', 'Q': u'\u211A', 'R': u'\u211D',
    'S': u'\U0001D54A', 'T': u'\U0001D54B',
    'U': u'\U0001D54C', 'V': u'\U0001D54D',
    'W': u'\U0001D54E', 'X': u'\U0001D54F',
    'Y': u'\U0001D550',
    'Z': u'\u2124',
}
| 97 | |
# Script letters for ``\mathscr{x}`` / ``\mathcal{x}``.
# Several letters use four-digit code points; the remarks on those lines
# are the original author's notes on the conventional use of the symbol.
mathscr = {
    # capitals
    'A': u'\U0001D49C',
    'B': u'\u212C',  # bernoulli function
    'C': u'\U0001D49E', 'D': u'\U0001D49F',
    'E': u'\u2130', 'F': u'\u2131',
    'G': u'\U0001D4A2',
    'H': u'\u210B',  # hamiltonian
    'I': u'\u2110',
    'J': u'\U0001D4A5', 'K': u'\U0001D4A6',
    'L': u'\u2112',  # lagrangian
    'M': u'\u2133',  # physics m-matrix
    'N': u'\U0001D4A9', 'O': u'\U0001D4AA',
    'P': u'\U0001D4AB', 'Q': u'\U0001D4AC',
    'R': u'\u211B',
    'S': u'\U0001D4AE', 'T': u'\U0001D4AF',
    'U': u'\U0001D4B0', 'V': u'\U0001D4B1',
    'W': u'\U0001D4B2', 'X': u'\U0001D4B3',
    'Y': u'\U0001D4B4', 'Z': u'\U0001D4B5',
    # small letters
    'a': u'\U0001D4B6', 'b': u'\U0001D4B7',
    'c': u'\U0001D4B8', 'd': u'\U0001D4B9',
    'e': u'\u212F',
    'f': u'\U0001D4BB',
    'g': u'\u210A',
    'h': u'\U0001D4BD', 'i': u'\U0001D4BE',
    'j': u'\U0001D4BF', 'k': u'\U0001D4C0',
    'l': u'\U0001D4C1', 'm': u'\U0001D4C2',
    'n': u'\U0001D4C3',
    'o': u'\u2134',  # order of
    'p': u'\U0001D4C5', 'q': u'\U0001D4C6',
    'r': u'\U0001D4C7', 's': u'\U0001D4C8',
    't': u'\U0001D4C9', 'u': u'\U0001D4CA',
    'v': u'\U0001D4CB', 'w': u'\U0001D4CC',
    'x': u'\U0001D4CD', 'y': u'\U0001D4CE',
    'z': u'\U0001D4CF',
}
| 152 | |
# Operators that may follow ``\not``, mapped to the negated character.
negatables = {
    '=': u'\u2260',
    '\\in': u'\u2209',
    '\\equiv': u'\u2262',
}
| 156 | |
| 157 # LaTeX to MathML translation stuff: | |
class math(object):
    """Base class for MathML elements.

    An element keeps its children in ``self.children``; a child gets a
    ``parent`` attribute when it is appended.  The XML tag name is the
    name of the class, except for an element created with an `inline`
    argument, which is written as the ``<math>`` root with its namespace.
    """

    # Maximum number of children; `full()` is True once it is reached.
    nchildren = 1000000

    def __init__(self, children=None, inline=None):
        """math([children]) -> MathML element

        children can be one child or a list of children.
        inline, if not None, marks the element as the root: True selects
        inline math and False selects displayed math."""

        self.children = []
        if children is not None:
            if isinstance(children, list):
                for child in children:
                    self.append(child)
            else:
                # Only one child:
                self.append(children)

        # The attribute is only created on request: xml_start() tests for
        # its presence to tell the root from ordinary elements.
        if inline is not None:
            self.inline = inline

    def __repr__(self):
        # Leaf classes may not create a child list at all.
        if hasattr(self, 'children'):
            return self.__class__.__name__ + '(%s)' % \
                   ','.join([repr(child) for child in self.children])
        else:
            return self.__class__.__name__

    def full(self):
        """full() -> bool

        True if the element already has `nchildren` children, i.e. there
        is no room for more."""

        return len(self.children) >= self.nchildren

    def append(self, child):
        """append(child) -> element

        Appends child and returns self if self is not full or first
        non-full parent."""

        # Internal invariant: callers must only append to a non-full element.
        assert not self.full()
        self.children.append(child)
        child.parent = self
        node = self
        while node.full():
            node = node.parent
        return node

    def delete_child(self):
        """delete_child() -> child

        Delete last child and return it.

        Raises SyntaxError if there is no child to remove."""

        if not self.children:
            # Reported like every other input error of this module
            # (previously an IndexError escaped from here).
            raise SyntaxError(u'Missing preceding element!')
        return self.children.pop()

    def close(self):
        """close() -> parent

        Close element and return first non-full element.

        Raises SyntaxError if the element has no parent (it is the root)."""

        parent = getattr(self, 'parent', None)
        if parent is None:
            # Previously an AttributeError escaped from here.
            raise SyntaxError(u'Cannot close the top-level element!')
        while parent.full():
            parent = parent.parent
        return parent

    def xml(self):
        """xml() -> list of XML string fragments

        The fragments are to be joined with the empty string."""

        return self.xml_start() + self.xml_body() + self.xml_end()

    def xml_start(self):
        """Return the start tag as a list of string fragments."""
        if not hasattr(self, 'inline'):
            return ['<%s>' % self.__class__.__name__]
        xmlns = 'http://www.w3.org/1998/Math/MathML'
        if self.inline:
            return ['<math xmlns="%s">' % xmlns]
        else:
            return ['<math xmlns="%s" mode="display">' % xmlns]

    def xml_end(self):
        """Return the end tag as a list of string fragments."""
        return ['</%s>' % self.__class__.__name__]

    def xml_body(self):
        """Return the serialized children as a list of string fragments."""
        xml = []
        for child in self.children:
            xml.extend(child.xml())
        return xml
class mrow(math):
    """Horizontal group of elements; its start tag begins on a new line."""

    def xml_start(self):
        tag = type(self).__name__
        return ['\n<%s>' % tag]


class mtable(math):
    """Table (matrix) element; its start tag begins on a new line."""

    def xml_start(self):
        tag = type(self).__name__
        return ['\n<%s>' % tag]


class mtr(mrow):
    """Table row; serialized like mrow but under its own tag name."""


class mtd(mrow):
    """Table cell; serialized like mrow but under its own tag name."""
| 259 | |
class mx(math):
    """Base class for mo, mi, and mn"""

    # Token elements carry character data instead of children.
    nchildren = 0

    def __init__(self, data):
        # The base initializer is not called: only the data is stored.
        self.data = data

    def xml_body(self):
        content = self.data
        return [content]
| 269 | |
class mo(mx):
    """Operator element (<mo>)."""

    # "<" and ">" are markup delimiters in XML and must be written as
    # entity references inside element content.
    translation = {'<': '&lt;', '>': '&gt;'}

    def xml_body(self):
        """Return the operator, escaped where XML requires it."""
        return [self.translation.get(self.data, self.data)]
| 274 | |
class mi(mx):
    """Identifier element (<mi>)."""


class mn(mx):
    """Number element (<mn>)."""
| 277 | |
class msub(math):
    """Subscript element (<msub>); full after two children."""
    nchildren = 2


class msup(math):
    """Superscript element (<msup>); full after two children."""
    nchildren = 2


class msqrt(math):
    """Square root element (<msqrt>); full after one child."""
    nchildren = 1


class mroot(math):
    """Root element (<mroot>); full after two children."""
    nchildren = 2


class mfrac(math):
    """Fraction element (<mfrac>); full after two children."""
    nchildren = 2
| 292 | |
class msubsup(math):
    """Combined sub- and superscript element (<msubsup>).

    With `reversed` true, the second and third child were collected in
    the opposite order and are swapped once before serialization.
    """

    nchildren = 3

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        math.__init__(self, children)

    def xml(self):
        """xml() -> list of XML string fragments

        Raises SyntaxError if the children must be swapped but the third
        child was never supplied (e.g. input ending in ``a^b_``)."""

        if self.reversed:
            if len(self.children) < 3:
                # Previously an IndexError escaped from the swap below.
                raise SyntaxError(u'Missing subscript!')
            self.children[1:3] = [self.children[2], self.children[1]]
            # Swap only once, xml() may be called repeatedly.
            self.reversed = False
        return math.xml(self)
| 305 | |
class mfenced(math):
    """Fenced expression (<mfenced>) for ``\\left ... \\right`` pairs."""

    # LaTeX delimiter -> character used in the open/close attribute.
    # "." is the invisible delimiter.
    translation = {'\\{': '{', '\\langle': u'\u2329',
                   '\\}': '}', '\\rangle': u'\u232A',
                   '.': ''}

    # Closing delimiter; assigned from outside when the matching "\right"
    # is parsed.  None means the fence was never closed.
    closepar = None

    def __init__(self, par):
        """mfenced(par) -> element opened with the delimiter `par`."""
        self.openpar = par
        math.__init__(self)

    def xml_start(self):
        """Return the start tag as a list of string fragments.

        Raises SyntaxError if no closing delimiter was set."""

        if self.closepar is None:
            # Previously an AttributeError escaped from here.
            raise SyntaxError(u'Missing "\\right" for "\\left%s"!'
                              % self.openpar)
        opening = self.translation.get(self.openpar, self.openpar)
        closing = self.translation.get(self.closepar, self.closepar)
        return ['<mfenced open="%s" close="%s">' % (opening, closing)]
| 318 | |
class mspace(math):
    """Space element (<mspace>); it takes no children."""
    nchildren = 0
| 321 | |
class mstyle(math):
    """Style element (<mstyle>); keyword arguments become XML attributes."""

    def __init__(self, children=None, nchildren=None, **kwargs):
        """mstyle([children], [nchildren], **attributes) -> element

        nchildren, if given, limits the number of children of this
        instance."""

        if nchildren is not None:
            self.nchildren = nchildren
        math.__init__(self, children)
        self.attrs = kwargs

    def xml_start(self):
        """Return the start tag as a list of string fragments."""
        # Every attribute brings its own leading space, so that several
        # attributes do not run together.  Sorted for a stable output.
        return (['<mstyle']
                + [' %s="%s"' % item for item in sorted(self.attrs.items())]
                + ['>'])
| 332 | |
class mover(math):
    """Overscript element (<mover>); full after two children.

    With `reversed` true, the children were collected in the opposite
    order and are reversed once before serialization.
    """

    nchildren = 2

    def __init__(self, children=None, reversed=False):
        self.reversed = reversed
        super(mover, self).__init__(children)

    def xml(self):
        if not self.reversed:
            return super(mover, self).xml()
        self.children.reverse()
        self.reversed = False
        return super(mover, self).xml()
| 344 | |
class munder(math):
    """Underscript element (<munder>); full after two children."""
    nchildren = 2


class munderover(math):
    """Under- and overscript element (<munderover>); full after three."""
    nchildren = 3

    def __init__(self, children=None):
        super(munderover, self).__init__(children)
| 352 | |
class mtext(math):
    """Text element (<mtext>) holding literal text."""

    nchildren = 0

    def __init__(self, text):
        # The base initializer is not called: only the text is stored.
        self.text = text

    def xml_body(self):
        """Return the text with XML markup characters escaped."""
        # "&" must be replaced first, it introduces the other references.
        escaped = (self.text.replace('&', '&amp;')
                            .replace('<', '&lt;')
                            .replace('>', '&gt;'))
        return [escaped]
| 360 | |
def parse_latex_math(string, inline=True):
    """parse_latex_math(string [,inline]) -> MathML-tree

    Returns a MathML-tree parsed from string.  inline=True is for
    inline math and inline=False is for displayed math.

    tree is the whole tree and node is the current element.

    Raises SyntaxError for input that cannot be translated, including
    a sub-/superscript without base and unbalanced "}", "&" or "\\\\"."""

    # Normalize white-space:
    string = ' '.join(string.split())

    if inline:
        node = mrow()
        tree = math(node, inline=True)
    else:
        # Displayed math is wrapped in a table with one row and one cell.
        node = mtd()
        tree = math(mtable(mtr(node)), inline=False)

    while len(string) > 0:
        n = len(string)
        c = string[0]
        skip = 1  # number of characters consumed
        if n > 1:
            c2 = string[1]
        else:
            c2 = ''
        if c == ' ':
            pass
        elif c == '\\':
            # NOTE(review): an empty c2 (backslash at the very end) also
            # passes this test and yields an empty <mo>; kept as is because
            # it may stem from a trailing "\ " -- confirm intended handling.
            if c2 in '{}':
                node = node.append(mo(c2))
                skip = 2
            elif c2 == ' ':
                node = node.append(mspace())
                skip = 2
            elif c2 == ',':  # TODO: small space
                node = node.append(mspace())
                skip = 2
            elif c2.isalpha():
                # We have a LaTeX-name:
                i = 2
                while i < n and string[i].isalpha():
                    i += 1
                name = string[1:i]
                node, skip = handle_keyword(name, node, string[i:])
                skip += i
            elif c2 == '\\':
                # End of a row:
                try:
                    table = node.close().close()
                except AttributeError:
                    # close() reached the parentless root.
                    raise SyntaxError(u'Unexpected end of row: "\\\\"')
                entry = mtd()
                row = mtr(entry)
                table.append(row)
                node = entry
                skip = 2
            else:
                raise SyntaxError(u'Syntax error: "%s%s"' % (c, c2))
        elif c.isalpha():
            node = node.append(mi(c))
        elif c.isdigit():
            node = node.append(mn(c))
        elif c in "+-*/=()[]|<>,.!?':;@":
            node = node.append(mo(c))
        elif c == '_':
            if not node.children:
                raise SyntaxError(u'Missing base for "_"')
            child = node.delete_child()
            if isinstance(child, msup):
                # "a^b_c": the children arrive as base, sup, sub.
                sub = msubsup(child.children, reversed=True)
            elif isinstance(child, mo) and child.data in sumintprod:
                sub = munder(child)
            else:
                sub = msub(child)
            node.append(sub)
            node = sub
        elif c == '^':
            if not node.children:
                raise SyntaxError(u'Missing base for "^"')
            child = node.delete_child()
            if isinstance(child, msub):
                sup = msubsup(child.children)
            elif isinstance(child, mo) and child.data in sumintprod:
                sup = mover(child)
            elif (isinstance(child, munder) and
                  child.children[0].data in sumintprod):
                sup = munderover(child.children)
            else:
                sup = msup(child)
            node.append(sup)
            node = sup
        elif c == '{':
            row = mrow()
            node.append(row)
            node = row
        elif c == '}':
            try:
                node = node.close()
            except AttributeError:
                # close() was called on the parentless root.
                raise SyntaxError(u'Unbalanced "}"')
        elif c == '&':
            try:
                row = node.close()
            except AttributeError:
                raise SyntaxError(u'Unexpected column separator: "&"')
            entry = mtd()
            row.append(entry)
            node = entry
        else:
            raise SyntaxError(u'Illegal character: "%s"' % c)
        string = string[skip:]
    return tree
| 459 | |
| 460 | |
def handle_keyword(name, node, string):
    """handle_keyword(name, node, string) -> (node, skip)

    Translate the LaTeX command `name` (given without the backslash).

    node is the current element and string is the input following the
    command name.  Returns the new current element and the number of
    characters of string that were consumed (one leading space and any
    argument read here).

    Raises SyntaxError for unknown commands and malformed arguments."""

    skip = 0
    if len(string) > 0 and string[0] == ' ':
        string = string[1:]
        skip = 1
    if name == 'begin':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Environment not supported! '
                              u'Supported environment: "matrix".')
        skip += 8
        entry = mtd()
        table = mtable(mtr(entry))
        node.append(table)
        node = entry
    elif name == 'end':
        if not string.startswith('{matrix}'):
            raise SyntaxError(u'Expected "\\end{matrix}"!')
        skip += 8
        try:
            # Leave cell, row and table.
            node = node.close().close().close()
        except AttributeError:
            # close() reached the parentless root.
            raise SyntaxError(u'Unexpected "\\end{matrix}"!')
    elif name in ('text', 'mathrm'):
        # startswith() also covers an empty string (no IndexError).
        if not string.startswith('{'):
            raise SyntaxError(u'Expected "\\text{...}"!')
        i = string.find('}')
        if i == -1:
            raise SyntaxError(u'Expected "\\text{...}"!')
        node = node.append(mtext(string[1:i]))
        skip += i + 1
    elif name == 'sqrt':
        sqrt = msqrt()
        node.append(sqrt)
        node = sqrt
    elif name == 'frac':
        frac = mfrac()
        node.append(frac)
        node = frac
    elif name == 'left':
        for par in ['(', '[', '|', '\\{', '\\langle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing left-brace!')
        fenced = mfenced(par)
        node.append(fenced)
        row = mrow()
        fenced.append(row)
        node = row
        skip += len(par)
    elif name == 'right':
        for par in [')', ']', '|', '\\}', '\\rangle', '.']:
            if string.startswith(par):
                break
        else:
            raise SyntaxError(u'Missing right-brace!')
        # Closing the row opened by "\left" must lead to its mfenced element.
        try:
            fenced = node.close()
        except AttributeError:
            fenced = None
        if not isinstance(fenced, mfenced):
            raise SyntaxError(u'"\\right" without matching "\\left"!')
        fenced.closepar = par
        node = fenced.close()
        skip += len(par)
    elif name == 'not':
        for operator in negatables:
            if not string.startswith(operator):
                continue
            # A command like "\in" must not match the start of a longer
            # command name like "\infty".
            if (operator[-1:].isalpha()
                and string[len(operator):len(operator)+1].isalpha()):
                continue
            break
        else:
            raise SyntaxError(u'Expected something to negate: "\\not ..."!')
        node = node.append(mo(negatables[operator]))
        skip += len(operator)
    elif name == 'mathbf':
        style = mstyle(nchildren=1, fontweight='bold')
        node.append(style)
        node = style
    elif name == 'mathbb':
        # Length and table membership are checked first, so that neither
        # IndexError nor KeyError can escape.
        if (len(string) < 3 or string[0] != '{' or string[2] != '}'
            or string[1] not in mathbb):
            raise SyntaxError(u'Expected something like "\\mathbb{A}"!')
        node = node.append(mi(mathbb[string[1]]))
        skip += 3
    elif name in ('mathscr', 'mathcal'):
        if (len(string) < 3 or string[0] != '{' or string[2] != '}'
            or string[1] not in mathscr):
            raise SyntaxError(u'Expected something like "\\mathscr{A}"!')
        node = node.append(mi(mathscr[string[1]]))
        skip += 3
    elif name == 'colon':  # "normal" colon, not binary operator
        node = node.append(mo(':'))  # TODO: add ``lspace="0pt"``
    elif name in Greek:  # Greek capitals (upright in "TeX style")
        node = node.append(mo(Greek[name]))
        # TODO: "ISO style" sets them italic. Could we use a class argument
        # to enable styling via CSS?
    elif name in letters:
        node = node.append(mi(letters[name]))
    elif name in special:
        node = node.append(mo(special[name]))
    elif name in functions:
        node = node.append(mo(name))
    elif name in over:
        # The accent is stored first, the base follows; mover swaps them.
        ovr = mover(mo(over[name]), reversed=True)
        node.append(ovr)
        node = ovr
    else:
        raise SyntaxError(u'Unknown LaTeX command: ' + name)

    return node, skip
| 560 | |
def tex2mathml(tex_math, inline=True):
    """Return string with MathML code corresponding to `tex_math`.

    `inline`=True is for inline math and `inline`=False for displayed math.
    """

    fragments = parse_latex_math(tex_math, inline=inline).xml()
    return ''.join(fragments)
