comparison env/lib/python3.9/site-packages/docutils/utils/punctuation_chars.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # :Id: $Id: punctuation_chars.py 8016 2017-01-17 15:06:17Z milde $
4 # :Copyright: © 2011, 2017 Günter Milde.
5 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
6 #
7 # Copying and distribution of this file, with or without modification,
8 # are permitted in any medium without royalty provided the copyright
9 # notice and this notice are preserved.
10 # This file is offered as-is, without any warranty.
11 #
12 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
13 #
14 # This file is generated by
15 # ``docutils/tools/dev/generate_punctuation_chars.py``.
16 # ::
17
18 import sys, re
19 import unicodedata
20
21 """Docutils character category patterns.
22
23 Patterns for the implementation of the `inline markup recognition rules`_
24 in the reStructuredText parser `docutils.parsers.rst.states.py` based
25 on Unicode character categories.
26 The patterns are used inside ``[ ]`` in regular expressions.
27
28 Rule (5) requires determination of matching open/close pairs. However, the
29 pairing of open/close quotes is ambiguous due to different typographic
30 conventions in different languages. The ``match_chars`` function tests
31 whether two characters form an open/close pair.
32
33 The patterns are generated by
34 ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependence
35 on the Python version and avoid the time-consuming generation with every
36 Docutils run. See there for motives and implementation details.
37
38 The category of some characters changed with the development of the
39 Unicode standard. The current lists are generated with the help of the
40 "unicodedata" module of Python 2.7.13 (based on Unicode version 5.2.0).
41
42 .. _inline markup recognition rules:
43 http://docutils.sf.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
44 """
45
# Opening characters.  The character at index ``i`` pairs with the character
# at the same index in ``closers``.  The string is meant to be dropped into a
# regular-expression ``[ ]`` class, hence the backslash in front of ``[``.
# The literal chunks below are generated data: keep their order untouched.
openers = u''.join([
    u'"\'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768',
    u'\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea',
    u'\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991',
    u'\u2993\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26\u2e28',
    u'\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d',
    u'\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41',
    u'\ufe43\ufe47\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff5f\uff62',
    u'\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20',
    u'\u201a\u201e\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d',
    u'\u2e1d\u2e21\u201b\u201f',
])
# Closing characters, index-aligned with ``openers``: ``closers[i]`` is the
# counterpart of ``openers[i]``.  Like ``openers`` this is written for use
# inside a regular-expression ``[ ]`` class, hence the backslash before ``]``.
# The literal chunks below are generated data: keep their order untouched.
closers = u''.join([
    u'"\')>\\]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a\u2769',
    u'\u276b\u276d\u276f\u2771\u2773\u2775\u27c6\u27e7\u27e9\u27eb',
    u'\u27ed\u27ef\u2984\u2986\u2988\u298a\u298c\u298e\u2990\u2992',
    u'\u2994\u2996\u2998\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29',
    u'\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e',
    u'\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42',
    u'\ufe44\ufe48\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff60\uff63',
    u'\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21',
    u'\u201b\u201f\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c',
    u'\u2e1c\u2e20\u201a\u201e',
])
# Delimiter characters, written as the inside of a regular-expression
# ``[ ]`` class: ``a-b`` denotes a range and the leading hyphen is escaped.
# Some ranges are split across two adjacent chunks (a chunk may end in ``-``),
# so the chunks must stay in this exact order.
delimiters = u''.join([
    u'\\-/:\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589',
    u'\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c',
    u'\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d',
    u'\u07f7-\u07f9\u0830-\u083e\u0964\u0965\u0970\u0df4\u0e4f',
    u'\u0e5a\u0e5b\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f',
    u'\u10fb\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735',
    u'\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945',
    u'\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-',
    u'\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3\u2010-\u2017\u2020-',
    u'\u2027\u2030-\u2038\u203b-\u203e\u2041-\u2043\u2047-',
    u'\u2051\u2053\u2055-\u205e\u2cf9-\u2cfc\u2cfe\u2cff\u2e00',
    u'\u2e01\u2e06-\u2e08\u2e0b\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-',
    u'\u2e2e\u2e30\u2e31\u3001-\u3003\u301c\u3030\u303d\u30a0',
    u'\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7',
    u'\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f',
    u'\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb',
    u'\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c',
    u'\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a',
    u'\ufe6b\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a',
    u'\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65',
])
# Characters beyond the Basic Multilingual Plane are only appended when the
# interpreter can represent them as single code points.
if sys.maxunicode >= 0x10FFFF:  # "wide" build
    delimiters += u''.join([
        u'\U00010100\U00010101\U0001039f\U000103d0\U00010857',
        u'\U0001091f\U0001093f\U00010a50-\U00010a58\U00010a7f',
        u'\U00010b39-\U00010b3f\U000110bb\U000110bc\U000110be-',
        u'\U000110c1\U00012470-\U00012473',
    ])
# Two backslash characters followed by ``.,;!?``; inside a ``[ ]`` class the
# backslash pair reads as one escaped backslash.
closing_delimiters = u'\\\\.,;!?'
92
93
94 # Matching open/close quotes
95 # --------------------------
96
# Maps an opening quote to a string of extra closing characters that are
# accepted for it, on top of the positional ``openers``/``closers`` pairing.
# Each entry is preceded by the glyphs involved and a usage example.
quote_pairs = {
    # » »  Swedish
    u'\xbb': u'\xbb',
    # ‘ ‚  Albanian/Greek/Turkish
    u'\u2018': u'\u201a',
    # ’ ’  Swedish
    u'\u2019': u'\u2019',
    # ‚ ‘  German;  ‚ ’  Polish
    u'\u201a': u'\u2018\u2019',
    # “ „  Albanian/Greek/Turkish
    u'\u201c': u'\u201e',
    # „ “  German;  „ ”  Polish
    u'\u201e': u'\u201c\u201d',
    # ” ”  Swedish
    u'\u201d': u'\u201d',
    # › ›  Swedish
    u'\u203a': u'\u203a',
}
"""Additional open/close quote pairs."""
108
def match_chars(c1, c2):
    """Test whether `c1` and `c2` are a matching open/close character pair.

    Matching open/close pairs are at the same position in
    `punctuation_chars.openers` and `punctuation_chars.closers`.
    The pairing of open/close quotes is ambiguous due to different
    typographic conventions in different languages,
    so we test for additional matches stored in `quote_pairs`.

    Both arguments must be single characters; an empty or multi-character
    string never forms a pair.

    Return True if `c1` opens and `c2` closes a known pair, else False.
    """
    # The lookups below are substring tests.  Without this guard an empty
    # `c1` is "found" at position 0 and an empty `c2` is "contained" in
    # every string, so both would be reported as matches.
    if len(c1) != 1 or len(c2) != 1:
        return False
    i = openers.find(c1)
    if i == -1:  # c1 not in openers
        return False
    if c2 in quote_pairs.get(c1, u''):
        return True
    # An opener may be listed more than once, each time aligned with a
    # different closer (u'\u301d' is aligned with both u'\u301e' and
    # u'\u301f'), so check every occurrence rather than only the first.
    while i != -1:
        if closers[i] == c2:
            return True
        i = openers.find(c1, i + 1)
    return False