Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/lxml/includes/libxml/encoding.h @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac |
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:79f47841a781 | 5:9b1c78e6ba9c |
|---|---|
| 1 /* | |
| 2 * Summary: interface for the encoding conversion functions | |
| 3 * Description: interface for the encoding conversion functions needed for | |
| 4 * XML basic encoding and iconv() support. | |
| 5 * | |
| 6 * Related specs are | |
| 7 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies | |
| 8 * [ISO-10646] UTF-8 and UTF-16 in Annexes | |
| 9 * [ISO-8859-1] ISO Latin-1 character codes. | |
| 10 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- | |
| 11 * Worldwide Character Encoding -- Version 1.0", Addison- | |
| 12 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is | |
| 13 * described in Unicode Technical Report #4. | |
| 14 * [US-ASCII] Coded Character Set--7-bit American Standard Code for | |
| 15 * Information Interchange, ANSI X3.4-1986. | |
| 16 * | |
| 17 * Copy: See Copyright for the status of this software. | |
| 18 * | |
| 19 * Author: Daniel Veillard | |
| 20 */ | |
| 21 | |
| 22 #ifndef __XML_CHAR_ENCODING_H__ | |
| 23 #define __XML_CHAR_ENCODING_H__ | |
| 24 | |
| 25 #include <libxml/xmlversion.h> | |
| 26 | |
| 27 #ifdef LIBXML_ICONV_ENABLED | |
| 28 #include <iconv.h> | |
| 29 #endif | |
| 30 #ifdef LIBXML_ICU_ENABLED | |
| 31 #include <unicode/ucnv.h> | |
| 32 #endif | |
| 33 #ifdef __cplusplus | |
| 34 extern "C" { | |
| 35 #endif | |
| 36 | |
| 37 /* | |
| 38 * xmlCharEncoding: | |
| 39 * | |
| 40 * Predefined values for some standard encodings. | |
| 41 * Libxml does not do beforehand translation on UTF8 and ISOLatinX. | |
| 42 * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default. | |
| 43 * | |
| 44 * Anything else would have to be translated to UTF8 before being | |
| 45 * given to the parser itself. The BOM for UTF16 and the encoding | |
| 46 * declaration are looked at and a converter is looked for at that | |
| 47 * point. If not found the parser stops here as asked by the XML REC. A | |
| 48 * converter can be registered by the user using xmlRegisterCharEncodingHandler | |
| 49 * but the current form doesn't allow stateful transcoding (admittedly a | |
| 50 * serious problem). If iconv has been found, it will be used | |
| 51 * automatically and allows stateful transcoding; the simplest approach is | |
| 52 * then to enable iconv and to provide iconv libs for the encoding | |
| 53 * support needed. | |
| 54 * | |
| 55 * Note that the generic "UTF-16" is not a predefined value. Instead, only | |
| 56 * the specific UTF-16LE and UTF-16BE are present. | |
| 57 */ | |
| 58 typedef enum { | |
| 59 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ | |
| 60 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ | |
| 61 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ | |
| 62 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ | |
| 63 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ | |
| 64 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ | |
| 65 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ | |
| 66 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ | |
| 67 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ | |
| 68 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ | |
| 69 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ | |
| 70 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ | |
| 71 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ | |
| 72 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ | |
| 73 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ | |
| 74 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ | |
| 75 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ | |
| 76 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ | |
| 77 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ | |
| 78 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ | |
| 79 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ | |
| 80 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ | |
| 81 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ | |
| 82 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ | |
| 83 } xmlCharEncoding; | |
| 84 | |
| 85 /** | |
| 86 * xmlCharEncodingInputFunc: | |
| 87 * @out: a pointer to an array of bytes to store the UTF-8 result | |
| 88 * @outlen: the length of @out | |
| 89 * @in: a pointer to an array of chars in the original encoding | |
| 90 * @inlen: the length of @in | |
| 91 * | |
| 92 * Take a block of chars in the original encoding and try to convert | |
| 93 * it to an UTF-8 block of chars out. | |
| 94 * | |
| 95 * Returns the number of bytes written, -1 if lack of space, or -2 | |
| 96 * if the transcoding failed. | |
| 97 * The value of @inlen after return is the number of octets consumed | |
| 98 * if the return value is positive, else unpredictable. | |
| 99 * The value of @outlen after return is the number of octets produced. | |
| 100 */ | |
| 101 typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen, | |
| 102 const unsigned char *in, int *inlen); | |
| 103 | |
| 104 | |
| 105 /** | |
| 106 * xmlCharEncodingOutputFunc: | |
| 107 * @out: a pointer to an array of bytes to store the result | |
| 108 * @outlen: the length of @out | |
| 109 * @in: a pointer to an array of UTF-8 chars | |
| 110 * @inlen: the length of @in | |
| 111 * | |
| 112 * Take a block of UTF-8 chars in and try to convert it to another | |
| 113 * encoding. | |
| 114 * Note: a first call designed to produce heading info is called with | |
| 115 * in = NULL. If stateful this should also initialize the encoder state. | |
| 116 * | |
| 117 * Returns the number of bytes written, -1 if lack of space, or -2 | |
| 118 * if the transcoding failed. | |
| 119 * The value of @inlen after return is the number of octets consumed | |
| 120 * if the return value is positive, else unpredictable. | |
| 121 * The value of @outlen after return is the number of octets produced. | |
| 122 */ | |
| 123 typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, | |
| 124 const unsigned char *in, int *inlen); | |
| 125 | |
| 126 | |
| 127 /* | |
| 128 * Block defining the handlers for non UTF-8 encodings. | |
| 129 * If iconv and/or ICU is supported, there are two extra fields for each. | |
| 130 */ | |
| 131 #ifdef LIBXML_ICU_ENABLED | |
| 132 /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */ | |
| 133 #define ICU_PIVOT_BUF_SIZE 1024 | |
| 134 struct _uconv_t { | |
| 135 UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
| 136 UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
| 137 UChar pivot_buf[ICU_PIVOT_BUF_SIZE]; | |
| 138 UChar *pivot_source; | |
| 139 UChar *pivot_target; | |
| 140 }; | |
| 141 typedef struct _uconv_t uconv_t; | |
| 142 #endif | |
| 143 | |
| 144 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
| 145 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
| 146 struct _xmlCharEncodingHandler { | |
| 147 char *name; | |
| 148 xmlCharEncodingInputFunc input; | |
| 149 xmlCharEncodingOutputFunc output; | |
| 150 #ifdef LIBXML_ICONV_ENABLED | |
| 151 iconv_t iconv_in; | |
| 152 iconv_t iconv_out; | |
| 153 #endif /* LIBXML_ICONV_ENABLED */ | |
| 154 #ifdef LIBXML_ICU_ENABLED | |
| 155 uconv_t *uconv_in; | |
| 156 uconv_t *uconv_out; | |
| 157 #endif /* LIBXML_ICU_ENABLED */ | |
| 158 }; | |
| 159 | |
| 160 #ifdef __cplusplus | |
| 161 } | |
| 162 #endif | |
| 163 #include <libxml/tree.h> | |
| 164 #ifdef __cplusplus | |
| 165 extern "C" { | |
| 166 #endif | |
| 167 | |
| 168 /* | |
| 169 * Interfaces for encoding handlers. | |
| 170 */ | |
| 171 XMLPUBFUN void XMLCALL | |
| 172 xmlInitCharEncodingHandlers (void); | |
| 173 XMLPUBFUN void XMLCALL | |
| 174 xmlCleanupCharEncodingHandlers (void); | |
| 175 XMLPUBFUN void XMLCALL | |
| 176 xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); | |
| 177 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL | |
| 178 xmlGetCharEncodingHandler (xmlCharEncoding enc); | |
| 179 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL | |
| 180 xmlFindCharEncodingHandler (const char *name); | |
| 181 XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL | |
| 182 xmlNewCharEncodingHandler (const char *name, | |
| 183 xmlCharEncodingInputFunc input, | |
| 184 xmlCharEncodingOutputFunc output); | |
| 185 | |
| 186 /* | |
| 187 * Interfaces for encoding names and aliases. | |
| 188 */ | |
| 189 XMLPUBFUN int XMLCALL | |
| 190 xmlAddEncodingAlias (const char *name, | |
| 191 const char *alias); | |
| 192 XMLPUBFUN int XMLCALL | |
| 193 xmlDelEncodingAlias (const char *alias); | |
| 194 XMLPUBFUN const char * XMLCALL | |
| 195 xmlGetEncodingAlias (const char *alias); | |
| 196 XMLPUBFUN void XMLCALL | |
| 197 xmlCleanupEncodingAliases (void); | |
| 198 XMLPUBFUN xmlCharEncoding XMLCALL | |
| 199 xmlParseCharEncoding (const char *name); | |
| 200 XMLPUBFUN const char * XMLCALL | |
| 201 xmlGetCharEncodingName (xmlCharEncoding enc); | |
| 202 | |
| 203 /* | |
| 204 * Interfaces directly used by the parsers. | |
| 205 */ | |
| 206 XMLPUBFUN xmlCharEncoding XMLCALL | |
| 207 xmlDetectCharEncoding (const unsigned char *in, | |
| 208 int len); | |
| 209 | |
| 210 XMLPUBFUN int XMLCALL | |
| 211 xmlCharEncOutFunc (xmlCharEncodingHandler *handler, | |
| 212 xmlBufferPtr out, | |
| 213 xmlBufferPtr in); | |
| 214 | |
| 215 XMLPUBFUN int XMLCALL | |
| 216 xmlCharEncInFunc (xmlCharEncodingHandler *handler, | |
| 217 xmlBufferPtr out, | |
| 218 xmlBufferPtr in); | |
| 219 XMLPUBFUN int XMLCALL | |
| 220 xmlCharEncFirstLine (xmlCharEncodingHandler *handler, | |
| 221 xmlBufferPtr out, | |
| 222 xmlBufferPtr in); | |
| 223 XMLPUBFUN int XMLCALL | |
| 224 xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); | |
| 225 | |
| 226 /* | |
| 227 * Export a few useful functions | |
| 228 */ | |
| 229 #ifdef LIBXML_OUTPUT_ENABLED | |
| 230 XMLPUBFUN int XMLCALL | |
| 231 UTF8Toisolat1 (unsigned char *out, | |
| 232 int *outlen, | |
| 233 const unsigned char *in, | |
| 234 int *inlen); | |
| 235 #endif /* LIBXML_OUTPUT_ENABLED */ | |
| 236 XMLPUBFUN int XMLCALL | |
| 237 isolat1ToUTF8 (unsigned char *out, | |
| 238 int *outlen, | |
| 239 const unsigned char *in, | |
| 240 int *inlen); | |
| 241 #ifdef __cplusplus | |
| 242 } | |
| 243 #endif | |
| 244 | |
| 245 #endif /* __XML_CHAR_ENCODING_H__ */ |
