Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/lxml/includes/libxml/HTMLparser.h @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 /* | |
| 2 * Summary: interface for an HTML 4.0 non-verifying parser | |
| 3 * Description: this module implements an HTML 4.0 non-verifying parser | |
| 4 * with an API compatible with that of the XML parser. It should | |
| 5 * be able to parse "real world" HTML, even if severely | |
| 6 * broken from a specification point of view. | |
| 7 * | |
| 8 * Copy: See Copyright for the status of this software. | |
| 9 * | |
| 10 * Author: Daniel Veillard | |
| 11 */ | |
| 12 | |
| 13 #ifndef __HTML_PARSER_H__ | |
| 14 #define __HTML_PARSER_H__ | |
| 15 #include <libxml/xmlversion.h> | |
| 16 #include <libxml/parser.h> | |
| 17 | |
| 18 #ifdef LIBXML_HTML_ENABLED | |
| 19 | |
| 20 #ifdef __cplusplus | |
| 21 extern "C" { | |
| 22 #endif | |
| 23 | |
| 24 /* | |
| 25 * Most of the back-end structures from XML and HTML are shared. | |
| 26 */ | |
| 27 typedef xmlParserCtxt htmlParserCtxt; | |
| 28 typedef xmlParserCtxtPtr htmlParserCtxtPtr; | |
| 29 typedef xmlParserNodeInfo htmlParserNodeInfo; | |
| 30 typedef xmlSAXHandler htmlSAXHandler; | |
| 31 typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; | |
| 32 typedef xmlParserInput htmlParserInput; | |
| 33 typedef xmlParserInputPtr htmlParserInputPtr; | |
| 34 typedef xmlDocPtr htmlDocPtr; | |
| 35 typedef xmlNodePtr htmlNodePtr; | |
| 36 | |
| 37 /* | |
| 38 * Internal description of an HTML element, representing HTML 4.01 | |
| 39 * and XHTML 1.0 (which share the same structure). | |
| 40 */ | |
| 41 typedef struct _htmlElemDesc htmlElemDesc; | |
| 42 typedef htmlElemDesc *htmlElemDescPtr; | |
| 43 struct _htmlElemDesc { | |
| 44 const char *name; /* The tag name */ | |
| 45 char startTag; /* Whether the start tag can be implied */ | |
| 46 char endTag; /* Whether the end tag can be implied */ | |
| 47 char saveEndTag; /* Whether the end tag should be saved */ | |
| 48 char empty; /* Is this an empty element ? */ | |
| 49 char depr; /* Is this a deprecated element ? */ | |
| 50 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ | |
| 51 char isinline; /* is this a block 0 or inline 1 element */ | |
| 52 const char *desc; /* the description */ | |
| 53 | |
| 54 /* NRK Jan.2003 | |
| 55 * New fields encapsulating HTML structure | |
| 56 * | |
| 57 * Bugs: | |
| 58 * This is a very limited representation. It fails to tell us when | |
| 59 * an element *requires* subelements (we only have whether they're | |
| 60 * allowed or not), and it doesn't tell us where CDATA and PCDATA | |
| 61 * are allowed. Some element relationships are not fully represented: | |
| 62 * these are flagged with the word MODIFIER | |
| 63 */ | |
| 64 const char** subelts; /* allowed sub-elements of this element */ | |
| 65 const char* defaultsubelt; /* subelement for suggested auto-repair | |
| 66 if necessary or NULL */ | |
| 67 const char** attrs_opt; /* Optional Attributes */ | |
| 68 const char** attrs_depr; /* Additional deprecated attributes */ | |
| 69 const char** attrs_req; /* Required attributes */ | |
| 70 }; | |
| 71 | |
| 72 /* | |
| 73 * Internal description of an HTML entity. | |
| 74 */ | |
| 75 typedef struct _htmlEntityDesc htmlEntityDesc; | |
| 76 typedef htmlEntityDesc *htmlEntityDescPtr; | |
| 77 struct _htmlEntityDesc { | |
| 78 unsigned int value; /* the UNICODE value for the character */ | |
| 79 const char *name; /* The entity name */ | |
| 80 const char *desc; /* the description */ | |
| 81 }; | |
| 82 | |
| 83 /* | |
| 84 * There are only a few public functions. | |
| 85 */ | |
| 86 XMLPUBFUN const htmlElemDesc * XMLCALL | |
| 87 htmlTagLookup (const xmlChar *tag); | |
| 88 XMLPUBFUN const htmlEntityDesc * XMLCALL | |
| 89 htmlEntityLookup(const xmlChar *name); | |
| 90 XMLPUBFUN const htmlEntityDesc * XMLCALL | |
| 91 htmlEntityValueLookup(unsigned int value); | |
| 92 | |
| 93 XMLPUBFUN int XMLCALL | |
| 94 htmlIsAutoClosed(htmlDocPtr doc, | |
| 95 htmlNodePtr elem); | |
| 96 XMLPUBFUN int XMLCALL | |
| 97 htmlAutoCloseTag(htmlDocPtr doc, | |
| 98 const xmlChar *name, | |
| 99 htmlNodePtr elem); | |
| 100 XMLPUBFUN const htmlEntityDesc * XMLCALL | |
| 101 htmlParseEntityRef(htmlParserCtxtPtr ctxt, | |
| 102 const xmlChar **str); | |
| 103 XMLPUBFUN int XMLCALL | |
| 104 htmlParseCharRef(htmlParserCtxtPtr ctxt); | |
| 105 XMLPUBFUN void XMLCALL | |
| 106 htmlParseElement(htmlParserCtxtPtr ctxt); | |
| 107 | |
| 108 XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
| 109 htmlNewParserCtxt(void); | |
| 110 | |
| 111 XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
| 112 htmlCreateMemoryParserCtxt(const char *buffer, | |
| 113 int size); | |
| 114 | |
| 115 XMLPUBFUN int XMLCALL | |
| 116 htmlParseDocument(htmlParserCtxtPtr ctxt); | |
| 117 XMLPUBFUN htmlDocPtr XMLCALL | |
| 118 htmlSAXParseDoc (const xmlChar *cur, | |
| 119 const char *encoding, | |
| 120 htmlSAXHandlerPtr sax, | |
| 121 void *userData); | |
| 122 XMLPUBFUN htmlDocPtr XMLCALL | |
| 123 htmlParseDoc (const xmlChar *cur, | |
| 124 const char *encoding); | |
| 125 XMLPUBFUN htmlDocPtr XMLCALL | |
| 126 htmlSAXParseFile(const char *filename, | |
| 127 const char *encoding, | |
| 128 htmlSAXHandlerPtr sax, | |
| 129 void *userData); | |
| 130 XMLPUBFUN htmlDocPtr XMLCALL | |
| 131 htmlParseFile (const char *filename, | |
| 132 const char *encoding); | |
| 133 XMLPUBFUN int XMLCALL | |
| 134 UTF8ToHtml (unsigned char *out, | |
| 135 int *outlen, | |
| 136 const unsigned char *in, | |
| 137 int *inlen); | |
| 138 XMLPUBFUN int XMLCALL | |
| 139 htmlEncodeEntities(unsigned char *out, | |
| 140 int *outlen, | |
| 141 const unsigned char *in, | |
| 142 int *inlen, int quoteChar); | |
| 143 XMLPUBFUN int XMLCALL | |
| 144 htmlIsScriptAttribute(const xmlChar *name); | |
| 145 XMLPUBFUN int XMLCALL | |
| 146 htmlHandleOmittedElem(int val); | |
| 147 | |
| 148 #ifdef LIBXML_PUSH_ENABLED | |
| 149 /** | |
| 150 * Interfaces for the Push mode. | |
| 151 */ | |
| 152 XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
| 153 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, | |
| 154 void *user_data, | |
| 155 const char *chunk, | |
| 156 int size, | |
| 157 const char *filename, | |
| 158 xmlCharEncoding enc); | |
| 159 XMLPUBFUN int XMLCALL | |
| 160 htmlParseChunk (htmlParserCtxtPtr ctxt, | |
| 161 const char *chunk, | |
| 162 int size, | |
| 163 int terminate); | |
| 164 #endif /* LIBXML_PUSH_ENABLED */ | |
| 165 | |
| 166 XMLPUBFUN void XMLCALL | |
| 167 htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); | |
| 168 | |
| 169 /* | |
| 170 * New set of simpler/more flexible APIs | |
| 171 */ | |
| 172 /** | |
| 173 * htmlParserOption: | |
| 174 * | |
| 175 * This is the set of HTML parser options that can be passed down | |
| 176 * to the htmlReadDoc() and similar calls. | |
| 177 */ | |
| 178 typedef enum { | |
| 179 HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ | |
| 180 HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */ | |
| 181 HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ | |
| 182 HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ | |
| 183 HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ | |
| 184 HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ | |
| 185 HTML_PARSE_NONET = 1<<11,/* Forbid network access */ | |
| 186 HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */ | |
| 187 HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */ | |
| 188 HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */ | |
| 189 } htmlParserOption; | |
| 190 | |
| 191 XMLPUBFUN void XMLCALL | |
| 192 htmlCtxtReset (htmlParserCtxtPtr ctxt); | |
| 193 XMLPUBFUN int XMLCALL | |
| 194 htmlCtxtUseOptions (htmlParserCtxtPtr ctxt, | |
| 195 int options); | |
| 196 XMLPUBFUN htmlDocPtr XMLCALL | |
| 197 htmlReadDoc (const xmlChar *cur, | |
| 198 const char *URL, | |
| 199 const char *encoding, | |
| 200 int options); | |
| 201 XMLPUBFUN htmlDocPtr XMLCALL | |
| 202 htmlReadFile (const char *URL, | |
| 203 const char *encoding, | |
| 204 int options); | |
| 205 XMLPUBFUN htmlDocPtr XMLCALL | |
| 206 htmlReadMemory (const char *buffer, | |
| 207 int size, | |
| 208 const char *URL, | |
| 209 const char *encoding, | |
| 210 int options); | |
| 211 XMLPUBFUN htmlDocPtr XMLCALL | |
| 212 htmlReadFd (int fd, | |
| 213 const char *URL, | |
| 214 const char *encoding, | |
| 215 int options); | |
| 216 XMLPUBFUN htmlDocPtr XMLCALL | |
| 217 htmlReadIO (xmlInputReadCallback ioread, | |
| 218 xmlInputCloseCallback ioclose, | |
| 219 void *ioctx, | |
| 220 const char *URL, | |
| 221 const char *encoding, | |
| 222 int options); | |
| 223 XMLPUBFUN htmlDocPtr XMLCALL | |
| 224 htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, | |
| 225 const xmlChar *cur, | |
| 226 const char *URL, | |
| 227 const char *encoding, | |
| 228 int options); | |
| 229 XMLPUBFUN htmlDocPtr XMLCALL | |
| 230 htmlCtxtReadFile (xmlParserCtxtPtr ctxt, | |
| 231 const char *filename, | |
| 232 const char *encoding, | |
| 233 int options); | |
| 234 XMLPUBFUN htmlDocPtr XMLCALL | |
| 235 htmlCtxtReadMemory (xmlParserCtxtPtr ctxt, | |
| 236 const char *buffer, | |
| 237 int size, | |
| 238 const char *URL, | |
| 239 const char *encoding, | |
| 240 int options); | |
| 241 XMLPUBFUN htmlDocPtr XMLCALL | |
| 242 htmlCtxtReadFd (xmlParserCtxtPtr ctxt, | |
| 243 int fd, | |
| 244 const char *URL, | |
| 245 const char *encoding, | |
| 246 int options); | |
| 247 XMLPUBFUN htmlDocPtr XMLCALL | |
| 248 htmlCtxtReadIO (xmlParserCtxtPtr ctxt, | |
| 249 xmlInputReadCallback ioread, | |
| 250 xmlInputCloseCallback ioclose, | |
| 251 void *ioctx, | |
| 252 const char *URL, | |
| 253 const char *encoding, | |
| 254 int options); | |
| 255 | |
| 256 /* NRK/Jan2003: further knowledge of HTML structure | |
| 257 */ | |
| 258 typedef enum { | |
| 259 HTML_NA = 0 , /* something we don't check at all */ | |
| 260 HTML_INVALID = 0x1 , | |
| 261 HTML_DEPRECATED = 0x2 , | |
| 262 HTML_VALID = 0x4 , | |
| 263 HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */ | |
| 264 } htmlStatus ; | |
| 265 | |
| 266 /* Using htmlElemDesc rather than name here, to emphasise the fact | |
| 267 that otherwise there's a lookup overhead | |
| 268 */ | |
| 269 XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ; | |
| 270 XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ; | |
| 271 XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ; | |
| 272 XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ; | |
| 273 /** | |
| 274 * htmlDefaultSubelement: | |
| 275 * @elt: HTML element | |
| 276 * | |
| 277 * Returns the default subelement for this element | |
| 278 */ | |
| 279 #define htmlDefaultSubelement(elt) elt->defaultsubelt | |
| 280 /** | |
| 281 * htmlElementAllowedHereDesc: | |
| 282 * @parent: HTML parent element | |
| 283 * @elt: HTML element | |
| 284 * | |
| 285 * Checks whether an HTML element description may be a | |
| 286 * direct child of the specified element. | |
| 287 * | |
| 288 * Returns 1 if allowed; 0 otherwise. | |
| 289 */ | |
| 290 #define htmlElementAllowedHereDesc(parent,elt) \ | |
| 291 htmlElementAllowedHere((parent), (elt)->name) | |
| 292 /** | |
| 293 * htmlRequiredAttrs: | |
| 294 * @elt: HTML element | |
| 295 * | |
| 296 * Returns the attributes required for the specified element. | |
| 297 */ | |
| 298 #define htmlRequiredAttrs(elt) (elt)->attrs_req | |
| 299 | |
| 300 | |
| 301 #ifdef __cplusplus | |
| 302 } | |
| 303 #endif | |
| 304 | |
| 305 #endif /* LIBXML_HTML_ENABLED */ | |
| 306 #endif /* __HTML_PARSER_H__ */ |
