Mercurial > repos > pfrommolt > ngsrich
comparison NGSrich_0.5.5/src/org/jdom/output/Format.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89ad0a9cca52 |
---|---|
1 /*-- | |
2 | |
3 $Id: Format.java,v 1.14 2009/07/23 05:54:23 jhunter Exp $ | |
4 | |
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin. | |
6 All rights reserved. | |
7 | |
8 Redistribution and use in source and binary forms, with or without | |
9 modification, are permitted provided that the following conditions | |
10 are met: | |
11 | |
12 1. Redistributions of source code must retain the above copyright | |
13 notice, this list of conditions, and the following disclaimer. | |
14 | |
15 2. Redistributions in binary form must reproduce the above copyright | |
16 notice, this list of conditions, and the disclaimer that follows | |
17 these conditions in the documentation and/or other materials | |
18 provided with the distribution. | |
19 | |
20 3. The name "JDOM" must not be used to endorse or promote products | |
21 derived from this software without prior written permission. For | |
22 written permission, please contact <request_AT_jdom_DOT_org>. | |
23 | |
24 4. Products derived from this software may not be called "JDOM", nor | |
25 may "JDOM" appear in their name, without prior written permission | |
26 from the JDOM Project Management <request_AT_jdom_DOT_org>. | |
27 | |
28 In addition, we request (but do not require) that you include in the | |
29 end-user documentation provided with the redistribution and/or in the | |
30 software itself an acknowledgement equivalent to the following: | |
31 "This product includes software developed by the | |
32 JDOM Project (http://www.jdom.org/)." | |
33 Alternatively, the acknowledgment may be graphical using the logos | |
34 available at http://www.jdom.org/images/logos. | |
35 | |
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT | |
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
47 SUCH DAMAGE. | |
48 | |
49 This software consists of voluntary contributions made by many | |
50 individuals on behalf of the JDOM Project and was originally | |
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and | |
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information | |
53 on the JDOM Project, please see <http://www.jdom.org/>. | |
54 | |
55 */ | |
56 | |
57 package org.jdom.output; | |
58 | |
59 import java.lang.reflect.Method; | |
60 import org.jdom.Verifier; | |
61 | |
62 /** | |
63 * Class to encapsulate XMLOutputter format options. | |
64 * Typical users can use the standard format configurations obtained by | |
65 * {@link #getRawFormat} (no whitespace changes), | |
66 * {@link #getPrettyFormat} (whitespace beautification), and | |
67 * {@link #getCompactFormat} (whitespace normalization). | |
68 * <p> | |
69 * Several modes are available to affect the way textual content is printed. | |
70 * See the documentation for {@link TextMode} for details. | |
71 * | |
72 * @version $Revision: 1.14 $, $Date: 2009/07/23 05:54:23 $ | |
73 * @author Jason Hunter | |
74 */ | |
75 public class Format implements Cloneable { | |
76 | |
/** Revision identification string assembled from CVS/RCS keyword expansions for this file. */
77 private static final String CVS_ID = | |
78 "@(#) $RCSfile: Format.java,v $ $Revision: 1.14 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $"; | |
79 | |
80 /** | |
81 * Returns a new Format object that performs no whitespace changes, uses | |
82 * the UTF-8 encoding, doesn't expand empty elements, includes the | |
83 * declaration and encoding, and uses the default entity escape strategy. | |
84 * Tweaks can be made to the returned Format instance without affecting | |
85 * other instances. | |
86 | |
87 * @return a Format with no whitespace changes | |
88 */ | |
89 public static Format getRawFormat() { | |
90 return new Format(); | |
91 } | |
92 | |
93 /** | |
94 * Returns a new Format object that performs whitespace beautification with | |
95 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements, | |
96 * includes the declaration and encoding, and uses the default entity | |
97 * escape strategy. | |
98 * Tweaks can be made to the returned Format instance without affecting | |
99 * other instances. | |
100 * | |
101 * @return a Format with whitespace beautification | |
102 */ | |
103 public static Format getPrettyFormat() { | |
104 Format f = new Format(); | |
105 f.setIndent(STANDARD_INDENT); | |
106 f.setTextMode(TextMode.TRIM); | |
107 return f; | |
108 } | |
109 | |
110 /** | |
111 * Returns a new Format object that performs whitespace normalization, uses | |
112 * the UTF-8 encoding, doesn't expand empty elements, includes the | |
113 * declaration and encoding, and uses the default entity escape strategy. | |
114 * Tweaks can be made to the returned Format instance without affecting | |
115 * other instances. | |
116 * | |
117 * @return a Format with whitespace normalization | |
118 */ | |
119 public static Format getCompactFormat() { | |
120 Format f = new Format(); | |
121 f.setTextMode(TextMode.NORMALIZE); | |
122 return f; | |
123 } | |
124 | |
125 /** Standard indent string, applied by the pretty format. NOTE(review): the literal appears as a single space here; if a wider indent is intended, confirm it was not whitespace-collapsed. */ | |
126 private static final String STANDARD_INDENT = " "; | |
127 | |
128 /** Standard string with which to end a line (CR-LF). */ | |
129 private static final String STANDARD_LINE_SEPARATOR = "\r\n"; | |
130 | |
131 /** Standard output encoding. */ | |
132 private static final String STANDARD_ENCODING = "UTF-8"; | |
133 | |
134 | |
135 /** Indent string; the default is null, meaning no indent string is configured. */ | |
136 String indent = null; | |
137 | |
138 /** Line separator; defaults to the standard line separator. */ | |
139 String lineSeparator = STANDARD_LINE_SEPARATOR; | |
140 | |
141 /** Output encoding name; defaults to the standard encoding. */ | |
142 String encoding = STANDARD_ENCODING; | |
143 | |
144 /** Whether or not to omit the XML declaration | |
145 * - default is <code>false</code> (the declaration is not omitted) */ | |
146 boolean omitDeclaration = false; | |
147 | |
148 /** Whether or not to omit the encoding from the XML declaration | |
149 * - default is <code>false</code> (the encoding is not omitted) */ | |
150 boolean omitEncoding = false; | |
151 | |
152 /** Whether or not to expand empty elements to | |
153 * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code> */ | |
154 boolean expandEmptyElements = false; | |
155 | |
156 /** Whether TrAX output escaping disabling/enabling PIs are ignored | |
157 * or processed - default is <code>false</code> */ | |
158 boolean ignoreTrAXEscapingPIs = false; | |
159 | |
160 /** Text handling mode; defaults to TextMode.PRESERVE. */ | |
161 TextMode mode = TextMode.PRESERVE; | |
162 | |
163 /** Entity escape logic; initially a DefaultEscapeStrategy built for the initial encoding. */ | |
164 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding); | |
165 | |
/**
 * Builds a Format whose settings are exactly the field initializers
 * (raw behavior). Private: instances are obtained through the static
 * factory methods of this class.
 */
private Format() {
    super();
}
170 | |
171 /** | |
172 * Sets the {@link EscapeStrategy} to use for character escaping. | |
173 * | |
174 * @param strategy the EscapeStrategy to use | |
175 * @return a pointer to this Format for chaining | |
176 */ | |
177 public Format setEscapeStrategy(EscapeStrategy strategy) { | |
178 escapeStrategy = strategy; | |
179 return this; | |
180 } | |
181 | |
182 /** | |
183 * Returns the current escape strategy | |
184 * | |
185 * @return the current escape strategy | |
186 */ | |
187 public EscapeStrategy getEscapeStrategy() { | |
188 return escapeStrategy; | |
189 } | |
190 | |
191 /** | |
192 * This will set the newline separator (<code>lineSeparator</code>). | |
193 * The default is <code>\r\n</code>. To make it output | |
194 * the system default line ending string, call | |
195 * <code>setLineSeparator(System.getProperty("line.separator"))</code>. | |
196 * | |
197 * <p> | |
198 * To output "UNIX-style" documents, call | |
199 * <code>setLineSeparator("\n")</code>. To output "Mac-style" | |
200 * documents, call <code>setLineSeparator("\r")</code>. DOS-style | |
201 * documents use CR-LF ("\r\n"), which is the default. | |
202 * </p> | |
203 * | |
204 * <p> | |
205 * Note that this only applies to newlines generated by the | |
206 * outputter. If you parse an XML document that contains newlines | |
207 * embedded inside a text node, and you do not set TextMode.NORMALIZE, | |
208 * then the newlines will be output | |
209 * verbatim, as "\n" which is how parsers normalize them. | |
210 * </p> | |
211 * | |
212 * <p> | |
213 * If the format's "indent" property is null (as is the default | |
214 * for the Raw and Compact formats), then this value only effects the | |
215 * newlines written after the declaration and doctype. | |
216 * </p> | |
217 * | |
218 * @see #setTextMode | |
219 * | |
220 * @param separator <code>String</code> line separator to use. | |
221 * @return a pointer to this Format for chaining | |
222 */ | |
223 public Format setLineSeparator(String separator) { | |
224 this.lineSeparator = separator; | |
225 return this; | |
226 } | |
227 | |
228 /** | |
229 * Returns the current line separator. | |
230 * | |
231 * @return the current line separator | |
232 */ | |
233 public String getLineSeparator() { | |
234 return lineSeparator; | |
235 } | |
236 | |
237 /** | |
238 * This will set whether the XML declaration | |
239 * (<code><?xml version="1.0" | |
240 * encoding="UTF-8"?></code>) | |
241 * includes the encoding of the document. It is common to omit | |
242 * this in uses such as WML and other wireless device protocols. | |
243 * | |
244 * @param omitEncoding <code>boolean</code> indicating whether or not | |
245 * the XML declaration should indicate the document encoding. | |
246 * @return a pointer to this Format for chaining | |
247 */ | |
248 public Format setOmitEncoding(boolean omitEncoding) { | |
249 this.omitEncoding = omitEncoding; | |
250 return this; | |
251 } | |
252 | |
253 /** | |
254 * Returns whether the XML declaration encoding will be omitted. | |
255 * | |
256 * @return whether the XML declaration encoding will be omitted | |
257 */ | |
258 public boolean getOmitEncoding() { | |
259 return omitEncoding; | |
260 } | |
261 | |
262 /** | |
263 * This will set whether the XML declaration | |
264 * (<code><?xml version="1.0"?gt;</code>) | |
265 * will be omitted or not. It is common to omit this in uses such | |
266 * as SOAP and XML-RPC calls. | |
267 * | |
268 * @param omitDeclaration <code>boolean</code> indicating whether or not | |
269 * the XML declaration should be omitted. | |
270 * @return a pointer to this Format for chaining | |
271 */ | |
272 public Format setOmitDeclaration(boolean omitDeclaration) { | |
273 this.omitDeclaration = omitDeclaration; | |
274 return this; | |
275 } | |
276 | |
277 /** | |
278 * Returns whether the XML declaration will be omitted. | |
279 * | |
280 * @return whether the XML declaration will be omitted | |
281 */ | |
282 public boolean getOmitDeclaration() { | |
283 return omitDeclaration; | |
284 } | |
285 | |
286 /** | |
287 * This will set whether empty elements are expanded from | |
288 * <code><tagName/></code> to | |
289 * <code><tagName></tagName></code>. | |
290 * | |
291 * @param expandEmptyElements <code>boolean</code> indicating whether or not | |
292 * empty elements should be expanded. | |
293 * @return a pointer to this Format for chaining | |
294 */ | |
295 public Format setExpandEmptyElements(boolean expandEmptyElements) { | |
296 this.expandEmptyElements = expandEmptyElements; | |
297 return this; | |
298 } | |
299 | |
300 /** | |
301 * Returns whether empty elements are expanded. | |
302 * | |
303 * @return whether empty elements are expanded | |
304 */ | |
305 public boolean getExpandEmptyElements() { | |
306 return expandEmptyElements; | |
307 } | |
308 | |
309 /** | |
310 * This will set whether JAXP TrAX processing instructions for | |
311 * disabling/enabling output escaping are ignored. Disabling | |
312 * output escaping allows using XML text as element content and | |
313 * outputing it verbatim, i.e. as element children would be. | |
314 * <p> | |
315 * When processed, these processing instructions are removed from | |
316 * the generated XML text and control whether the element text | |
317 * content is output verbatim or with escaping of the pre-defined | |
318 * entities in XML 1.0. The text to be output verbatim shall be | |
319 * surrounded by the | |
320 * <code><?javax.xml.transform.disable-output-escaping ?></code> | |
321 * and <code><?javax.xml.transform.enable-output-escaping ?></code> | |
322 * PIs.</p> | |
323 * <p> | |
324 * When ignored, the processing instructions are present in the | |
325 * generated XML text and the pre-defined entities in XML 1.0 are | |
326 * escaped. | |
327 * <p> | |
328 * Default: <code>false</code>.</p> | |
329 * | |
330 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating | |
331 * whether or not TrAX ouput escaping PIs are ignored. | |
332 * | |
333 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING | |
334 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING | |
335 */ | |
336 public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) { | |
337 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs; | |
338 } | |
339 | |
340 /** | |
341 * Returns whether JAXP TrAX processing instructions for | |
342 * disabling/enabling output escaping are ignored. | |
343 * | |
344 * @return whether or not TrAX ouput escaping PIs are ignored. | |
345 */ | |
346 public boolean getIgnoreTrAXEscapingPIs() { | |
347 return ignoreTrAXEscapingPIs; | |
348 } | |
349 | |
350 /** | |
351 * This sets the text output style. Options are available as static | |
352 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}. | |
353 * | |
354 * @return a pointer to this Format for chaining | |
355 */ | |
356 public Format setTextMode(Format.TextMode mode) { | |
357 this.mode = mode; | |
358 return this; | |
359 } | |
360 | |
361 /** | |
362 * Returns the current text output style. | |
363 * | |
364 * @return the current text output style | |
365 */ | |
366 public Format.TextMode getTextMode() { | |
367 return mode; | |
368 } | |
369 | |
370 /** | |
371 * This will set the indent <code>String</code> to use; this | |
372 * is usually a <code>String</code> of empty spaces. If you pass | |
373 * the empty string (""), then no indentation will happen but newlines | |
374 * will still be generated. Passing null will result in no indentation | |
375 * and no newlines generated. Default: none (null) | |
376 * | |
377 * @param indent <code>String</code> to use for indentation. | |
378 * @return a pointer to this Format for chaining | |
379 */ | |
380 public Format setIndent(String indent) { | |
381 this.indent = indent; | |
382 return this; | |
383 } | |
384 | |
385 /** | |
386 * Returns the indent string in use. | |
387 * | |
388 * @return the indent string in use | |
389 */ | |
390 public String getIndent() { | |
391 return indent; | |
392 } | |
393 | |
394 /** | |
395 * Sets the output encoding. The name should be an accepted XML | |
396 * encoding. | |
397 * | |
398 * @param encoding the encoding format. Use XML-style names like | |
399 * "UTF-8" or "ISO-8859-1" or "US-ASCII" | |
400 * @return a pointer to this Format for chaining | |
401 */ | |
402 public Format setEncoding(String encoding) { | |
403 this.encoding = encoding; | |
404 escapeStrategy = new DefaultEscapeStrategy(encoding); | |
405 return this; | |
406 } | |
407 | |
408 /** | |
409 * Returns the configured output encoding. | |
410 * | |
411 * @return the output encoding | |
412 */ | |
413 public String getEncoding() { | |
414 return encoding; | |
415 } | |
416 | |
417 protected Object clone() { | |
418 Format format = null; | |
419 | |
420 try { | |
421 format = (Format) super.clone(); | |
422 } | |
423 catch (CloneNotSupportedException ce) { | |
424 } | |
425 | |
426 return format; | |
427 } | |
428 | |
429 | |
430 /** | |
431 * Handle common charsets quickly and easily. Use reflection | |
432 * to query the JDK 1.4 CharsetEncoder class for unknown charsets. | |
433 * If JDK 1.4 isn't around, default to no special encoding. | |
434 */ | |
435 class DefaultEscapeStrategy implements EscapeStrategy { | |
436 private int bits; | |
437 Object encoder; | |
438 Method canEncode; | |
439 | |
440 public DefaultEscapeStrategy(String encoding) { | |
441 if ("UTF-8".equalsIgnoreCase(encoding) || | |
442 "UTF-16".equalsIgnoreCase(encoding)) { | |
443 bits = 16; | |
444 } | |
445 else if ("ISO-8859-1".equalsIgnoreCase(encoding) || | |
446 "Latin1".equalsIgnoreCase(encoding)) { | |
447 bits = 8; | |
448 } | |
449 else if ("US-ASCII".equalsIgnoreCase(encoding) || | |
450 "ASCII".equalsIgnoreCase(encoding)) { | |
451 bits = 7; | |
452 } | |
453 else { | |
454 bits = 0; | |
455 //encoder = Charset.forName(encoding).newEncoder(); | |
456 try { | |
457 Class charsetClass = Class.forName("java.nio.charset.Charset"); | |
458 Class encoderClass = Class.forName("java.nio.charset.CharsetEncoder"); | |
459 Method forName = charsetClass.getMethod("forName", new Class[]{String.class}); | |
460 Object charsetObj = forName.invoke(null, new Object[]{encoding}); | |
461 Method newEncoder = charsetClass.getMethod("newEncoder", null); | |
462 encoder = newEncoder.invoke(charsetObj, null); | |
463 canEncode = encoderClass.getMethod("canEncode", new Class[]{char.class}); | |
464 } | |
465 catch (Exception ignored) { | |
466 } | |
467 } | |
468 } | |
469 | |
470 public boolean shouldEscape(char ch) { | |
471 if (bits == 16) { | |
472 if (Verifier.isHighSurrogate(ch)) | |
473 return true; // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4 | |
474 else | |
475 return false; | |
476 } | |
477 if (bits == 8) { | |
478 if ((int) ch > 255) | |
479 return true; | |
480 else | |
481 return false; | |
482 } | |
483 if (bits == 7) { | |
484 if ((int) ch > 127) | |
485 return true; | |
486 else | |
487 return false; | |
488 } | |
489 else { | |
490 if (Verifier.isHighSurrogate(ch)) | |
491 return true; // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4 | |
492 | |
493 if (canEncode != null && encoder != null) { | |
494 try { | |
495 Boolean val = (Boolean) canEncode.invoke(encoder, new Object[]{new Character(ch)}); | |
496 return !val.booleanValue(); | |
497 } | |
498 catch (Exception ignored) { | |
499 } | |
500 } | |
501 // Return false if we don't know. This risks not escaping | |
502 // things which should be escaped, but also means people won't | |
503 // start getting loads of unnecessary escapes. | |
504 return false; | |
505 } | |
506 } | |
507 } | |
508 | |
509 | |
510 /** | |
511 * Class to signify how text should be handled on output. The following | |
512 * table provides details. | |
513 * | |
514 * <table> | |
515 * <tr> | |
516 * <th align="left"> | |
517 * Text Mode | |
518 * </th> | |
519 * <th> | |
520 * Resulting behavior. | |
521 * </th> | |
522 * </tr> | |
523 * | |
524 * <tr valign="top"> | |
525 * <td> | |
526 * <i>PRESERVE (Default)</i> | |
527 * </td> | |
528 * <td> | |
529 * All content is printed in the format it was created, no whitespace | |
530 * or line separators are are added or removed. | |
531 * </td> | |
532 * </tr> | |
533 * | |
534 * <tr valign="top"> | |
535 * <td> | |
536 * TRIM_FULL_WHITE | |
537 * </td> | |
538 * <td> | |
539 * Content between tags consisting of all whitespace is not printed. | |
540 * If the content contains even one non-whitespace character, it is | |
541 * printed verbatim, whitespace and all. | |
542 * </td> | |
543 * </tr> | |
544 * | |
545 * <tr valign="top"> | |
546 * <td> | |
547 * TRIM | |
548 * </td> | |
549 * <td> | |
550 * Same as TrimAllWhite, plus leading/trailing whitespace are | |
551 * trimmed. | |
552 * </td> | |
553 * </tr> | |
554 * | |
555 * <tr valign="top"> | |
556 * <td> | |
557 * NORMALIZE | |
558 * </td> | |
559 * <td> | |
560 * Same as TextTrim, plus addition interior whitespace is compressed | |
561 * to a single space. | |
562 * </td> | |
563 * </tr> | |
564 * </table> | |
565 * | |
566 * In most cases textual content is aligned with the surrounding tags | |
567 * (after the appropriate text mode is applied). In the case where the only | |
568 * content between the start and end tags is textual, the start tag, text, | |
569 * and end tag are all printed on the same line. If the document being | |
570 * output already has whitespace, it's wise to turn on TRIM mode so the | |
571 * pre-existing whitespace can be trimmed before adding new whitespace. | |
572 * <p> | |
573 * When a element has a xml:space attribute with the value of "preserve", | |
574 * all formating is turned off and reverts back to the default until the | |
575 * element and its contents have been printed. If a nested element contains | |
576 * another xml:space with the value "default" formatting is turned back on | |
577 * for the child element and then off for the remainder of the parent | |
578 * element. | |
579 */ | |
580 public static class TextMode { | |
581 /** | |
582 * Mode for literal text preservation. | |
583 */ | |
584 public static final TextMode PRESERVE = new TextMode("PRESERVE"); | |
585 | |
586 /** | |
587 * Mode for text trimming (left and right trim). | |
588 */ | |
589 public static final TextMode TRIM = new TextMode("TRIM"); | |
590 | |
591 /** | |
592 * Mode for text normalization (left and right trim plus internal | |
593 * whitespace is normalized to a single space. | |
594 * @see org.jdom.Element#getTextNormalize | |
595 */ | |
596 public static final TextMode NORMALIZE = new TextMode("NORMALIZE"); | |
597 | |
598 /** | |
599 * Mode for text trimming of content consisting of nothing but | |
600 * whitespace but otherwise not changing output. | |
601 */ | |
602 public static final TextMode TRIM_FULL_WHITE = | |
603 new TextMode("TRIM_FULL_WHITE"); | |
604 | |
605 private final String name; | |
606 | |
607 private TextMode(String name) { | |
608 this.name = name; | |
609 } | |
610 | |
611 public String toString() { | |
612 return name; | |
613 } | |
614 } | |
615 } |