comparison NGSrich_0.5.5/src/org/jdom/output/Format.java @ 0:89ad0a9cca52 default tip

Uploaded
author pfrommolt
date Mon, 21 Nov 2011 08:12:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89ad0a9cca52
1 /*--
2
3 $Id: Format.java,v 1.14 2009/07/23 05:54:23 jhunter Exp $
4
5 Copyright (C) 2000-2007 Jason Hunter & Brett McLaughlin.
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 1. Redistributions of source code must retain the above copyright
13 notice, this list of conditions, and the following disclaimer.
14
15 2. Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions, and the disclaimer that follows
17 these conditions in the documentation and/or other materials
18 provided with the distribution.
19
20 3. The name "JDOM" must not be used to endorse or promote products
21 derived from this software without prior written permission. For
22 written permission, please contact <request_AT_jdom_DOT_org>.
23
24 4. Products derived from this software may not be called "JDOM", nor
25 may "JDOM" appear in their name, without prior written permission
26 from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28 In addition, we request (but do not require) that you include in the
29 end-user documentation provided with the redistribution and/or in the
30 software itself an acknowledgement equivalent to the following:
31 "This product includes software developed by the
32 JDOM Project (http://www.jdom.org/)."
33 Alternatively, the acknowledgment may be graphical using the logos
34 available at http://www.jdom.org/images/logos.
35
36 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 SUCH DAMAGE.
48
49 This software consists of voluntary contributions made by many
50 individuals on behalf of the JDOM Project and was originally
51 created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52 Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53 on the JDOM Project, please see <http://www.jdom.org/>.
54
55 */
56
57 package org.jdom.output;
58
59 import java.lang.reflect.Method;
60 import org.jdom.Verifier;
61
62 /**
63 * Class to encapsulate XMLOutputter format options.
64 * Typical users can use the standard format configurations obtained by
65 * {@link #getRawFormat} (no whitespace changes),
66 * {@link #getPrettyFormat} (whitespace beautification), and
67 * {@link #getCompactFormat} (whitespace normalization).
68 * <p>
69 * Several modes are available to effect the way textual content is printed.
70 * See the documentation for {@link TextMode} for details.
71 *
72 * @version $Revision: 1.14 $, $Date: 2009/07/23 05:54:23 $
73 * @author Jason Hunter
74 */
75 public class Format implements Cloneable {
76
77 private static final String CVS_ID =
78 "@(#) $RCSfile: Format.java,v $ $Revision: 1.14 $ $Date: 2009/07/23 05:54:23 $ $Name: jdom_1_1_1 $";
79
80 /**
81 * Returns a new Format object that performs no whitespace changes, uses
82 * the UTF-8 encoding, doesn't expand empty elements, includes the
83 * declaration and encoding, and uses the default entity escape strategy.
84 * Tweaks can be made to the returned Format instance without affecting
85 * other instances.
86
87 * @return a Format with no whitespace changes
88 */
89 public static Format getRawFormat() {
90 return new Format();
91 }
92
93 /**
94 * Returns a new Format object that performs whitespace beautification with
95 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
96 * includes the declaration and encoding, and uses the default entity
97 * escape strategy.
98 * Tweaks can be made to the returned Format instance without affecting
99 * other instances.
100 *
101 * @return a Format with whitespace beautification
102 */
103 public static Format getPrettyFormat() {
104 Format f = new Format();
105 f.setIndent(STANDARD_INDENT);
106 f.setTextMode(TextMode.TRIM);
107 return f;
108 }
109
110 /**
111 * Returns a new Format object that performs whitespace normalization, uses
112 * the UTF-8 encoding, doesn't expand empty elements, includes the
113 * declaration and encoding, and uses the default entity escape strategy.
114 * Tweaks can be made to the returned Format instance without affecting
115 * other instances.
116 *
117 * @return a Format with whitespace normalization
118 */
119 public static Format getCompactFormat() {
120 Format f = new Format();
121 f.setTextMode(TextMode.NORMALIZE);
122 return f;
123 }
124
125 /** standard value to indent by, if we are indenting */
126 private static final String STANDARD_INDENT = " ";
127
128 /** standard string with which to end a line */
129 private static final String STANDARD_LINE_SEPARATOR = "\r\n";
130
131 /** standard encoding */
132 private static final String STANDARD_ENCODING = "UTF-8";
133
134
135 /** The default indent is no spaces (as original document) */
136 String indent = null;
137
138 /** New line separator */
139 String lineSeparator = STANDARD_LINE_SEPARATOR;
140
141 /** The encoding format */
142 String encoding = STANDARD_ENCODING;
143
144 /** Whether or not to output the XML declaration
145 * - default is <code>false</code> */
146 boolean omitDeclaration = false;
147
148 /** Whether or not to output the encoding in the XML declaration
149 * - default is <code>false</code> */
150 boolean omitEncoding = false;
151
152 /** Whether or not to expand empty elements to
153 * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code> */
154 boolean expandEmptyElements = false;
155
156 /** Whether TrAX output escaping disabling/enabling PIs are ignored
157 * or processed - default is <code>false</code> */
158 boolean ignoreTrAXEscapingPIs = false;
159
160 /** text handling mode */
161 TextMode mode = TextMode.PRESERVE;
162
163 /** entity escape logic */
164 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
165
166 /**
167 * Creates a new Format instance with default (raw) behavior.
168 */
169 private Format() { }
170
171 /**
172 * Sets the {@link EscapeStrategy} to use for character escaping.
173 *
174 * @param strategy the EscapeStrategy to use
175 * @return a pointer to this Format for chaining
176 */
177 public Format setEscapeStrategy(EscapeStrategy strategy) {
178 escapeStrategy = strategy;
179 return this;
180 }
181
182 /**
183 * Returns the current escape strategy
184 *
185 * @return the current escape strategy
186 */
187 public EscapeStrategy getEscapeStrategy() {
188 return escapeStrategy;
189 }
190
191 /**
192 * This will set the newline separator (<code>lineSeparator</code>).
193 * The default is <code>\r\n</code>. To make it output
194 * the system default line ending string, call
195 * <code>setLineSeparator(System.getProperty("line.separator"))</code>.
196 *
197 * <p>
198 * To output "UNIX-style" documents, call
199 * <code>setLineSeparator("\n")</code>. To output "Mac-style"
200 * documents, call <code>setLineSeparator("\r")</code>. DOS-style
201 * documents use CR-LF ("\r\n"), which is the default.
202 * </p>
203 *
204 * <p>
205 * Note that this only applies to newlines generated by the
206 * outputter. If you parse an XML document that contains newlines
207 * embedded inside a text node, and you do not set TextMode.NORMALIZE,
208 * then the newlines will be output
209 * verbatim, as "\n" which is how parsers normalize them.
210 * </p>
211 *
212 * <p>
213 * If the format's "indent" property is null (as is the default
214 * for the Raw and Compact formats), then this value only effects the
215 * newlines written after the declaration and doctype.
216 * </p>
217 *
218 * @see #setTextMode
219 *
220 * @param separator <code>String</code> line separator to use.
221 * @return a pointer to this Format for chaining
222 */
223 public Format setLineSeparator(String separator) {
224 this.lineSeparator = separator;
225 return this;
226 }
227
228 /**
229 * Returns the current line separator.
230 *
231 * @return the current line separator
232 */
233 public String getLineSeparator() {
234 return lineSeparator;
235 }
236
237 /**
238 * This will set whether the XML declaration
239 * (<code>&lt;&#063;xml version="1&#046;0"
240 * encoding="UTF-8"&#063;&gt;</code>)
241 * includes the encoding of the document. It is common to omit
242 * this in uses such as WML and other wireless device protocols.
243 *
244 * @param omitEncoding <code>boolean</code> indicating whether or not
245 * the XML declaration should indicate the document encoding.
246 * @return a pointer to this Format for chaining
247 */
248 public Format setOmitEncoding(boolean omitEncoding) {
249 this.omitEncoding = omitEncoding;
250 return this;
251 }
252
253 /**
254 * Returns whether the XML declaration encoding will be omitted.
255 *
256 * @return whether the XML declaration encoding will be omitted
257 */
258 public boolean getOmitEncoding() {
259 return omitEncoding;
260 }
261
262 /**
263 * This will set whether the XML declaration
264 * (<code>&lt;&#063;xml version="1&#046;0"&#063;gt;</code>)
265 * will be omitted or not. It is common to omit this in uses such
266 * as SOAP and XML-RPC calls.
267 *
268 * @param omitDeclaration <code>boolean</code> indicating whether or not
269 * the XML declaration should be omitted.
270 * @return a pointer to this Format for chaining
271 */
272 public Format setOmitDeclaration(boolean omitDeclaration) {
273 this.omitDeclaration = omitDeclaration;
274 return this;
275 }
276
277 /**
278 * Returns whether the XML declaration will be omitted.
279 *
280 * @return whether the XML declaration will be omitted
281 */
282 public boolean getOmitDeclaration() {
283 return omitDeclaration;
284 }
285
286 /**
287 * This will set whether empty elements are expanded from
288 * <code>&lt;tagName/&gt;</code> to
289 * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
290 *
291 * @param expandEmptyElements <code>boolean</code> indicating whether or not
292 * empty elements should be expanded.
293 * @return a pointer to this Format for chaining
294 */
295 public Format setExpandEmptyElements(boolean expandEmptyElements) {
296 this.expandEmptyElements = expandEmptyElements;
297 return this;
298 }
299
300 /**
301 * Returns whether empty elements are expanded.
302 *
303 * @return whether empty elements are expanded
304 */
305 public boolean getExpandEmptyElements() {
306 return expandEmptyElements;
307 }
308
309 /**
310 * This will set whether JAXP TrAX processing instructions for
311 * disabling/enabling output escaping are ignored. Disabling
312 * output escaping allows using XML text as element content and
313 * outputing it verbatim, i&#46;e&#46; as element children would be.
314 * <p>
315 * When processed, these processing instructions are removed from
316 * the generated XML text and control whether the element text
317 * content is output verbatim or with escaping of the pre-defined
318 * entities in XML 1.0. The text to be output verbatim shall be
319 * surrounded by the
320 * <code>&lt;?javax.xml.transform.disable-output-escaping ?&gt;</code>
321 * and <code>&lt;?javax.xml.transform.enable-output-escaping ?&gt;</code>
322 * PIs.</p>
323 * <p>
324 * When ignored, the processing instructions are present in the
325 * generated XML text and the pre-defined entities in XML 1.0 are
326 * escaped.
327 * <p>
328 * Default: <code>false</code>.</p>
329 *
330 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
331 * whether or not TrAX ouput escaping PIs are ignored.
332 *
333 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
334 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
335 */
336 public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
337 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
338 }
339
340 /**
341 * Returns whether JAXP TrAX processing instructions for
342 * disabling/enabling output escaping are ignored.
343 *
344 * @return whether or not TrAX ouput escaping PIs are ignored.
345 */
346 public boolean getIgnoreTrAXEscapingPIs() {
347 return ignoreTrAXEscapingPIs;
348 }
349
350 /**
351 * This sets the text output style. Options are available as static
352 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
353 *
354 * @return a pointer to this Format for chaining
355 */
356 public Format setTextMode(Format.TextMode mode) {
357 this.mode = mode;
358 return this;
359 }
360
361 /**
362 * Returns the current text output style.
363 *
364 * @return the current text output style
365 */
366 public Format.TextMode getTextMode() {
367 return mode;
368 }
369
370 /**
371 * This will set the indent <code>String</code> to use; this
372 * is usually a <code>String</code> of empty spaces. If you pass
373 * the empty string (""), then no indentation will happen but newlines
374 * will still be generated. Passing null will result in no indentation
375 * and no newlines generated. Default: none (null)
376 *
377 * @param indent <code>String</code> to use for indentation.
378 * @return a pointer to this Format for chaining
379 */
380 public Format setIndent(String indent) {
381 this.indent = indent;
382 return this;
383 }
384
385 /**
386 * Returns the indent string in use.
387 *
388 * @return the indent string in use
389 */
390 public String getIndent() {
391 return indent;
392 }
393
394 /**
395 * Sets the output encoding. The name should be an accepted XML
396 * encoding.
397 *
398 * @param encoding the encoding format. Use XML-style names like
399 * "UTF-8" or "ISO-8859-1" or "US-ASCII"
400 * @return a pointer to this Format for chaining
401 */
402 public Format setEncoding(String encoding) {
403 this.encoding = encoding;
404 escapeStrategy = new DefaultEscapeStrategy(encoding);
405 return this;
406 }
407
408 /**
409 * Returns the configured output encoding.
410 *
411 * @return the output encoding
412 */
413 public String getEncoding() {
414 return encoding;
415 }
416
417 protected Object clone() {
418 Format format = null;
419
420 try {
421 format = (Format) super.clone();
422 }
423 catch (CloneNotSupportedException ce) {
424 }
425
426 return format;
427 }
428
429
430 /**
431 * Handle common charsets quickly and easily. Use reflection
432 * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
433 * If JDK 1.4 isn't around, default to no special encoding.
434 */
435 class DefaultEscapeStrategy implements EscapeStrategy {
436 private int bits;
437 Object encoder;
438 Method canEncode;
439
440 public DefaultEscapeStrategy(String encoding) {
441 if ("UTF-8".equalsIgnoreCase(encoding) ||
442 "UTF-16".equalsIgnoreCase(encoding)) {
443 bits = 16;
444 }
445 else if ("ISO-8859-1".equalsIgnoreCase(encoding) ||
446 "Latin1".equalsIgnoreCase(encoding)) {
447 bits = 8;
448 }
449 else if ("US-ASCII".equalsIgnoreCase(encoding) ||
450 "ASCII".equalsIgnoreCase(encoding)) {
451 bits = 7;
452 }
453 else {
454 bits = 0;
455 //encoder = Charset.forName(encoding).newEncoder();
456 try {
457 Class charsetClass = Class.forName("java.nio.charset.Charset");
458 Class encoderClass = Class.forName("java.nio.charset.CharsetEncoder");
459 Method forName = charsetClass.getMethod("forName", new Class[]{String.class});
460 Object charsetObj = forName.invoke(null, new Object[]{encoding});
461 Method newEncoder = charsetClass.getMethod("newEncoder", null);
462 encoder = newEncoder.invoke(charsetObj, null);
463 canEncode = encoderClass.getMethod("canEncode", new Class[]{char.class});
464 }
465 catch (Exception ignored) {
466 }
467 }
468 }
469
470 public boolean shouldEscape(char ch) {
471 if (bits == 16) {
472 if (Verifier.isHighSurrogate(ch))
473 return true; // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4
474 else
475 return false;
476 }
477 if (bits == 8) {
478 if ((int) ch > 255)
479 return true;
480 else
481 return false;
482 }
483 if (bits == 7) {
484 if ((int) ch > 127)
485 return true;
486 else
487 return false;
488 }
489 else {
490 if (Verifier.isHighSurrogate(ch))
491 return true; // Safer this way per http://unicode.org/faq/utf_bom.html#utf8-4
492
493 if (canEncode != null && encoder != null) {
494 try {
495 Boolean val = (Boolean) canEncode.invoke(encoder, new Object[]{new Character(ch)});
496 return !val.booleanValue();
497 }
498 catch (Exception ignored) {
499 }
500 }
501 // Return false if we don't know. This risks not escaping
502 // things which should be escaped, but also means people won't
503 // start getting loads of unnecessary escapes.
504 return false;
505 }
506 }
507 }
508
509
510 /**
511 * Class to signify how text should be handled on output. The following
512 * table provides details.
513 *
514 * <table>
515 * <tr>
516 * <th align="left">
517 * Text Mode
518 * </th>
519 * <th>
520 * Resulting behavior.
521 * </th>
522 * </tr>
523 *
524 * <tr valign="top">
525 * <td>
526 * <i>PRESERVE (Default)</i>
527 * </td>
528 * <td>
529 * All content is printed in the format it was created, no whitespace
530 * or line separators are are added or removed.
531 * </td>
532 * </tr>
533 *
534 * <tr valign="top">
535 * <td>
536 * TRIM_FULL_WHITE
537 * </td>
538 * <td>
539 * Content between tags consisting of all whitespace is not printed.
540 * If the content contains even one non-whitespace character, it is
541 * printed verbatim, whitespace and all.
542 * </td>
543 * </tr>
544 *
545 * <tr valign="top">
546 * <td>
547 * TRIM
548 * </td>
549 * <td>
550 * Same as TrimAllWhite, plus leading/trailing whitespace are
551 * trimmed.
552 * </td>
553 * </tr>
554 *
555 * <tr valign="top">
556 * <td>
557 * NORMALIZE
558 * </td>
559 * <td>
560 * Same as TextTrim, plus addition interior whitespace is compressed
561 * to a single space.
562 * </td>
563 * </tr>
564 * </table>
565 *
566 * In most cases textual content is aligned with the surrounding tags
567 * (after the appropriate text mode is applied). In the case where the only
568 * content between the start and end tags is textual, the start tag, text,
569 * and end tag are all printed on the same line. If the document being
570 * output already has whitespace, it's wise to turn on TRIM mode so the
571 * pre-existing whitespace can be trimmed before adding new whitespace.
572 * <p>
573 * When a element has a xml:space attribute with the value of "preserve",
574 * all formating is turned off and reverts back to the default until the
575 * element and its contents have been printed. If a nested element contains
576 * another xml:space with the value "default" formatting is turned back on
577 * for the child element and then off for the remainder of the parent
578 * element.
579 */
580 public static class TextMode {
581 /**
582 * Mode for literal text preservation.
583 */
584 public static final TextMode PRESERVE = new TextMode("PRESERVE");
585
586 /**
587 * Mode for text trimming (left and right trim).
588 */
589 public static final TextMode TRIM = new TextMode("TRIM");
590
591 /**
592 * Mode for text normalization (left and right trim plus internal
593 * whitespace is normalized to a single space.
594 * @see org.jdom.Element#getTextNormalize
595 */
596 public static final TextMode NORMALIZE = new TextMode("NORMALIZE");
597
598 /**
599 * Mode for text trimming of content consisting of nothing but
600 * whitespace but otherwise not changing output.
601 */
602 public static final TextMode TRIM_FULL_WHITE =
603 new TextMode("TRIM_FULL_WHITE");
604
605 private final String name;
606
607 private TextMode(String name) {
608 this.name = name;
609 }
610
611 public String toString() {
612 return name;
613 }
614 }
615 }