1:
37:
38: package ;
39:
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46: import ;
47: import ;
48: import ;
49: import ;
50: import ;
51: import ;
52: import ;
53: import ;
54: import ;
55: import ;
56: import ;
57: import ;
58: import ;
59:
60:
65: public class StreamSerializer
66: {
67:
68: static final int SPACE = 0x20;
69: static final int BANG = 0x21;
70: static final int APOS = 0x27;
71: static final int SLASH = 0x2f;
72: static final int BRA = 0x3c;
73: static final int KET = 0x3e;
74: static final int EQ = 0x3d;
75:
76:
/**
 * HTML boolean attributes, keyed by lower-case element name. Each value is
 * the set of lower-case attribute names that {@link #isHTMLBoolean} treats
 * as boolean for that element. Elements registered together share a single
 * set instance.
 */
static final Map HTML_BOOLEAN_ATTRIBUTES = new HashMap();
static {
    registerBooleanAttributes(new String[] { "area" },
                              new String[] { "nohref" });
    registerBooleanAttributes(new String[] { "img" },
                              new String[] { "ismap" });
    registerBooleanAttributes(new String[] { "object" },
                              new String[] { "declare" });
    registerBooleanAttributes(new String[] { "hr" },
                              new String[] { "noshade" });
    registerBooleanAttributes(new String[] { "dl", "ol", "ul", "dir", "menu" },
                              new String[] { "compact" });
    registerBooleanAttributes(new String[] { "input" },
                              new String[] { "checked", "disabled", "readonly", "ismap" });
    registerBooleanAttributes(new String[] { "select" },
                              new String[] { "multiple", "disabled" });
    registerBooleanAttributes(new String[] { "optgroup" },
                              new String[] { "disabled" });
    registerBooleanAttributes(new String[] { "option" },
                              new String[] { "selected", "disabled" });
    registerBooleanAttributes(new String[] { "textarea" },
                              new String[] { "disabled", "readonly" });
    registerBooleanAttributes(new String[] { "button" },
                              new String[] { "disabled" });
    registerBooleanAttributes(new String[] { "th", "td" },
                              new String[] { "nowrap" });
    registerBooleanAttributes(new String[] { "frame" },
                              new String[] { "noresize" });
    registerBooleanAttributes(new String[] { "script" },
                              new String[] { "defer" });
}

/**
 * Builds one attribute-name set and maps every given element name to it.
 *
 * @param elementNames element names that share the attribute set
 * @param attributeNames boolean attribute names for those elements
 */
private static void registerBooleanAttributes(String[] elementNames,
                                              String[] attributeNames) {
    HashSet attributes = new HashSet();
    for (int i = 0; i < attributeNames.length; i++) {
        attributes.add(attributeNames[i]);
    }
    for (int i = 0; i < elementNames.length; i++) {
        HTML_BOOLEAN_ATTRIBUTES.put(elementNames[i], attributes);
    }
}
151:
152: protected final String encoding;
153: final Charset charset;
154: final CharsetEncoder encoder;
155: final int mode;
156: final Map namespaces;
157: protected String eol;
158: Collection cdataSectionElements = Collections.EMPTY_SET;
159:
160: protected boolean discardDefaultContent;
161: protected boolean xmlDeclaration = true;
162:
163: public StreamSerializer()
164: {
165: this(Stylesheet.OUTPUT_XML, null, null);
166: }
167:
168: public StreamSerializer(String encoding)
169: {
170: this(Stylesheet.OUTPUT_XML, encoding, null);
171: }
172:
173: public StreamSerializer(int mode, String encoding, String eol)
174: {
175: this.mode = mode;
176: if (encoding == null)
177: {
178: encoding = "UTF-8";
179: }
180: this.encoding = encoding.intern();
181: charset = Charset.forName(this.encoding);
182: encoder = charset.newEncoder();
183: this.eol = (eol != null) ? eol : System.getProperty("line.separator");
184: namespaces = new HashMap();
185: }
186:
187: void setCdataSectionElements(Collection c)
188: {
189: cdataSectionElements = c;
190: }
191:
192: public void serialize(final Node node, final OutputStream out)
193: throws IOException
194: {
195: serialize(node, out, false);
196: }
197:
198: void serialize(Node node, final OutputStream out,
199: boolean convertToCdata)
200: throws IOException
201: {
202: while (node != null)
203: {
204: Node next = node.getNextSibling();
205: doSerialize(node, out, convertToCdata);
206: node = next;
207: }
208: }
209:
210: private void doSerialize(final Node node, final OutputStream out,
211: boolean convertToCdata)
212: throws IOException
213: {
214: if (out == null)
215: {
216: throw new NullPointerException("no output stream");
217: }
218: String value, prefix;
219: Node children;
220: String uri = node.getNamespaceURI();
221: boolean defined = false;
222: short nt = node.getNodeType();
223: if (convertToCdata && nt == Node.TEXT_NODE)
224: {
225: nt = Node.CDATA_SECTION_NODE;
226: }
227: switch (nt)
228: {
229: case Node.ATTRIBUTE_NODE:
230: prefix = node.getPrefix();
231: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
232: XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) ||
233: (prefix != null && prefix.startsWith("xmlns:")))
234: {
235: String nsuri = node.getNodeValue();
236: if (isDefined(nsuri))
237: {
238: break;
239: }
240: String name = node.getLocalName();
241: if (name == null)
242: {
243: name = node.getNodeName();
244: }
245: define(nsuri, name);
246: }
247: else if (uri != null && !isDefined(uri))
248: {
249: prefix = define(uri, prefix);
250: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
251: out.write(SPACE);
252: out.write(encodeText(nsname));
253: out.write(EQ);
254: String nsvalue = "'" + encode(uri, true, true) + "'";
255: out.write(nsvalue.getBytes(encoding));
256: defined = true;
257: }
258: out.write(SPACE);
259: String a_nodeName = node.getNodeName();
260: out.write(encodeText(a_nodeName));
261: String a_nodeValue = node.getNodeValue();
262: if (mode == Stylesheet.OUTPUT_HTML &&
263: a_nodeName.equals(a_nodeValue) &&
264: isHTMLBoolean((Attr) node, a_nodeName))
265: {
266: break;
267: }
268: out.write(EQ);
269: value = "'" + encode(a_nodeValue, true, true) + "'";
270: out.write(encodeText(value));
271: break;
272: case Node.ELEMENT_NODE:
273: value = node.getNodeName();
274: out.write(BRA);
275: out.write(encodeText(value));
276: if (uri != null && !isDefined(uri))
277: {
278: prefix = define(uri, node.getPrefix());
279: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
280: out.write(SPACE);
281: out.write(encodeText(nsname));
282: out.write(EQ);
283: String nsvalue = "'" + encode(uri, true, true) + "'";
284: out.write(encodeText(nsvalue));
285: defined = true;
286: }
287: NamedNodeMap attrs = node.getAttributes();
288: if (attrs != null)
289: {
290: int len = attrs.getLength();
291: for (int i = 0; i < len; i++)
292: {
293: Attr attr = (Attr) attrs.item(i);
294: if (discardDefaultContent && !attr.getSpecified())
295: {
296:
297: }
298: else
299: {
300: serialize(attr, out, false);
301: }
302: }
303: }
304: convertToCdata = cdataSectionElements.contains(value);
305: children = node.getFirstChild();
306: if (children == null)
307: {
308: out.write(SLASH);
309: out.write(KET);
310: }
311: else
312: {
313: out.write(KET);
314: serialize(children, out, convertToCdata);
315: out.write(BRA);
316: out.write(SLASH);
317: out.write(encodeText(value));
318: out.write(KET);
319: }
320: break;
321: case Node.TEXT_NODE:
322: value = node.getNodeValue();
323: if (!"yes".equals(node.getUserData("disable-output-escaping")))
324: {
325: value = encode(value, false, false);
326: }
327: out.write(encodeText(value));
328: break;
329: case Node.CDATA_SECTION_NODE:
330: value = "<![CDATA[" + node.getNodeValue() + "]]>";
331: out.write(encodeText(value));
332: break;
333: case Node.COMMENT_NODE:
334: value = "<!--" + node.getNodeValue() + "-->";
335: out.write(encodeText(value));
336: Node cp = node.getParentNode();
337: if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE)
338: {
339: out.write(encodeText(eol));
340: }
341: break;
342: case Node.DOCUMENT_NODE:
343: case Node.DOCUMENT_FRAGMENT_NODE:
344: if (mode == Stylesheet.OUTPUT_XML)
345: {
346: if ("UTF-16".equalsIgnoreCase(encoding))
347: {
348: out.write(0xfe);
349: out.write(0xff);
350: }
351: if (!"yes".equals(node.getUserData("omit-xml-declaration")) &&
352: xmlDeclaration)
353: {
354: Document doc = (node instanceof Document) ?
355: (Document) node : null;
356: String version = (doc != null) ? doc.getXmlVersion() : null;
357: if (version == null)
358: {
359: version = (String) node.getUserData("version");
360: }
361: if (version == null)
362: {
363: version = "1.0";
364: }
365: out.write(BRA);
366: out.write(0x3f);
367: out.write("xml version='".getBytes("US-ASCII"));
368: out.write(version.getBytes("US-ASCII"));
369: out.write(APOS);
370: if (!("UTF-8".equalsIgnoreCase(encoding)))
371: {
372: out.write(" encoding='".getBytes("US-ASCII"));
373: out.write(encoding.getBytes("US-ASCII"));
374: out.write(APOS);
375: }
376: if ((doc != null && doc.getXmlStandalone()) ||
377: "yes".equals(node.getUserData("standalone")))
378: {
379: out.write(" standalone='yes'".getBytes("US-ASCII"));
380: }
381: out.write(0x3f);
382: out.write(KET);
383: out.write(encodeText(eol));
384: }
385:
386:
387: }
388: else if (mode == Stylesheet.OUTPUT_HTML)
389: {
390:
391: String mediaType = (String) node.getUserData("media-type");
392: if (mediaType == null)
393: {
394: mediaType = "text/html";
395: }
396: String contentType = mediaType + "; charset=" +
397: ((encoding.indexOf(' ') != -1) ?
398: "\"" + encoding + "\"" :
399: encoding);
400: Document doc = (node instanceof Document) ? (Document) node :
401: node.getOwnerDocument();
402: Node html = null;
403: for (Node ctx = node.getFirstChild(); ctx != null;
404: ctx = ctx.getNextSibling())
405: {
406: if (ctx.getNodeType() == Node.ELEMENT_NODE)
407: {
408: html = ctx;
409: break;
410: }
411: }
412: if (html == null)
413: {
414: html = doc.createElement("html");
415: node.appendChild(html);
416: }
417: Node head = null;
418: for (Node ctx = html.getFirstChild(); ctx != null;
419: ctx = ctx.getNextSibling())
420: {
421: if (ctx.getNodeType() == Node.ELEMENT_NODE)
422: {
423: String name = ctx.getLocalName();
424: if (name == null)
425: {
426: name = ctx.getNodeName();
427: }
428: if ("head".equalsIgnoreCase(name))
429: {
430: head = ctx;
431: break;
432: }
433: }
434: }
435: if (head == null)
436: {
437: head = doc.createElement("head");
438: Node c1 = null;
439: for (Node ctx = html.getFirstChild(); ctx != null;
440: ctx = ctx.getNextSibling())
441: {
442: if (ctx.getNodeType() == Node.ELEMENT_NODE)
443: {
444: c1 = ctx;
445: break;
446: }
447: }
448: if (c1 != null)
449: {
450: html.insertBefore(head, c1);
451: }
452: else
453: {
454: html.appendChild(head);
455: }
456: }
457: Node meta = null;
458: Node metaContent = null;
459: for (Node ctx = head.getFirstChild(); ctx != null;
460: ctx = ctx.getNextSibling())
461: {
462: if (ctx.getNodeType() == Node.ELEMENT_NODE)
463: {
464: String name = ctx.getLocalName();
465: if (name == null)
466: {
467: name = ctx.getNodeName();
468: }
469: if ("meta".equalsIgnoreCase(name))
470: {
471: NamedNodeMap metaAttrs = ctx.getAttributes();
472: int len = metaAttrs.getLength();
473: String httpEquiv = null;
474: Node content = null;
475: for (int i = 0; i < len; i++)
476: {
477: Node attr = metaAttrs.item(i);
478: String attrName = attr.getNodeName();
479: if ("http-equiv".equalsIgnoreCase(attrName))
480: {
481: httpEquiv = attr.getNodeValue();
482: }
483: else if ("content".equalsIgnoreCase(attrName))
484: {
485: content = attr;
486: }
487: }
488: if ("Content-Type".equalsIgnoreCase(httpEquiv))
489: {
490: meta = ctx;
491: metaContent = content;
492: break;
493: }
494: }
495: }
496: }
497: if (meta == null)
498: {
499: meta = doc.createElement("meta");
500:
501: Node first = head.getFirstChild();
502: if (first == null)
503: {
504: head.appendChild(meta);
505: }
506: else
507: {
508: head.insertBefore(meta, first);
509: }
510: Node metaHttpEquiv = doc.createAttribute("http-equiv");
511: meta.getAttributes().setNamedItem(metaHttpEquiv);
512: metaHttpEquiv.setNodeValue("Content-Type");
513: }
514: if (metaContent == null)
515: {
516: metaContent = doc.createAttribute("content");
517: meta.getAttributes().setNamedItem(metaContent);
518: }
519: metaContent.setNodeValue(contentType);
520:
521: }
522: children = node.getFirstChild();
523: if (children != null)
524: {
525: serialize(children, out, convertToCdata);
526: }
527: break;
528: case Node.DOCUMENT_TYPE_NODE:
529: DocumentType doctype = (DocumentType) node;
530: out.write(BRA);
531: out.write(BANG);
532: out.write(encodeText("DOCTYPE "));
533: value = doctype.getNodeName();
534: out.write(encodeText(value));
535: String publicId = doctype.getPublicId();
536: if (publicId != null)
537: {
538: out.write(encodeText(" PUBLIC "));
539: out.write(APOS);
540: out.write(encodeText(publicId));
541: out.write(APOS);
542: }
543: String systemId = doctype.getSystemId();
544: if (systemId != null)
545: {
546: out.write(encodeText(" SYSTEM "));
547: out.write(APOS);
548: out.write(encodeText(systemId));
549: out.write(APOS);
550: }
551: String internalSubset = doctype.getInternalSubset();
552: if (internalSubset != null)
553: {
554: out.write(encodeText(internalSubset));
555: }
556: out.write(KET);
557: out.write(eol.getBytes(encoding));
558: break;
559: case Node.ENTITY_REFERENCE_NODE:
560: value = "&" + node.getNodeValue() + ";";
561: out.write(encodeText(value));
562: break;
563: case Node.PROCESSING_INSTRUCTION_NODE:
564: value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>";
565: out.write(encodeText(value));
566: Node pp = node.getParentNode();
567: if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE)
568: {
569: out.write(encodeText(eol));
570: }
571: break;
572: }
573: if (defined)
574: {
575: undefine(uri);
576: }
577: }
578:
579: boolean isDefined(String uri)
580: {
581: return XMLConstants.XML_NS_URI.equals(uri) ||
582: XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
583: namespaces.containsKey(uri);
584: }
585:
586: String define(String uri, String prefix)
587: {
588: while (namespaces.containsValue(prefix))
589: {
590:
591: prefix = prefix + "_";
592: }
593: namespaces.put(uri, prefix);
594: return prefix;
595: }
596:
597: void undefine(String uri)
598: {
599: namespaces.remove(uri);
600: }
601:
602: final byte[] encodeText(String text)
603: throws IOException
604: {
605: encoder.reset();
606: if (!encoder.canEncode(text))
607: {
608:
609: StringBuffer buf = new StringBuffer();
610: int len = text.length();
611: for (int i = 0; i < len; i++)
612: {
613: char c = text.charAt(i);
614: if (encoder.canEncode(c))
615: {
616: buf.append(c);
617: }
618: else
619: {
620:
621: String hex = Integer.toHexString((int) c);
622: buf.append("&#x");
623: buf.append(hex);
624: buf.append(';');
625: }
626: }
627: text = buf.toString();
628: }
629: ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
630: int len = encoded.limit() - encoded.position();
631: if (encoded.hasArray())
632: {
633: byte[] ret = encoded.array();
634: if (ret.length > len)
635: {
636:
637: byte[] ret2 = new byte[len];
638: System.arraycopy(ret, 0, ret2, 0, len);
639: ret = ret2;
640: }
641: return ret;
642: }
643: encoded.flip();
644: byte[] ret = new byte[len];
645: encoded.get(ret, 0, len);
646: return ret;
647: }
648:
649: String encode(String text, boolean encodeCtl, boolean inAttr)
650: {
651: int len = text.length();
652: StringBuffer buf = null;
653: for (int i = 0; i < len; i++)
654: {
655: char c = text.charAt(i);
656: if (c == '<')
657: {
658: if (buf == null)
659: {
660: buf = new StringBuffer(text.substring(0, i));
661: }
662: buf.append("<");
663: }
664: else if (c == '>')
665: {
666: if (buf == null)
667: {
668: buf = new StringBuffer(text.substring(0, i));
669: }
670: buf.append(">");
671: }
672: else if (c == '&')
673: {
674: if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len &&
675: text.charAt(i + 1) == '{')
676: {
677: if (buf != null)
678: {
679: buf.append(c);
680: }
681: }
682: else
683: {
684: if (buf == null)
685: {
686: buf = new StringBuffer(text.substring(0, i));
687: }
688: buf.append("&");
689: }
690: }
691: else if (c == '\'' && inAttr)
692: {
693: if (buf == null)
694: {
695: buf = new StringBuffer(text.substring(0, i));
696: }
697: if (mode == Stylesheet.OUTPUT_HTML)
698:
699: buf.append("'");
700: else
701: buf.append("'");
702: }
703: else if (c == '"' && inAttr)
704: {
705: if (buf == null)
706: {
707: buf = new StringBuffer(text.substring(0, i));
708: }
709: buf.append(""");
710: }
711: else if (encodeCtl)
712: {
713: if (c < 0x20)
714: {
715: if (buf == null)
716: {
717: buf = new StringBuffer(text.substring(0, i));
718: }
719: buf.append('&');
720: buf.append('#');
721: buf.append((int) c);
722: buf.append(';');
723: }
724: else if (buf != null)
725: {
726: buf.append(c);
727: }
728: }
729: else if (buf != null)
730: {
731: buf.append(c);
732: }
733: }
734: return (buf == null) ? text : buf.toString();
735: }
736:
737: String toString(Node node)
738: {
739: ByteArrayOutputStream out = new ByteArrayOutputStream();
740: try
741: {
742: serialize(node, out);
743: return new String(out.toByteArray(), encoding);
744: }
745: catch (IOException e)
746: {
747: throw new RuntimeException(e.getMessage());
748: }
749: }
750:
751: boolean isHTMLBoolean(Attr attr, String attrName)
752: {
753: attrName = attrName.toLowerCase();
754: Node element = attr.getOwnerElement();
755: String elementName = element.getLocalName();
756: if (elementName == null)
757: {
758: elementName = element.getNodeName();
759: }
760: elementName = elementName.toLowerCase();
761: Collection attributes =
762: (Collection) HTML_BOOLEAN_ATTRIBUTES.get(elementName);
763: return (attributes != null && attributes.contains(attrName));
764: }
765:
766: }