Source for gnu.xml.transform.StreamSerializer

   1: /* StreamSerializer.java -- 
   2:    Copyright (C) 2004,2006 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.transform;
  39: 
  40: import java.io.ByteArrayOutputStream;
  41: import java.io.IOException;
  42: import java.io.OutputStream;
  43: import java.io.UnsupportedEncodingException;
  44: import java.nio.ByteBuffer;
  45: import java.nio.CharBuffer;
  46: import java.nio.charset.Charset;
  47: import java.nio.charset.CharsetEncoder;
  48: import java.util.Collection;
  49: import java.util.Collections;
  50: import java.util.HashMap;
  51: import java.util.HashSet;
  52: import java.util.Iterator;
  53: import java.util.LinkedList;
  54: import java.util.Map;
  55: import javax.xml.XMLConstants;
  56: import org.w3c.dom.Attr;
  57: import org.w3c.dom.Document;
  58: import org.w3c.dom.DocumentType;
  59: import org.w3c.dom.NamedNodeMap;
  60: import org.w3c.dom.Node;
  61: 
  62: /**
  63:  * Serializes a DOM node to an output stream.
  64:  *
  65:  * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
  66:  */
  67: public class StreamSerializer
  68: {
  69:   
  70:   static final int SPACE = 0x20;
  71:   static final int BANG = 0x21; // !
  72:   static final int APOS = 0x27; // '
  73:   static final int SLASH = 0x2f; // /
  74:   static final int BRA = 0x3c; // <
  75:   static final int KET = 0x3e; // >
  76:   static final int EQ = 0x3d; // =
  77: 
  /**
   * HTML 4.01 boolean attributes: maps a lower-case element name to the
   * set of attribute names that are boolean on that element.
   */
  static final Map HTML_BOOLEAN_ATTRIBUTES = new HashMap();
  static
  {
    registerBooleanAttributes(new String[] { "area" },
                              new String[] { "nohref" });
    registerBooleanAttributes(new String[] { "img" },
                              new String[] { "ismap" });
    registerBooleanAttributes(new String[] { "object" },
                              new String[] { "declare" });
    registerBooleanAttributes(new String[] { "hr" },
                              new String[] { "noshade" });
    // The list-like elements all share one set instance
    registerBooleanAttributes(new String[] { "dl", "ol", "ul", "dir",
                                             "menu" },
                              new String[] { "compact" });
    registerBooleanAttributes(new String[] { "input" },
                              new String[] { "checked", "disabled",
                                             "readonly", "ismap" });
    registerBooleanAttributes(new String[] { "select" },
                              new String[] { "multiple", "disabled" });
    registerBooleanAttributes(new String[] { "optgroup" },
                              new String[] { "disabled" });
    registerBooleanAttributes(new String[] { "option" },
                              new String[] { "selected", "disabled" });
    registerBooleanAttributes(new String[] { "textarea" },
                              new String[] { "disabled", "readonly" });
    registerBooleanAttributes(new String[] { "button" },
                              new String[] { "disabled" });
    // Table cells share one set instance
    registerBooleanAttributes(new String[] { "th", "td" },
                              new String[] { "nowrap" });
    registerBooleanAttributes(new String[] { "frame" },
                              new String[] { "noresize" });
    registerBooleanAttributes(new String[] { "script" },
                              new String[] { "defer" });
  }

  /**
   * Builds one set holding the given attribute names and registers that
   * same set instance under each of the given element names.
   */
  private static void registerBooleanAttributes(String[] elementNames,
                                                String[] attributeNames)
  {
    HashSet attributes = new HashSet();
    for (int i = 0; i < attributeNames.length; i++)
      attributes.add(attributeNames[i]);
    for (int i = 0; i < elementNames.length; i++)
      HTML_BOOLEAN_ATTRIBUTES.put(elementNames[i], attributes);
  }
 153: 
 154:   // HTML namespace URIs
 155:   static final HashSet HTML_URIS = new HashSet();
 156:   static {
 157:     HTML_URIS.add("http://www.w3.org/1999/xhtml");
 158:   }
 159: 
 160:   protected final String encoding;
 161:   final Charset charset;
 162:   final CharsetEncoder encoder;
 163:   final int mode;
 164:   final LinkedList namespaces;
 165:   protected String eol;
 166:   Collection cdataSectionElements = Collections.EMPTY_SET;
 167: 
 168:   protected boolean discardDefaultContent;
 169:   protected boolean xmlDeclaration = true;
 170:   
 171:   // has a META element with the encoding been added?
 172:   private boolean htmlEncoded;
 173: 
 174:   public StreamSerializer()
 175:   {
 176:     this(Stylesheet.OUTPUT_XML, null, null);
 177:   }
 178: 
 179:   public StreamSerializer(String encoding)
 180:   {
 181:     this(Stylesheet.OUTPUT_XML, encoding, null);
 182:   }
 183: 
 184:   public StreamSerializer(int mode, String encoding, String eol)
 185:   {
 186:     this.mode = mode;
 187:     if (encoding == null)
 188:       encoding = (mode == Stylesheet.OUTPUT_HTML) ? "ISO-8859-1" : "UTF-8";
 189:     this.encoding = encoding.intern();
 190:     charset = Charset.forName(this.encoding);
 191:     encoder = charset.newEncoder();
 192:     this.eol = (eol != null) ? eol : System.getProperty("line.separator");
 193:     namespaces = new LinkedList();
 194:   }
 195: 
 196:   void setCdataSectionElements(Collection c)
 197:   {
 198:     cdataSectionElements = c;
 199:   }
 200: 
 201:   public void serialize(final Node node, final OutputStream out)
 202:     throws IOException
 203:   {
 204:     serialize(node, out, false);
 205:   }
 206:   
 207:   void serialize(Node node, final OutputStream out,
 208:                  boolean convertToCdata)
 209:     throws IOException
 210:   {
 211:     while (node != null)
 212:       {
 213:         Node next = node.getNextSibling();
 214:         doSerialize(node, out, convertToCdata);
 215:         node = next;
 216:       }
 217:   }
 218: 
 219:   private void doSerialize(final Node node, final OutputStream out,
 220:                            boolean convertToCdata)
 221:     throws IOException
 222:   {
 223:     if (out == null)
 224:       throw new NullPointerException("no output stream");
 225:     htmlEncoded = false;
 226:     String value, prefix;
 227:     Node children;
 228:     String uri = node.getNamespaceURI();
 229:     short nt = node.getNodeType();
 230:     if (convertToCdata && nt == Node.TEXT_NODE)
 231:       nt = Node.CDATA_SECTION_NODE;
 232:     switch (nt)
 233:       {
 234:       case Node.ATTRIBUTE_NODE:
 235:         prefix = node.getPrefix();
 236:         if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
 237:             XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) ||
 238:             (prefix != null && prefix.startsWith("xmlns:")))
 239:           {
 240:             String nsuri = node.getNodeValue();
 241:             if (isDefined(nsuri, prefix))
 242:               break;
 243:             String name = node.getLocalName();
 244:             if (name == null)
 245:               {
 246:                 // Namespace-unaware
 247:                 name = node.getNodeName();
 248:                 int ci = name.indexOf(':');
 249:                 if (ci != -1)
 250:                   name = name.substring(ci + 1);
 251:               }
 252:             define(nsuri, name);
 253:           }
 254:         else if (uri != null && !isDefined(uri, prefix))
 255:           {
 256:             prefix = define(uri, prefix);
 257:             String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
 258:             out.write(SPACE);
 259:             out.write(encodeText(nsname));
 260:             out.write(EQ);
 261:             String nsvalue = "\"" + encode(uri, true, true) + "\"";
 262:             out.write(nsvalue.getBytes(encoding));
 263:           }
 264:         out.write(SPACE);
 265:         String a_nodeName = node.getNodeName();
 266:         out.write(encodeText(a_nodeName));
 267:         String a_nodeValue = node.getNodeValue();
 268:         if (mode == Stylesheet.OUTPUT_HTML &&
 269:             a_nodeName.equals(a_nodeValue) &&
 270:             isHTMLBoolean((Attr) node, a_nodeName))
 271:           break;
 272:         out.write(EQ);
 273:         value = "\"" + encode(a_nodeValue, true, true) + "\"";
 274:         out.write(encodeText(value));
 275:         break;
 276:       case Node.ELEMENT_NODE:
 277:         pushNamespaceContext();
 278:         value = node.getNodeName();
 279:         out.write(BRA);
 280:         out.write(encodeText(value));
 281:         prefix = node.getPrefix();
 282:         if (uri != null && !isDefined(uri, prefix))
 283:           {
 284:             prefix = define(uri, prefix);
 285:             String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
 286:             out.write(SPACE);
 287:             out.write(encodeText(nsname));
 288:             out.write(EQ);
 289:             String nsvalue = "\"" + encode(uri, true, true) + "\"";
 290:             out.write(encodeText(nsvalue));
 291:           }
 292:         NamedNodeMap attrs = node.getAttributes();
 293:         if (attrs != null)
 294:           {
 295:             int len = attrs.getLength();
 296:             for (int i = 0; i < len; i++)
 297:               {
 298:                 Attr attr = (Attr) attrs.item(i);
 299:                 if (discardDefaultContent && !attr.getSpecified())
 300:                   {
 301:                     // NOOP
 302:                   }
 303:                 else
 304:                   serialize(attr, out, false);
 305:               }
 306:           }
 307:         convertToCdata = cdataSectionElements.contains(value);
 308:         children = node.getFirstChild();
 309:         if (children == null)
 310:           {
 311:             out.write(SLASH);
 312:             out.write(KET);
 313:           }
 314:         else
 315:           {
 316:             out.write(KET);
 317:             serialize(children, out, convertToCdata);
 318:             out.write(BRA);
 319:             out.write(SLASH);
 320:             out.write(encodeText(value));
 321:             out.write(KET);
 322:           }
 323:         popNamespaceContext();
 324:         break;
 325:       case Node.TEXT_NODE:
 326:         value = node.getNodeValue();
 327:         if (!"yes".equals(node.getUserData("disable-output-escaping")) &&
 328:             mode != Stylesheet.OUTPUT_TEXT)
 329:           value = encode(value, false, false);
 330:         out.write(encodeText(value));
 331:         break;
 332:       case Node.CDATA_SECTION_NODE:
 333:         value = node.getNodeValue();
 334:         // Where any instanceof of ]]> occur, split into multiple CDATA
 335:         // sections
 336:         int bbk = value.indexOf("]]>");
 337:         while (bbk != -1)
 338:           {
 339:             String head = value.substring(0, bbk + 2);
 340:             out.write(encodeText("<![CDATA[" + head + "]]>"));
 341:             value = value.substring(bbk + 2);
 342:             bbk = value.indexOf("]]>");
 343:           }
 344:         // Write final tail value
 345:         out.write(encodeText("<![CDATA[" + value + "]]>"));
 346:         break;
 347:       case Node.COMMENT_NODE:
 348:         value = "<!--" + node.getNodeValue() + "-->";
 349:         out.write(encodeText(value));
 350:         Node cp = node.getParentNode();
 351:         if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE)
 352:           out.write(encodeText(eol));
 353:         break;
 354:       case Node.DOCUMENT_NODE:
 355:       case Node.DOCUMENT_FRAGMENT_NODE:
 356:         if (mode == Stylesheet.OUTPUT_XML)
 357:           {
 358:             if ("UTF-16".equalsIgnoreCase(encoding))
 359:               {
 360:                 out.write(0xfe);
 361:                 out.write(0xff);
 362:               }
 363:             if (!"yes".equals(node.getUserData("omit-xml-declaration")) &&
 364:                 xmlDeclaration)
 365:               {
 366:                 Document doc = (node instanceof Document) ?
 367:                   (Document) node : null;
 368:                 String version = (doc != null) ? doc.getXmlVersion() : null;
 369:                 if (version == null)
 370:                   version = (String) node.getUserData("version");
 371:                 if (version == null)
 372:                   version = "1.0";
 373:                 out.write(BRA);
 374:                 out.write(0x3f);
 375:                 out.write("xml version=\"".getBytes("US-ASCII"));
 376:                 out.write(version.getBytes("US-ASCII"));
 377:                 out.write(0x22);
 378:                 if (!("UTF-8".equalsIgnoreCase(encoding)))
 379:                   {
 380:                     out.write(" encoding=\"".getBytes("US-ASCII"));
 381:                     out.write(encoding.getBytes("US-ASCII"));
 382:                     out.write(0x22);
 383:                   }
 384:                 if ((doc != null && doc.getXmlStandalone()) ||
 385:                     "yes".equals(node.getUserData("standalone")))
 386:                   out.write(" standalone=\"yes\"".getBytes("US-ASCII"));
 387:                 out.write(0x3f);
 388:                 out.write(KET);
 389:                 out.write(encodeText(eol));
 390:               }
 391:             // TODO warn if not outputting the declaration would be a
 392:             // problem
 393:           }
 394:         else if (mode == Stylesheet.OUTPUT_HTML)
 395:           {
 396:             // Ensure that encoding is accessible if head element is present
 397:             String mediaType = (String) node.getUserData("media-type");
 398:             if (mediaType == null)
 399:               mediaType = "text/html";
 400:             String contentType = mediaType + "; charset=" +
 401:               ((encoding.indexOf(' ') != -1) ?
 402:                 "\"" + encoding + "\"" :
 403:                 encoding);
 404:             Document doc = (node instanceof Document) ? (Document) node :
 405:               node.getOwnerDocument();
 406:             Node html = null;
 407:             for (Node ctx = node.getFirstChild(); ctx != null;
 408:                  ctx = ctx.getNextSibling())
 409:               {
 410:                 if (ctx.getNodeType() == Node.ELEMENT_NODE &&
 411:                     isHTMLElement(ctx, "html"))
 412:                   {
 413:                     html = ctx;
 414:                     break;
 415:                   }
 416:               }
 417:             if (html != null)
 418:               {
 419:                 Node head = null;
 420:                 for (Node ctx = html.getFirstChild(); ctx != null;
 421:                      ctx = ctx.getNextSibling())
 422:                   {
 423:                     if (isHTMLElement(ctx, "head"))
 424:                       {
 425:                         head = ctx;
 426:                         break;
 427:                       }
 428:                   }
 429:                 if (head != null)
 430:                   {
 431:                     Node meta = null;
 432:                     Node metaContent = null;
 433:                     for (Node ctx = head.getFirstChild(); ctx != null;
 434:                          ctx = ctx.getNextSibling())
 435:                       {
 436:                         if (isHTMLElement(ctx, "meta"))
 437:                           {
 438:                             NamedNodeMap metaAttrs = ctx.getAttributes();
 439:                             int len = metaAttrs.getLength();
 440:                             String httpEquiv = null;
 441:                             Node content = null;
 442:                             for (int i = 0; i < len; i++)
 443:                               {
 444:                                 Node attr = metaAttrs.item(i);
 445:                                 String attrName = attr.getNodeName();
 446:                                 if ("http-equiv".equalsIgnoreCase(attrName))
 447:                                   httpEquiv = attr.getNodeValue();
 448:                                 else if ("content".equalsIgnoreCase(attrName))
 449:                                   content = attr;
 450:                               }
 451:                             if ("Content-Type".equalsIgnoreCase(httpEquiv))
 452:                               {
 453:                                 meta = ctx;
 454:                                 metaContent = content;
 455:                                 break;
 456:                               }
 457:                           }
 458:                       }
 459:                     if (meta == null)
 460:                       {
 461:                         meta = doc.createElement("meta");
 462:                         // Insert first
 463:                         Node first = head.getFirstChild();
 464:                         if (first == null)
 465:                           head.appendChild(meta);
 466:                         else
 467:                           head.insertBefore(meta, first);
 468:                         Node metaHttpEquiv = doc.createAttribute("http-equiv");
 469:                         meta.getAttributes().setNamedItem(metaHttpEquiv);
 470:                         metaHttpEquiv.setNodeValue("Content-Type");
 471:                       }
 472:                     if (metaContent == null)
 473:                       {
 474:                         metaContent = doc.createAttribute("content");
 475:                         meta.getAttributes().setNamedItem(metaContent);
 476:                       }
 477:                     metaContent.setNodeValue(contentType);
 478:                     htmlEncoded = true;
 479:                   }
 480:               }
 481:           }
 482:         children = node.getFirstChild();
 483:         if (children != null)
 484:           serialize(children, out, convertToCdata);
 485:         break;
 486:       case Node.DOCUMENT_TYPE_NODE:
 487:         DocumentType doctype = (DocumentType) node;
 488:         out.write(BRA);
 489:         out.write(BANG);
 490:         out.write(encodeText("DOCTYPE "));
 491:         value = doctype.getNodeName();
 492:         out.write(encodeText(value));
 493:         String publicId = doctype.getPublicId();
 494:         if (publicId != null)
 495:           {
 496:             out.write(encodeText(" PUBLIC "));
 497:             out.write(APOS);
 498:             out.write(encodeText(publicId));
 499:             out.write(APOS);
 500:           }
 501:         String systemId = doctype.getSystemId();
 502:         if (systemId != null)
 503:           {
 504:             out.write(encodeText(" SYSTEM "));
 505:             out.write(APOS);
 506:             out.write(encodeText(systemId));
 507:             out.write(APOS);
 508:           }
 509:         String internalSubset = doctype.getInternalSubset();
 510:         if (internalSubset != null)
 511:           {
 512:             out.write(encodeText(internalSubset));
 513:           }
 514:         out.write(KET);
 515:         out.write(eol.getBytes(encoding));
 516:         break;
 517:       case Node.ENTITY_REFERENCE_NODE:
 518:         value = "&" + node.getNodeValue() + ";";
 519:         out.write(encodeText(value));
 520:         break;
 521:       case Node.PROCESSING_INSTRUCTION_NODE:
 522:         value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>";
 523:         out.write(encodeText(value));
 524:         Node pp = node.getParentNode();
 525:         if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE)
 526:           {
 527:             out.write(encodeText(eol));
 528:           }
 529:         break;
 530:       default:
 531:         System.err.println("Unhandled node type: "+nt);
 532:       }
 533:   }
 534: 
 535:   boolean isHTMLElement(Node node, String name)
 536:   {
 537:     if (node.getNodeType() != Node.ELEMENT_NODE)
 538:       return false;
 539:     String localName = node.getLocalName();
 540:     if (localName == null)
 541:       localName = node.getNodeName();
 542:     if (!name.equalsIgnoreCase(localName))
 543:       return false;
 544:     String uri = node.getNamespaceURI();
 545:     return (uri == null || HTML_URIS.contains(uri));
 546:   }
 547: 
 548:   boolean isDefined(String uri, String prefix)
 549:   {
 550:     if (XMLConstants.XML_NS_URI.equals(uri))
 551:       return "xml".equals(prefix);
 552:     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri))
 553:       return "xmlns".equals(prefix);
 554:     if (prefix == null)
 555:       prefix = "";
 556:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 557:       {
 558:         Map ctx = (Map) i.next();
 559:         String val = (String) ctx.get(uri);
 560:         if (val != null && val.equals(prefix))
 561:           return true;
 562:       }
 563:     return false;
 564:   }
 565: 
 566:   void pushNamespaceContext()
 567:   {
 568:     namespaces.addFirst(new HashMap());
 569:   }
 570: 
 571:   String define(String uri, String prefix)
 572:   {
 573:     if (namespaces.isEmpty())
 574:       return prefix;
 575:     HashMap ctx = (HashMap) namespaces.getFirst();
 576:     while (ctx.containsValue(prefix))
 577:       {
 578:         // Fabricate new prefix
 579:         prefix = prefix + "_";
 580:       }
 581:     ctx.put(uri, prefix);
 582:     return prefix;
 583:   }
 584: 
 585:   void popNamespaceContext()
 586:   {
 587:     namespaces.removeFirst();
 588:   }
 589: 
 590:   final byte[] encodeText(String text)
 591:     throws IOException
 592:   {
 593:     encoder.reset();
 594:     boolean htmlNeedingEncoding =
 595:       (mode == Stylesheet.OUTPUT_HTML && !htmlEncoded);
 596:     if (!encoder.canEncode(text) || htmlNeedingEncoding)
 597:       {
 598:         // Check each character
 599:         StringBuffer buf = new StringBuffer();
 600:         int len = text.length();
 601:         for (int i = 0; i < len; i++)
 602:           {
 603:             char c = text.charAt(i);
 604:             if (!encoder.canEncode(c))
 605:               {
 606:                 // Replace with character entity reference
 607:                 String hex = Integer.toHexString((int) c);
 608:                 buf.append("&#x");
 609:                 buf.append(hex);
 610:                 buf.append(';');
 611:               }
 612:             else if (htmlNeedingEncoding)
 613:               {
 614:                 String entityName = getHTMLCharacterEntity(c);
 615:                 if (entityName != null)
 616:                   {
 617:                     buf.append('&');
 618:                     buf.append(entityName);
 619:                     buf.append(';');
 620:                   }
 621:                 else
 622:                   buf.append(c);
 623:               }
 624:             else
 625:               buf.append(c);
 626:           }
 627:         text = buf.toString();
 628:       }
 629:     ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
 630:     int len = encoded.limit() - encoded.position();
 631:     if (encoded.hasArray())
 632:       {
 633:         byte[] ret = encoded.array();
 634:         if (ret.length > len)
 635:           {
 636:             // Why?
 637:             byte[] ret2 = new byte[len];
 638:             System.arraycopy(ret, 0, ret2, 0, len);
 639:             ret = ret2;
 640:           }
 641:         return ret;
 642:       }
 643:     encoded.flip();
 644:     byte[] ret = new byte[len];
 645:     encoded.get(ret, 0, len);
 646:     return ret;
 647:   }
 648: 
 649:   String encode(String text, boolean encodeCtl, boolean inAttr)
 650:   {
 651:     int len = text.length();
 652:     StringBuffer buf = null;
 653:     for (int i = 0; i < len; i++)
 654:       {
 655:         char c = text.charAt(i);
 656:         if (c == '<')
 657:           {
 658:             if (buf == null)
 659:               buf = new StringBuffer(text.substring(0, i));
 660:             buf.append("&lt;");
 661:           }
 662:         else if (c == '>')
 663:           {
 664:             if (buf == null)
 665:               buf = new StringBuffer(text.substring(0, i));
 666:             buf.append("&gt;");
 667:           }
 668:         else if (c == '&')
 669:           {
 670:             if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len &&
 671:                 text.charAt(i + 1) == '{')
 672:               {
 673:                 if (buf != null)
 674:                   buf.append(c);
 675:               }
 676:             else
 677:               {
 678:                 if (buf == null)
 679:                   buf = new StringBuffer(text.substring(0, i));
 680:                 buf.append("&amp;");
 681:               }
 682:           }
 683:         else if (c == '\'' && inAttr)
 684:           {
 685:             if (buf == null)
 686:               buf = new StringBuffer(text.substring(0, i));
 687:             if (mode == Stylesheet.OUTPUT_HTML)
 688:               // HTML does not define &apos;, use character entity ref
 689:               buf.append("&#x27;");
 690:             else
 691:               buf.append("&apos;");
 692:           }
 693:         else if (c == '"' && inAttr)
 694:           {
 695:             if (buf == null)
 696:               buf = new StringBuffer(text.substring(0, i));
 697:             buf.append("&quot;");
 698:           }
 699:         else if (encodeCtl)
 700:           {
 701:             if (c < 0x20)
 702:               {
 703:                 if (buf == null)
 704:                   buf = new StringBuffer(text.substring(0, i));
 705:                 buf.append('&');
 706:                 buf.append('#');
 707:                 buf.append((int) c);
 708:                 buf.append(';');
 709:               }
 710:             else if (buf != null)
 711:               buf.append(c);
 712:           }
 713:         else if (buf != null)
 714:           buf.append(c);
 715:       }
 716:     return (buf == null) ? text : buf.toString();
 717:   }
 718: 
 719:   String toString(Node node)
 720:   {
 721:     ByteArrayOutputStream out = new ByteArrayOutputStream();
 722:     try
 723:       {
 724:         serialize(node, out);
 725:         return new String(out.toByteArray(), encoding);
 726:       }
 727:     catch (IOException e)
 728:       {
 729:         throw new RuntimeException(e.getMessage());
 730:       }
 731:   }
 732: 
 733:   boolean isHTMLBoolean(Attr attr, String attrName)
 734:   {
 735:     attrName = attrName.toLowerCase();
 736:     Node element = attr.getOwnerElement();
 737:     String elementName = element.getLocalName();
 738:     if (elementName == null)
 739:       {
 740:         elementName = element.getNodeName();
 741:       }
 742:     elementName = elementName.toLowerCase();
 743:     Collection attributes =
 744:       (Collection) HTML_BOOLEAN_ATTRIBUTES.get(elementName);
 745:     return (attributes != null && attributes.contains(attrName));
 746:   }
 747: 
 748:   static String getHTMLCharacterEntity(char c)
 749:   {
 750:     // Hardcode these here to avoid loading the HTML DTD
 751:     switch (c)
 752:       {
 753:       case 160: return "nbsp";
 754:       case 161: return "iexcl";
 755:       case 162: return "cent";
 756:       case 163: return "pound";
 757:       case 164: return "curren";
 758:       case 165: return "yen";
 759:       case 166: return "brvbar";
 760:       case 167: return "sect";
 761:       case 168: return "uml";
 762:       case 169: return "copy";
 763:       case 170: return "ordf";
 764:       case 171: return "laquo";
 765:       case 172: return "not";
 766:       case 173: return "shy";
 767:       case 174: return "reg";
 768:       case 175: return "macr";
 769:       case 176: return "deg";
 770:       case 177: return "plusmn";
 771:       case 178: return "sup2";
 772:       case 179: return "sup3";
 773:       case 180: return "acute";
 774:       case 181: return "micro";
 775:       case 182: return "para";
 776:       case 183: return "middot";
 777:       case 184: return "cedil";
 778:       case 185: return "sup1";
 779:       case 186: return "ordm";
 780:       case 187: return "raquo";
 781:       case 188: return "frac14";
 782:       case 189: return "frac12";
 783:       case 190: return "frac34";
 784:       case 191: return "iquest";
 785:       case 192: return "Agrave";
 786:       case 193: return "Aacute";
 787:       case 194: return "Acirc";
 788:       case 195: return "Atilde";
 789:       case 196: return "Auml";
 790:       case 197: return "Aring";
 791:       case 198: return "AElig";
 792:       case 199: return "Ccedil";
 793:       case 200: return "Egrave";
 794:       case 201: return "Eacute";
 795:       case 202: return "Ecirc";
 796:       case 203: return "Euml";
 797:       case 204: return "Igrave";
 798:       case 205: return "Iacute";
 799:       case 206: return "Icirc";
 800:       case 207: return "Iuml";
 801:       case 208: return "ETH";
 802:       case 209: return "Ntilde";
 803:       case 210: return "Ograve";
 804:       case 211: return "Oacute";
 805:       case 212: return "Ocirc";
 806:       case 213: return "Otilde";
 807:       case 214: return "Ouml";
 808:       case 215: return "times";
 809:       case 216: return "Oslash";
 810:       case 217: return "Ugrave";
 811:       case 218: return "Uacute";
 812:       case 219: return "Ucirc";
 813:       case 220: return "Uuml";
 814:       case 221: return "Yacute";
 815:       case 222: return "THORN";
 816:       case 223: return "szlig";
 817:       case 224: return "agrave";
 818:       case 225: return "aacute";
 819:       case 226: return "acirc";
 820:       case 227: return "atilde";
 821:       case 228: return "auml";
 822:       case 229: return "aring";
 823:       case 230: return "aelig";
 824:       case 231: return "ccedil";
 825:       case 232: return "egrave";
 826:       case 233: return "eacute";
 827:       case 234: return "ecirc";
 828:       case 235: return "euml";
 829:       case 236: return "igrave";
 830:       case 237: return "iacute";
 831:       case 238: return "icirc";
 832:       case 239: return "iuml";
 833:       case 240: return "eth";
 834:       case 241: return "ntilde";
 835:       case 242: return "ograve";
 836:       case 243: return "oacute";
 837:       case 244: return "ocirc";
 838:       case 245: return "otilde";
 839:       case 246: return "ouml";
 840:       case 247: return "divide";
 841:       case 248: return "oslash";
 842:       case 249: return "ugrave";
 843:       case 250: return "uacute";
 844:       case 251: return "ucirc";
 845:       case 252: return "uuml";
 846:       case 253: return "yacute";
 847:       case 254: return "thorn";
 848:       case 255: return "yuml";
 849:       default: return null;
 850:       }
 851:   }
 852: 
 853: }