Source for javax.swing.text.html.parser.DTD

   1: /* DTD.java --
   2:    Copyright (C) 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package javax.swing.text.html.parser;
  40: 
  41: import java.io.DataInputStream;
  42: import java.io.EOFException;
  43: import java.io.IOException;
  44: import java.io.ObjectInputStream;
  45: import java.lang.reflect.Field;
  46: import java.lang.reflect.Modifier;
  47: import java.util.BitSet;
  48: import java.util.Hashtable;
  49: import java.util.StringTokenizer;
  50: import java.util.Vector;
  51: 
  52: /**
  53:  * <p>Representation or the SGML DTD document.
  54:  * Provides basis for describing a syntax of the
  55:  * HTML documents. The fields of this class are NOT initialized in
  56:  * constructor. You need to do this separately before passing this data
  57:  * structure to the HTML parser. The subclasses with the fields, pre-
  58:  * initialized, for example, for HTML 4.01, can be available only between
  59:  * the implementation specific classes
  60:  * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
  61:  * in this implementation).</p>
  62:  * <p>
  63:  * If you need more information about SGML DTD documents,
  64:  * the author suggests to read SGML tutorial on
  65:  * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
  66:  * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
  67:  * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
  68:  * Oxford University Press, 688 p, ISBN: 0198537379.
  69:  * </p>
  70:  * <p>
  71:  * Warning: the html, head and other tag fields will only be automatically
  72:  * assigned if the VM has the correctly implemented reflection mechanism.
  73:  * As these fields are not used anywhere in the implementation, not
  74:  * exception will be thrown in the opposite case.
  75:  * </p>
  76:  *
  77:  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
  78:  */
  79: public class DTD
  80:   implements DTDConstants
  81: {
  82:   /**
  83:    * The version of the persistent data format.
  84:    */
  85:   public static int FILE_VERSION = 1;
  86: 
  87:   /**
  88:    * The table of existing available DTDs.
  89:    */
  90:   static Hashtable dtdHash = new Hashtable();
  91: 
  92:   /**
  93:    * The applet element for this DTD.
  94:    */
  95:   public Element applet;
  96: 
  97:   /**
  98:    * The base element for this DTD.
  99:    */
 100:   public Element base;
 101: 
 102:   /**
 103:    * The body element for this DTD.
 104:    */
 105:   public Element body;
 106: 
 107:   /**
 108:    * The head element for this DTD.
 109:    */
 110:   public Element head;
 111: 
 112:   /**
 113:    * The html element for this DTD.
 114:    */
 115:   public Element html;
 116: 
 117:   /**
 118:    * The isindex element of for this DTD.
 119:    */
 120:   public Element isindex;
 121: 
 122:   /**
 123:    * The meta element for this DTD.
 124:    */
 125:   public Element meta;
 126: 
 127:   /**
 128:    * The p element for this DTD.
 129:    */
 130:   public Element p;
 131: 
 132:   /**
 133:    * The param element for this DTD.
 134:    */
 135:   public Element param;
 136: 
 137:   /**
 138:    * The pcdata for this DTD.
 139:    */
 140:   public Element pcdata;
 141: 
 142:   /**
 143:    * The title element for this DTD.
 144:    */
 145:   public Element title;
 146: 
 147:   /**
 148:    * The element for accessing all DTD elements by name.
 149:    */
 150:   public Hashtable elementHash = new Hashtable();
 151: 
 152:   /**
 153:    * The entity table for accessing all DTD entities by name.
 154:    */
 155:   public Hashtable entityHash = new Hashtable();
 156: 
 157:   /**
 158:    *  The name of this DTD.
 159:    */
 160:   public String name;
 161: 
 162:   /**
 163:    * Contains all elements in this DTD. The
 164:    * javax.swing.text.html.parser.Element#index field of all elements
 165:    * in this vector is set to the element position in this vector.
 166:    */
 167:   public Vector elements = new Vector();
 168: 
 169:   /** Create a new DTD with the specified name. */
 170:   protected DTD(String a_name)
 171:   {
 172:     name = a_name;
 173:   }
 174: 
 175:   /** Get this DTD by name. The current implementation
 176:    * only looks in the internal table of DTD documents. If no corresponding
 177:    * entry is found, the new entry is created, placed into
 178:    * the table and returned. */
 179:   public static DTD getDTD(String name)
 180:                     throws IOException
 181:   {
 182:     DTD d = (DTD) dtdHash.get(name);
 183: 
 184:     if (d == null)
 185:       {
 186:         d = new DTD(name);
 187:         dtdHash.put(d.name, d);
 188:       }
 189: 
 190:     return d;
 191:   }
 192: 
 193:   /**
 194:    * Get the element by the element name. If the element is not yet
 195:    * defined, it is newly created and placed into the element table.
 196:    * If the element name matches (ingoring case) a public non static
 197:    * element field in this class, this field is assigned to the value
 198:    * of the newly created element.
 199:    */
 200:   public Element getElement(String element_name)
 201:   {
 202:     return newElement(element_name);
 203:   }
 204: 
 205:   /**
 206:    * Get the element by the value of its
 207:    * {@link javax.swing.text.html.parser.Element#index} field.
 208:    */
 209:   public Element getElement(int index)
 210:   {
 211:     return (Element) elements.get(index);
 212:   }
 213: 
 214:   /**
 215:    * Get the entity with the given identifier.
 216:    * @param id that can be returned by
 217:    * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
 218:    * @return The entity from this DTD or null if there is no entity with
 219:    * such id or such entity is not present in the table of this instance.
 220:    */
 221:   public Entity getEntity(int id)
 222:   {
 223:     String name = Entity.mapper.get(id);
 224: 
 225:     if (name != null)
 226:       return (Entity) entityHash.get(name);
 227:     else
 228:       return null;
 229:   }
 230: 
 231:   /**
 232:    * Get the named entity by its name.
 233:    */
 234:   public Entity getEntity(String entity_name)
 235:   {
 236:     return (Entity) entityHash.get(entity_name);
 237:   }
 238: 
 239:   /**
 240:    * Get the name of this instance of DTD
 241:    */
 242:   public String getName()
 243:   {
 244:     return name;
 245:   }
 246: 
 247:   /**
 248:    * Creates, adds into the entity table and returns the
 249:    * character entity like <code>&amp;lt;</code>
 250:    *  (means '<code>&lt;</code>' );
 251:    * @param name The entity name (without heading &amp; and closing ;)
 252:    * @param type The entity type
 253:    * @param character The entity value (single character)
 254:    * @return The created entity
 255:    */
 256:   public Entity defEntity(String name, int type, int character)
 257:   {
 258:     Entity e = newEntity(name, type);
 259:     e.data = new char[] { (char) character };
 260:     return e;
 261:   }
 262: 
 263:   /**
 264:    * Define the attributes for the element with the given name.
 265:    * If the element is not exist, it is created.
 266:    * @param forElement
 267:    * @param attributes
 268:    */
 269:   public void defineAttributes(String forElement, AttributeList attributes)
 270:   {
 271:     Element e = (Element) elementHash.get(forElement.toLowerCase());
 272: 
 273:     if (e == null)
 274:       e = newElement(forElement);
 275: 
 276:     e.atts = attributes;
 277:   }
 278: 
 279:   /**
 280:    * Defines the element and adds it to the element table. Sets the
 281:    * <code>Element.index</code> field to the value, unique for this
 282:    * instance of DTD. If the element with the given name already exists,
 283:    * replaces all other its settings by the method argument values.
 284:    * @param name the name of the element
 285:    * @param type the type of the element
 286:    * @param headless true if the element needs no starting tag
 287:    * (should not occur in HTML).
 288:    * @param tailless true if the element needs no ending tag (like
 289:    * <code>&lt;hr&gt;</code>
 290:    * @param content the element content
 291:    * @param exclusions the set of elements that must not occur inside
 292:    * this element. The <code>Element.index</code> value defines which
 293:    * bit in this bitset corresponds to that element.
 294:    * @param inclusions the set of elements that can occur inside this
 295:    * element. the <code>Element.index</code> value defines which
 296:    * bit in this bitset corresponds to that element.
 297:    * @param attributes the element attributes.
 298:    * @return the newly defined element.
 299:    */
 300:   public Element defineElement(String name, int type, boolean headless,
 301:                                boolean tailless, ContentModel content,
 302:                                BitSet exclusions, BitSet inclusions,
 303:                                AttributeList attributes
 304:                               )
 305:   {
 306:     Element e = newElement(name);
 307:     e.type = type;
 308:     e.oStart = headless;
 309:     e.oEnd = tailless;
 310:     e.content = content;
 311:     e.exclusions = exclusions;
 312:     e.inclusions = inclusions;
 313:     e.atts = attributes;
 314: 
 315:     return e;
 316:   }
 317: 
 318:   /**
 319:    * Creates, intializes and adds to the entity table the new
 320:    * entity.
 321:    * @param name the name of the entity
 322:    * @param type the type of the entity
 323:    * @param data the data section of the entity
 324:    * @return the created entity
 325:    */
 326:   public Entity defineEntity(String name, int type, char[] data)
 327:   {
 328:     Entity e = newEntity(name, type);
 329:     e.data = data;
 330: 
 331:     return e;
 332:   }
 333: 
 334:   /** Place this DTD into the DTD table. */
 335:   public static void putDTDHash(String name, DTD dtd)
 336:   {
 337:     dtdHash.put(name, dtd);
 338:   }
 339: 
 340:   /**
 341:    * <p>Reads DTD from an archived format. This format is not standardized
 342:    * and differs between implementations.</p><p> This implementation
 343:    * reads and defines all entities and elements using
 344:    * ObjectInputStream. The elements and entities can be written into the
 345:    * stream in any order. The objects other than elements and entities
 346:    * are ignored.</p>
 347:    * @param stream A data stream to read from.
 348:    * @throws java.io.IOException If one is thrown by the input stream
 349:    */
 350:   public void read(DataInputStream stream)
 351:             throws java.io.IOException
 352:   {
 353:     ObjectInputStream oi = new ObjectInputStream(stream);
 354:     Object def;
 355:     try
 356:       {
 357:         while (true)
 358:           {
 359:             def = oi.readObject();
 360:             if (def instanceof Element)
 361:               {
 362:                 Element e = (Element) def;
 363:                 elementHash.put(e.name.toLowerCase(), e);
 364:                 assignField(e);
 365:               }
 366:             else if (def instanceof Entity)
 367:               {
 368:                 Entity e = (Entity) def;
 369:                 entityHash.put(e.name, e);
 370:               }
 371:           }
 372:       }
 373:     catch (ClassNotFoundException ex)
 374:       {
 375:         throw new IOException(ex.getMessage());
 376:       }
 377:     catch (EOFException ex)
 378:       {
 379:         // ok EOF
 380:       }
 381:   }
 382: 
 383:   /**
 384:    * Returns the name of this instance of DTD.
 385:    */
 386:   public String toString()
 387:   {
 388:     return name;
 389:   }
 390: 
 391:   /**
 392:    * Creates and returns new attribute (not an attribute list).
 393:    * @param name the name of this attribute
 394:    * @param type the type of this attribute (FIXED, IMPLIED or
 395:    * REQUIRED from <code>DTDConstants</code>).
 396:    * @param modifier the modifier of this attribute
 397:    * @param default_value the default value of this attribute
 398:    * @param allowed_values the allowed values of this attribute. The multiple
 399:    * possible values in this parameter are supposed to be separated by
 400:    * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag. This parameter
 401:    * can be null if no list of allowed values is specified.
 402:    * @param atts the previous attribute of this element. This is
 403:    * placed to the field
 404:    * {@link javax.swing.text.html.parser.AttributeList#next },
 405:    * creating a linked list.
 406:    * @return The attributes.
 407:    */
 408:   protected AttributeList defAttributeList(String name, int type, int modifier,
 409:                                            String default_value,
 410:                                            String allowed_values,
 411:                                            AttributeList atts
 412:                                           )
 413:   {
 414:     AttributeList al = new AttributeList(name);
 415:     al.modifier = modifier;
 416:     al.value = default_value;
 417:     al.next = atts;
 418: 
 419:     if (allowed_values != null)
 420:       {
 421:         StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
 422:         Vector v = new Vector(st.countTokens());
 423: 
 424:         while (st.hasMoreTokens())
 425:           v.add(st.nextToken());
 426: 
 427:         al.values = v;
 428:       }
 429: 
 430:     return al;
 431:   }
 432: 
 433:   /**
 434:    * Creates a new content model.
 435:    * @param type specifies the BNF operation for this content model.
 436:    * The valid operations are documented in the
 437:    * {@link javax.swing.text.html.parser.ContentModel#type }.
 438:    * @param content the content of this content model
 439:    * @param next if the content model is specified by BNF-like
 440:    * expression, contains the rest of this expression.
 441:    * @return The newly created content model.
 442:    */
 443:   protected ContentModel defContentModel(int type, Object content,
 444:                                          ContentModel next
 445:                                         )
 446:   {
 447:     ContentModel model = new ContentModel();
 448:     model.type = type;
 449:     model.next = next;
 450:     model.content = content;
 451: 
 452:     return model;
 453:   }
 454: 
 455:   /**
 456:    * Defines a new element and adds it to the element table.
 457:    * If the element alredy exists,
 458:    * overrides it settings with the specified values.
 459:    * @param name the name of the new element
 460:    * @param type the type of the element
 461:    * @param headless true if the element needs no starting tag
 462:    * @param tailless true if the element needs no closing tag
 463:    * @param content the element content.
 464:    * @param exclusions the elements that must be excluded from the
 465:    * content of this element, in all levels of the hierarchy.
 466:    * @param inclusions the elements that can be included as the
 467:    * content of this element.
 468:    * @param attributes the element attributes.
 469:    * @return the created or updated element.
 470:    */
 471:   protected Element defElement(String name, int type, boolean headless,
 472:                                boolean tailless, ContentModel content,
 473:                                String[] exclusions, String[] inclusions,
 474:                                AttributeList attributes
 475:                               )
 476:   {
 477:     // compute the bit sets
 478:     BitSet exclude = bitSet(exclusions);
 479:     BitSet include = bitSet(inclusions);
 480: 
 481:     Element e =
 482:       defineElement(name, type, headless, tailless, content, exclude, include,
 483:                     attributes
 484:                    );
 485: 
 486:     return e;
 487:   }
 488: 
 489:   /**
 490:    * Creates, intializes and adds to the entity table the new
 491:    * entity.
 492:    * @param name the name of the entity
 493:    * @param type the type of the entity
 494:    * @param data the data section of the entity
 495:    * @return the created entity
 496:    */
 497:   protected Entity defEntity(String name, int type, String data)
 498:   {
 499:     Entity e = newEntity(name, type);
 500:     e.data = data.toCharArray();
 501: 
 502:     return e;
 503:   }
 504: 
 505:   private void assignField(Element e)
 506:   {
 507:     String element_name = e.name;
 508:     try
 509:       {
 510:         // Assign the field via reflection.
 511:         Field f = getClass().getField(element_name.toLowerCase());
 512:         if ((f.getModifiers() & Modifier.PUBLIC) != 0)
 513:           if ((f.getModifiers() & Modifier.STATIC) == 0)
 514:             if (f.getType().isAssignableFrom(e.getClass()))
 515:               f.set(this, e);
 516:       }
 517:     catch (IllegalAccessException ex)
 518:       {
 519:         unexpected(ex);
 520:       }
 521:     catch (NoSuchFieldException ex)
 522:       {
 523:         // This is ok.
 524:       }
 525: 
 526:     // Some virtual machines may still lack the proper
 527:     // implementation of reflection. As the tag fields
 528:     // are not used anywhere in this implementation,
 529:     // (and this class is also rarely used by the end user),
 530:     // it may be better not to crash everything by throwing an error
 531:     // for each case when the HTML parsing is required.
 532:     catch (Throwable t)
 533:       {
 534:         // This VM has no reflection mechanism implemented!
 535:         if (t instanceof OutOfMemoryError)
 536:           throw (Error) t;
 537:       }
 538:   }
 539: 
 540:   /**
 541:    * Create the bit set for this array of elements.
 542:    * The unknown elements are automatically defined and added
 543:    * to the element table.
 544:    * @param elements
 545:    * @return The bit set.
 546:    */
 547:   private BitSet bitSet(String[] elements)
 548:   {
 549:     BitSet b = new BitSet();
 550: 
 551:     for (int i = 0; i < elements.length; i++)
 552:       {
 553:         Element e = getElement(elements [ i ]);
 554: 
 555:         if (e == null)
 556:           e = newElement(elements [ i ]);
 557: 
 558:         b.set(e.index);
 559:       }
 560: 
 561:     return b;
 562:   }
 563: 
 564:   /**
 565:    * Find the element with the given name in the element table.
 566:    * If not find, create a new element with this name and add to the
 567:    * table.
 568:    * @param name the name of the element
 569:    * @return the found or created element.
 570:    */
 571:   private Element newElement(String name)
 572:   {
 573:     Element e = (Element) elementHash.get(name.toLowerCase());
 574: 
 575:     if (e == null)
 576:       {
 577:         e = new Element();
 578:         e.name = name;
 579:         e.index = elements.size();
 580:         elements.add(e);
 581:         elementHash.put(e.name.toLowerCase(), e);
 582:         assignField(e);
 583:       }
 584:     return e;
 585:   }
 586: 
 587:   /**
 588:    * Creates and adds to the element table the entity with an
 589:    * unitialized data section. Used internally.
 590:    * @param name the name of the entity
 591:    * @param type the type of the entity, a bitwise combination
 592:    * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
 593:    * @throws an error if the parameter is both GENERAL and PARAMETER
 594:    * of both PUBLIC and SYSTEM.
 595:    * @return the created entity
 596:    */
 597:   private Entity newEntity(String name, int type)
 598:   {
 599:     Entity e = new Entity(name, type, null);
 600:     entityHash.put(e.name, e);
 601:     return e;
 602:   }
 603: 
 604:   private void unexpected(Exception ex)
 605:   {
 606:     throw new Error("This should never happen, report a bug", ex);
 607:   }
 608: }