Source for javax.swing.text.html.parser.DTD

   1: /* DTD.java --
   2:    Copyright (C) 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package javax.swing.text.html.parser;
  40: 
  41: import java.io.DataInputStream;
  42: import java.io.EOFException;
  43: import java.io.IOException;
  44: import java.io.ObjectInputStream;
  45: import java.lang.reflect.Field;
  46: import java.lang.reflect.Modifier;
  47: import java.util.BitSet;
  48: import java.util.Hashtable;
  49: import java.util.StringTokenizer;
  50: import java.util.Vector;
  51: 
  52: /**
  53:  * <p>Representation or the SGML DTD document.
  54:  * Provides basis for describing a syntax of the
  55:  * HTML documents. The fields of this class are NOT initialized in
  56:  * constructor. You need to do this separately before passing this data
  57:  * structure to the HTML parser. The subclasses with the fields, pre-
  58:  * initialized, for example, for HTML 4.01, can be available only between
  59:  * the implementation specific classes
  60:  * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
  61:  * in this implementation).</p>
  62:  * <p>
  63:  * If you need more information about SGML DTD documents,
  64:  * the author suggests to read SGML tutorial on
  65:  * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
  66:  * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
  67:  * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
  68:  * Oxford University Press, 688 p, ISBN: 0198537379.
  69:  * </p>
  70:  * <p>
  71:  * Warning: the html, head and other tag fields will only be automatically
  72:  * assigned if the VM has the correctly implemented reflection mechanism.
  73:  * As these fields are not used anywhere in the implementation, not
  74:  * exception will be thrown in the opposite case.
  75:  * </p>
  76:  *
  77:  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
  78:  */
  79: public class DTD
  80:   implements DTDConstants
  81: {
  82:   /**
  83:    * The version of the persistent data format.
  84:    * @specnote This was made <code>final</code> in 1.5.
  85:    */
  86:   public static final int FILE_VERSION = 1;
  87: 
  88:   /**
  89:    * The table of existing available DTDs.
  90:    */
  91:   static Hashtable dtdHash = new Hashtable();
  92: 
  93:   /**
  94:    * The applet element for this DTD.
  95:    */
  96:   public Element applet;
  97: 
  98:   /**
  99:    * The base element for this DTD.
 100:    */
 101:   public Element base;
 102: 
 103:   /**
 104:    * The body element for this DTD.
 105:    */
 106:   public Element body;
 107: 
 108:   /**
 109:    * The head element for this DTD.
 110:    */
 111:   public Element head;
 112: 
 113:   /**
 114:    * The html element for this DTD.
 115:    */
 116:   public Element html;
 117: 
 118:   /**
 119:    * The isindex element of for this DTD.
 120:    */
 121:   public Element isindex;
 122: 
 123:   /**
 124:    * The meta element for this DTD.
 125:    */
 126:   public Element meta;
 127: 
 128:   /**
 129:    * The p element for this DTD.
 130:    */
 131:   public Element p;
 132: 
 133:   /**
 134:    * The param element for this DTD.
 135:    */
 136:   public Element param;
 137: 
 138:   /**
 139:    * The pcdata for this DTD.
 140:    */
 141:   public Element pcdata;
 142: 
 143:   /**
 144:    * The title element for this DTD.
 145:    */
 146:   public Element title;
 147: 
 148:   /**
 149:    * The element for accessing all DTD elements by name.
 150:    */
 151:   public Hashtable elementHash = new Hashtable();
 152: 
 153:   /**
 154:    * The entity table for accessing all DTD entities by name.
 155:    */
 156:   public Hashtable entityHash = new Hashtable();
 157: 
 158:   /**
 159:    *  The name of this DTD.
 160:    */
 161:   public String name;
 162: 
 163:   /**
 164:    * Contains all elements in this DTD. The
 165:    * javax.swing.text.html.parser.Element#index field of all elements
 166:    * in this vector is set to the element position in this vector.
 167:    */
 168:   public Vector elements = new Vector();
 169: 
 170:   /** Create a new DTD with the specified name. */
 171:   protected DTD(String a_name)
 172:   {
 173:     name = a_name;
 174:   }
 175: 
 176:   /** Get this DTD by name. The current implementation
 177:    * only looks in the internal table of DTD documents. If no corresponding
 178:    * entry is found, the new entry is created, placed into
 179:    * the table and returned. */
 180:   public static DTD getDTD(String name)
 181:                     throws IOException
 182:   {
 183:     DTD d = (DTD) dtdHash.get(name);
 184: 
 185:     if (d == null)
 186:       {
 187:         d = new DTD(name);
 188:         dtdHash.put(d.name, d);
 189:       }
 190: 
 191:     return d;
 192:   }
 193: 
 194:   /**
 195:    * Get the element by the element name. If the element is not yet
 196:    * defined, it is newly created and placed into the element table.
 197:    * If the element name matches (ingoring case) a public non static
 198:    * element field in this class, this field is assigned to the value
 199:    * of the newly created element.
 200:    */
 201:   public Element getElement(String element_name)
 202:   {
 203:     return newElement(element_name);
 204:   }
 205: 
 206:   /**
 207:    * Get the element by the value of its
 208:    * {@link javax.swing.text.html.parser.Element#index} field.
 209:    */
 210:   public Element getElement(int index)
 211:   {
 212:     return (Element) elements.get(index);
 213:   }
 214: 
 215:   /**
 216:    * Get the entity with the given identifier.
 217:    * @param id that can be returned by
 218:    * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
 219:    * @return The entity from this DTD or null if there is no entity with
 220:    * such id or such entity is not present in the table of this instance.
 221:    */
 222:   public Entity getEntity(int id)
 223:   {
 224:     String name = Entity.mapper.get(id);
 225: 
 226:     if (name != null)
 227:       return (Entity) entityHash.get(name);
 228:     else
 229:       return null;
 230:   }
 231: 
 232:   /**
 233:    * Get the named entity by its name.
 234:    */
 235:   public Entity getEntity(String entity_name)
 236:   {
 237:     return (Entity) entityHash.get(entity_name);
 238:   }
 239: 
 240:   /**
 241:    * Get the name of this instance of DTD
 242:    */
 243:   public String getName()
 244:   {
 245:     return name;
 246:   }
 247: 
 248:   /**
 249:    * Creates, adds into the entity table and returns the
 250:    * character entity like <code>&amp;lt;</code>
 251:    *  (means '<code>&lt;</code>' );
 252:    * @param name The entity name (without heading &amp; and closing ;)
 253:    * @param type The entity type
 254:    * @param character The entity value (single character)
 255:    * @return The created entity
 256:    */
 257:   public Entity defEntity(String name, int type, int character)
 258:   {
 259:     Entity e = newEntity(name, type);
 260:     e.data = new char[] { (char) character };
 261:     return e;
 262:   }
 263: 
 264:   /**
 265:    * Define the attributes for the element with the given name.
 266:    * If the element is not exist, it is created.
 267:    * @param forElement
 268:    * @param attributes
 269:    */
 270:   public void defineAttributes(String forElement, AttributeList attributes)
 271:   {
 272:     Element e = (Element) elementHash.get(forElement.toLowerCase());
 273: 
 274:     if (e == null)
 275:       e = newElement(forElement);
 276: 
 277:     e.atts = attributes;
 278:   }
 279: 
 280:   /**
 281:    * Defines the element and adds it to the element table. Sets the
 282:    * <code>Element.index</code> field to the value, unique for this
 283:    * instance of DTD. If the element with the given name already exists,
 284:    * replaces all other its settings by the method argument values.
 285:    * @param name the name of the element
 286:    * @param type the type of the element
 287:    * @param headless true if the element needs no starting tag
 288:    * (should not occur in HTML).
 289:    * @param tailless true if the element needs no ending tag (like
 290:    * <code>&lt;hr&gt;</code>
 291:    * @param content the element content
 292:    * @param exclusions the set of elements that must not occur inside
 293:    * this element. The <code>Element.index</code> value defines which
 294:    * bit in this bitset corresponds to that element.
 295:    * @param inclusions the set of elements that can occur inside this
 296:    * element. the <code>Element.index</code> value defines which
 297:    * bit in this bitset corresponds to that element.
 298:    * @param attributes the element attributes.
 299:    * @return the newly defined element.
 300:    */
 301:   public Element defineElement(String name, int type, boolean headless,
 302:                                boolean tailless, ContentModel content,
 303:                                BitSet exclusions, BitSet inclusions,
 304:                                AttributeList attributes
 305:                               )
 306:   {
 307:     Element e = newElement(name);
 308:     e.type = type;
 309:     e.oStart = headless;
 310:     e.oEnd = tailless;
 311:     e.content = content;
 312:     e.exclusions = exclusions;
 313:     e.inclusions = inclusions;
 314:     e.atts = attributes;
 315: 
 316:     return e;
 317:   }
 318: 
 319:   /**
 320:    * Creates, intializes and adds to the entity table the new
 321:    * entity.
 322:    * @param name the name of the entity
 323:    * @param type the type of the entity
 324:    * @param data the data section of the entity
 325:    * @return the created entity
 326:    */
 327:   public Entity defineEntity(String name, int type, char[] data)
 328:   {
 329:     Entity e = newEntity(name, type);
 330:     e.data = data;
 331: 
 332:     return e;
 333:   }
 334: 
 335:   /** Place this DTD into the DTD table. */
 336:   public static void putDTDHash(String name, DTD dtd)
 337:   {
 338:     dtdHash.put(name, dtd);
 339:   }
 340: 
 341:   /**
 342:    * <p>Reads DTD from an archived format. This format is not standardized
 343:    * and differs between implementations.</p><p> This implementation
 344:    * reads and defines all entities and elements using
 345:    * ObjectInputStream. The elements and entities can be written into the
 346:    * stream in any order. The objects other than elements and entities
 347:    * are ignored.</p>
 348:    * @param stream A data stream to read from.
 349:    * @throws java.io.IOException If one is thrown by the input stream
 350:    */
 351:   public void read(DataInputStream stream)
 352:             throws java.io.IOException
 353:   {
 354:     ObjectInputStream oi = new ObjectInputStream(stream);
 355:     Object def;
 356:     try
 357:       {
 358:         while (true)
 359:           {
 360:             def = oi.readObject();
 361:             if (def instanceof Element)
 362:               {
 363:                 Element e = (Element) def;
 364:                 elementHash.put(e.name.toLowerCase(), e);
 365:                 assignField(e);
 366:               }
 367:             else if (def instanceof Entity)
 368:               {
 369:                 Entity e = (Entity) def;
 370:                 entityHash.put(e.name, e);
 371:               }
 372:           }
 373:       }
 374:     catch (ClassNotFoundException ex)
 375:       {
 376:         throw new IOException(ex.getMessage());
 377:       }
 378:     catch (EOFException ex)
 379:       {
 380:         // ok EOF
 381:       }
 382:   }
 383: 
 384:   /**
 385:    * Returns the name of this instance of DTD.
 386:    */
 387:   public String toString()
 388:   {
 389:     return name;
 390:   }
 391: 
 392:   /**
 393:    * Creates and returns new attribute (not an attribute list).
 394:    * @param name the name of this attribute
 395:    * @param type the type of this attribute (FIXED, IMPLIED or
 396:    * REQUIRED from <code>DTDConstants</code>).
 397:    * @param modifier the modifier of this attribute
 398:    * @param default_value the default value of this attribute
 399:    * @param allowed_values the allowed values of this attribute. The multiple
 400:    * possible values in this parameter are supposed to be separated by
 401:    * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag. This parameter
 402:    * can be null if no list of allowed values is specified.
 403:    * @param atts the previous attribute of this element. This is
 404:    * placed to the field
 405:    * {@link javax.swing.text.html.parser.AttributeList#next },
 406:    * creating a linked list.
 407:    * @return The attributes.
 408:    */
 409:   protected AttributeList defAttributeList(String name, int type, int modifier,
 410:                                            String default_value,
 411:                                            String allowed_values,
 412:                                            AttributeList atts
 413:                                           )
 414:   {
 415:     AttributeList al = new AttributeList(name);
 416:     al.modifier = modifier;
 417:     al.value = default_value;
 418:     al.next = atts;
 419: 
 420:     if (allowed_values != null)
 421:       {
 422:         StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
 423:         Vector v = new Vector(st.countTokens());
 424: 
 425:         while (st.hasMoreTokens())
 426:           v.add(st.nextToken());
 427: 
 428:         al.values = v;
 429:       }
 430: 
 431:     return al;
 432:   }
 433: 
 434:   /**
 435:    * Creates a new content model.
 436:    * @param type specifies the BNF operation for this content model.
 437:    * The valid operations are documented in the
 438:    * {@link javax.swing.text.html.parser.ContentModel#type }.
 439:    * @param content the content of this content model
 440:    * @param next if the content model is specified by BNF-like
 441:    * expression, contains the rest of this expression.
 442:    * @return The newly created content model.
 443:    */
 444:   protected ContentModel defContentModel(int type, Object content,
 445:                                          ContentModel next
 446:                                         )
 447:   {
 448:     ContentModel model = new ContentModel();
 449:     model.type = type;
 450:     model.next = next;
 451:     model.content = content;
 452: 
 453:     return model;
 454:   }
 455: 
 456:   /**
 457:    * Defines a new element and adds it to the element table.
 458:    * If the element alredy exists,
 459:    * overrides it settings with the specified values.
 460:    * @param name the name of the new element
 461:    * @param type the type of the element
 462:    * @param headless true if the element needs no starting tag
 463:    * @param tailless true if the element needs no closing tag
 464:    * @param content the element content.
 465:    * @param exclusions the elements that must be excluded from the
 466:    * content of this element, in all levels of the hierarchy.
 467:    * @param inclusions the elements that can be included as the
 468:    * content of this element.
 469:    * @param attributes the element attributes.
 470:    * @return the created or updated element.
 471:    */
 472:   protected Element defElement(String name, int type, boolean headless,
 473:                                boolean tailless, ContentModel content,
 474:                                String[] exclusions, String[] inclusions,
 475:                                AttributeList attributes
 476:                               )
 477:   {
 478:     // compute the bit sets
 479:     BitSet exclude = bitSet(exclusions);
 480:     BitSet include = bitSet(inclusions);
 481: 
 482:     Element e =
 483:       defineElement(name, type, headless, tailless, content, exclude, include,
 484:                     attributes
 485:                    );
 486: 
 487:     return e;
 488:   }
 489: 
 490:   /**
 491:    * Creates, intializes and adds to the entity table the new
 492:    * entity.
 493:    * @param name the name of the entity
 494:    * @param type the type of the entity
 495:    * @param data the data section of the entity
 496:    * @return the created entity
 497:    */
 498:   protected Entity defEntity(String name, int type, String data)
 499:   {
 500:     Entity e = newEntity(name, type);
 501:     e.data = data.toCharArray();
 502: 
 503:     return e;
 504:   }
 505: 
 506:   private void assignField(Element e)
 507:   {
 508:     String element_name = e.name;
 509:     try
 510:       {
 511:         // Assign the field via reflection.
 512:         Field f = getClass().getField(element_name.toLowerCase());
 513:         if ((f.getModifiers() & Modifier.PUBLIC) != 0)
 514:           if ((f.getModifiers() & Modifier.STATIC) == 0)
 515:             if (f.getType().isAssignableFrom(e.getClass()))
 516:               f.set(this, e);
 517:       }
 518:     catch (IllegalAccessException ex)
 519:       {
 520:         unexpected(ex);
 521:       }
 522:     catch (NoSuchFieldException ex)
 523:       {
 524:         // This is ok.
 525:       }
 526: 
 527:     // Some virtual machines may still lack the proper
 528:     // implementation of reflection. As the tag fields
 529:     // are not used anywhere in this implementation,
 530:     // (and this class is also rarely used by the end user),
 531:     // it may be better not to crash everything by throwing an error
 532:     // for each case when the HTML parsing is required.
 533:     catch (Throwable t)
 534:       {
 535:         // This VM has no reflection mechanism implemented!
 536:         if (t instanceof OutOfMemoryError)
 537:           throw (Error) t;
 538:       }
 539:   }
 540: 
 541:   /**
 542:    * Create the bit set for this array of elements.
 543:    * The unknown elements are automatically defined and added
 544:    * to the element table.
 545:    * @param elements
 546:    * @return The bit set.
 547:    */
 548:   private BitSet bitSet(String[] elements)
 549:   {
 550:     BitSet b = new BitSet();
 551: 
 552:     for (int i = 0; i < elements.length; i++)
 553:       {
 554:         Element e = getElement(elements [ i ]);
 555: 
 556:         if (e == null)
 557:           e = newElement(elements [ i ]);
 558: 
 559:         b.set(e.index);
 560:       }
 561: 
 562:     return b;
 563:   }
 564: 
 565:   /**
 566:    * Find the element with the given name in the element table.
 567:    * If not find, create a new element with this name and add to the
 568:    * table.
 569:    * @param name the name of the element
 570:    * @return the found or created element.
 571:    */
 572:   private Element newElement(String name)
 573:   {
 574:     Element e = (Element) elementHash.get(name.toLowerCase());
 575: 
 576:     if (e == null)
 577:       {
 578:         e = new Element();
 579:         e.name = name;
 580:         e.index = elements.size();
 581:         elements.add(e);
 582:         elementHash.put(e.name.toLowerCase(), e);
 583:         assignField(e);
 584:       }
 585:     return e;
 586:   }
 587: 
 588:   /**
 589:    * Creates and adds to the element table the entity with an
 590:    * unitialized data section. Used internally.
 591:    * @param name the name of the entity
 592:    * @param type the type of the entity, a bitwise combination
 593:    * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
 594:    *
 595:    * @return the created entity
 596:    */
 597:   private Entity newEntity(String name, int type)
 598:   {
 599:     Entity e = new Entity(name, type, null);
 600:     entityHash.put(e.name, e);
 601:     return e;
 602:   }
 603: 
 604:   private void unexpected(Exception ex)
 605:   {
 606:     throw new Error("This should never happen, report a bug", ex);
 607:   }
 608: }