View Javadoc

1   ////////////////////////////////////////////////////////////////////////////////
2   // MillScript-XML: an Open Spice interpreter and batch website creation tool
3   // Copyright (C) 2005 Kevin Rogers
4   //
5   // This file is part of MillScript-XML.
6   //
7   // MillScript-XML is free software; you can redistribute it and/or modify it under
8   // the terms of the GNU General Public License as published by the Free
9   // Software Foundation; either version 2 of the License, or (at your option)
10  // any later version.
11  //
12  // MillScript-XML is distributed in the hope that it will be useful, but WITHOUT
13  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  // more details.
16  //
17  // You should have received a copy of the GNU General Public License along with
18  // MillScript-XML; if not, write to the Free Software Foundation, Inc., 59 Temple
19  // Place, Suite 330, Boston, MA  02111-1307  USA
20  ////////////////////////////////////////////////////////////////////////////////
21  package org.millscript.commons.xml.tokenizer;
22  
23  import org.millscript.commons.alert.alerts.Fault;
24  import org.millscript.commons.alert.alerts.IOAlert;
25  import org.millscript.commons.util.EList;
26  import org.millscript.commons.util.EMap;
27  import org.millscript.commons.util.IMap;
28  import org.millscript.commons.util.list.EArrayList;
29  import org.millscript.commons.util.list.ELinkedList;
30  import org.millscript.commons.util.list.ISingletonList;
31  import org.millscript.commons.util.map.EHashMap;
32  import org.millscript.commons.xml.alerts.XmlErrorAlert;
33  import org.millscript.commons.xml.alerts.XmlValidityConstraintAlert;
34  import org.millscript.commons.xml.alerts.XmlWellFormednessAlert;
35  import org.millscript.commons.xml.api.AttDef;
36  import org.millscript.commons.xml.api.Contentspec;
37  import org.millscript.commons.xml.api.Name;
38  import org.millscript.commons.xml.api.contentspec.ChoiceContentspec;
39  import org.millscript.commons.xml.api.contentspec.CpContentspec;
40  import org.millscript.commons.xml.api.contentspec.SeqContentspec;
41  import org.millscript.commons.xml.api.token.AttListDeclToken;
42  import org.millscript.commons.xml.api.token.CharDataToken;
43  import org.millscript.commons.xml.api.token.CommentToken;
44  import org.millscript.commons.xml.api.token.DTDToken;
45  import org.millscript.commons.xml.api.token.ElementDeclToken;
46  import org.millscript.commons.xml.api.token.EndTagToken;
47  import org.millscript.commons.xml.api.token.EntityDeclToken;
48  import org.millscript.commons.xml.api.token.NotationDeclToken;
49  import org.millscript.commons.xml.api.token.PIToken;
50  import org.millscript.commons.xml.api.token.StartTagToken;
51  import org.millscript.commons.xml.api.token.Token;
52  import org.millscript.commons.xml.api.token.TokenVisitor;
53  import org.millscript.commons.xml.api.token.XmlDeclToken;
54  import org.millscript.commons.xml.api.tokenizer.XmlTokenizer;
55  import org.millscript.commons.xml.atttype.CdataAttDefImpl;
56  import org.millscript.commons.xml.atttype.EntitiesAttDefImpl;
57  import org.millscript.commons.xml.atttype.EntityAttDefImpl;
58  import org.millscript.commons.xml.atttype.EnumerationAttDefImpl;
59  import org.millscript.commons.xml.atttype.IdAttDefImpl;
60  import org.millscript.commons.xml.atttype.IdrefAttDefImpl;
61  import org.millscript.commons.xml.atttype.IdrefsAttDefImpl;
62  import org.millscript.commons.xml.atttype.NmtokenAttDefImpl;
63  import org.millscript.commons.xml.atttype.NmtokensAttDefImpl;
64  import org.millscript.commons.xml.atttype.NotationAttDefImpl;
65  import org.millscript.commons.xml.contentspec.AnyContentspecImpl;
66  import org.millscript.commons.xml.contentspec.ChoiceContentspecImpl;
67  import org.millscript.commons.xml.contentspec.EmptyContentspecImpl;
68  import org.millscript.commons.xml.contentspec.MixedContentspecImpl;
69  import org.millscript.commons.xml.contentspec.NameContentspecImpl;
70  import org.millscript.commons.xml.contentspec.SeqContentspecImpl;
71  import org.millscript.commons.xml.entity.CharacterEntityImpl;
72  import org.millscript.commons.xml.entity.EntityImpl;
73  import org.millscript.commons.xml.entity.ExternalGeneralEntity;
74  import org.millscript.commons.xml.entity.ExternalParameterEntity;
75  import org.millscript.commons.xml.entity.InternalGeneralEntity;
76  import org.millscript.commons.xml.entity.InternalParameterEntity;
77  import org.millscript.commons.xml.entity.UnparsedGeneralEntity;
78  import org.millscript.commons.xml.token.AttListDeclTokenImpl;
79  import org.millscript.commons.xml.token.CharDataTokenImpl;
80  import org.millscript.commons.xml.token.CommentTokenImpl;
81  import org.millscript.commons.xml.token.DTDTokenImpl;
82  import org.millscript.commons.xml.token.ElementDeclTokenImpl;
83  import org.millscript.commons.xml.token.EmptyElemTokenImpl;
84  import org.millscript.commons.xml.token.EndTagTokenImpl;
85  import org.millscript.commons.xml.token.EntityDeclTokenImpl;
86  import org.millscript.commons.xml.token.NotationDeclTokenImpl;
87  import org.millscript.commons.xml.token.PITokenImpl;
88  import org.millscript.commons.xml.token.StartTagTokenImpl;
89  import org.millscript.commons.xml.token.XmlDeclTokenImpl;
90  
91  import java.io.IOException;
92  import java.io.InputStream;
93  import java.io.InputStreamReader;
94  import java.io.Reader;
95  import java.nio.charset.Charset;
96  
97  /**
98   * This class provides an <code>XmlTokenizer</code> implementation that breaks
99   * an XML document into tokens, such as a start tag, end tag, character data,
100  * etc. This tokenizer will only perform a minimum number of well-formedness
101  * checks, such as for illegal characters, attributes, etc. This tokenizer does
102  * not perform checks such as for matching start/end tags, or that a DTD
103  * appears at the start of a document.
104  */
105 public abstract class AbstractXmlTokenizerImpl implements XmlTokenizer {
106 
107     /**
108      * The number of the current character on the current line.
109      */
110     protected int columnNumber = 0;
111 
112     /**
113      * This buffer is used when constructing certain tokens, as characters
114      * sometimes need to be appended from outside the individual token reading
115      * methods.
116      */
117     private final StringBuffer currentTokenData = new StringBuffer();
118 
119     /**
120      * Indicates when we've reached the end of the file.
121      */
122     private boolean endOfFile = false;
123 
124     /**
125      * This map contains the mapping from an entity name to its
126      * <code>Entity</code> object.
127      */
128     private final EMap< String, EntityImpl > entities = new EHashMap< String, EntityImpl >();
129 
130     /**
131      * The current line number.
132      */
133     protected int lineNumber = 1;
134 
135     private IMap< String, String > prefixToNamespace = new PrefixToNamespaceMap();
136 
137     /**
138      * This name tokenizer is used to tokenize qualified names. Depending on
139      * whether we are handling namespaces, this is either a namespace aware
140      * name tokenizer or one that ignores namespaces.
141      */
142     private final NameTokenizer nameTokenizer;
143 
144     /**
145      * This is the buffer we use when we need to push back characters, which we
146      * can then tokenize.
147      */
148     private char[] pushBackBuffer = new char[ 16 ];
149 
150     /**
151      * The current position in the push back buffer.
152      */
153     private int pushBackPos = -1;
154 
155     /**
156      * The reader we should obtain characters to tokenize from.
157      */
158     private final Reader reader;
159 
160     /**
161      * Constructs a new XML tokenizer to read from the specified input stream,
162      * using the specified character set, with optional namespace support.
163      *
164      * @param is    the <code>InputStream</code> to read from
165      * @param cs    the <code>Charset</code> to decode the
166      * <code>InputStream</code> with
167      * @param namespaceAware    indicates if the tokenizer should be namespace
168      * aware
169      */
170     protected AbstractXmlTokenizerImpl( final InputStream is, final Charset cs, final boolean namespaceAware ) {
171         this(
172             new InputStreamReader( is, cs ),
173             namespaceAware
174         );
175     }
176 
177     /**
178      * Constructs a new XML tokenizer to read from the specified reader, with
179      * optional namespace support.
180      *
181      * @param r the <code>Reader</code> to obtain characters from
182      * @param namespaceAware    indicates if the tokenizer should be namespace
183      * aware
184      */
185     protected AbstractXmlTokenizerImpl( final Reader r, final boolean namespaceAware ) {
186         // Set up the default XML entities
187         this.entities.insert( "amp", new InternalGeneralEntity( this, "amp", "&#38;" ) );
188         this.entities.insert( "lt", new InternalGeneralEntity( this, "lt", "&#60;" ) );
189         this.entities.insert( "gt", new InternalGeneralEntity( this, "gt", "&#62;" ) );
190         this.entities.insert( "apos", new InternalGeneralEntity( this, "apos", "&#39;" ) );
191         this.entities.insert( "quot", new InternalGeneralEntity( this, "quot", "&#34;" ) );
192         this.reader = r;
193         if ( namespaceAware ) {
194             this.nameTokenizer = new NamespaceAwareNameTokenizer( this );
195         } else {
196             this.nameTokenizer = new NamespaceIgnoreNameTokenizer( this );
197         }
198     }
199 
200     /**
201      * Constructs a new XML tokenizer which will copy its state from the
202      * specified existing tokenizer.
203      *
204      * @param axti  the existing tokenizer to copy state from
205      */
206     protected AbstractXmlTokenizerImpl( final AbstractXmlTokenizerImpl axti ) {
207         this( axti, axti.reader );
208     }
209 
210     /**
211      * Constructs a new XML tokenizer which will copy its state from the
212      * specified existing tokenizer, but will use the specified reader instead
213      * of the one from the existing tokenizer.
214      *
215      * @param axti  the existing tokenizer to copy state from
216      * @param rr    the new reader this tokenizer should read characters from
217      */
218     protected AbstractXmlTokenizerImpl( final AbstractXmlTokenizerImpl axti, final Reader rr ) {
219         this.columnNumber = axti.columnNumber;
220         this.endOfFile = axti.endOfFile;
221         this.entities.insertAll( axti.entities );
222         this.lineNumber = axti.lineNumber;
223         if ( axti.nameTokenizer instanceof NamespaceAwareNameTokenizer ) {
224             this.nameTokenizer = new NamespaceAwareNameTokenizer( this );
225         } else {
226             this.nameTokenizer = new NamespaceIgnoreNameTokenizer( this );
227         }
228         this.pushBackBuffer = axti.pushBackBuffer;
229         this.pushBackPos = axti.pushBackPos;
230         this.reader = rr;
231     }
232 
233     /**
234      * Appends the specified <code>char</code> to the current token.
235      *
236      * @param ch    the <code>char</code> to append
237      */
238     public void appendCurrentTokenData( final char ch ) {
239         this.currentTokenData.append( ch );
240     }
241 
242     /**
243      * Calculates a suitable new size for the push back buffer, based on
244      * the current size and the required free space.
245      *
246      * @param currentLength the length of the push back buffer
247      * @param spaceRequired the required amount of free space
248      * @return  an <code>int</code> size for the push back buffer that contains
249      * at least the required free space
250      */
251     private int calcNewSize( final int currentLength, final int spaceRequired ) {
252         if ( currentLength - this.pushBackPos < spaceRequired ) {
253             return this.calcNewSize( currentLength * 2, spaceRequired );
254         } else {
255             return currentLength;
256         }
257     }
258 
259     /**
260      * Drops any characters from the input stream that match the <code>S</code>
261      * production in the XML specification.
262      * <pre>
263      * [3] S ::= (#x20 | #x9 | #xD | #xA)+
264      * </pre>
265      */
266     public void dropS() {
267         // Read the next character
268         char ch = this.getChar();
269         // Drop any space sequence
270         while ( this.isS( ch ) ) {
271             ch = this.getChar();
272         }
273         // Push back the last character as it's not a space
274         this.pushBack( ch );
275     }
276 
277     /**
278      * Returns the next character from the input stream, throwing an alert if
279      * the end of file is reached.
280      *
281      * @return  the next <code>char</code> from the input stream
282      */
283     public char getChar() {
284         final int ch = this.getIntChar();
285         if ( ch == -1 ) {
286             throw new IOAlert( "Unexpected end of file" ).mishap();
287         } else {
288             return (char) ch;
289         }
290     }
291 
292     /**
293      * Returns the raw <code>int</code> version of the next char, handling any
294      * push back characters and XML version dependencies. This method accounts
295      * for the set of legal characters in an XML document.
296      *
297      * @return  the <code>int</code> version of the next char or
298      * <code>-1</code> if there are no more characters
299      */
300     public int getIntChar() {
301         // Check the current push back buffer position
302         if ( this.pushBackPos == -1 ) {
303             // Just read a character from the parent reader
304             try {
305                 final int ch = this.reader.read();
306                 if ( ch == -1 ) {
307                     this.endOfFile = true;
308                     return ch;
309                 }
310                 return this.handleIntChar( ch );
311             } catch ( IOException e ) {
312                 throw new IOAlert().setParentThrowable( e ).mishap();
313             }
314         } else if ( this.endOfFile ) {
315             // We've already got to the end of the file, so just return -1
316             return -1;
317         } else {
318             // Return the last character from the push back buffer
319             return this.pushBackBuffer[ this.pushBackPos-- ];
320         }
321     }
322 
323     /**
324      * @see org.millscript.commons.xml.api.tokenizer.XmlTokenizer#getLineNumber()
325      */
326     public int getLineNumber() {
327         return this.lineNumber;
328     }
329 
330     /**
331      * Returns the next <code>char</code>, checking that it is a legal quote
332      * character.
333      *
334      * @return  the next <code>char</code>, if it is a legal quote character
335      */
336     public char getQuoteChar() {
337         final char ch = this.getChar();
338         if ( ch == '"' || ch == '\'' ) {
339             return ch;
340         } else {
341             throw new XmlWellFormednessAlert(
342                 "Illegal quote character"
343             ).culpritChar( ch ).mishap();
344         }
345     }
346 
347     /**
348      * Handles the specified character, performing any XML version dependent
349      * line break conversions and checks on its validity.
350      *
351      * @param ch    the character to test
352      * @return  the handled character, which may not be the same as that
353      * supplied as the argument
354      */
355     public abstract int handleIntChar( final int ch );
356 
357     /**
358      * @see org.millscript.commons.xml.api.tokenizer.XmlTokenizer#hasNextToken()
359      */
360     public boolean hasNextToken() {
361         // Check the current push back buffer position
362         if ( this.pushBackPos == -1 ) {
363             // Ok, we need to see if we can read anything from the buffer
364             try {
365                 final int ch = this.reader.read();
366                 if ( ch == -1 ) {
367                     this.endOfFile = true;
368                     return false;
369                 }
370                 // Push back the character so it can be read as part of the
371                 // next token
372                 this.pushBack( (char) this.handleIntChar( ch ) );
373                 // Ok, there should be another token
374                 return true;
375             } catch ( IOException e ) {
376                 throw new IOAlert().setParentThrowable( e ).mishap();
377             }
378         } else {
379             // If we're already at the end of the file, there is no more data
380             // so return false, otherwise return true. This is the opposite of
381             // what's in the endOfFile field, so return that.
382             return !this.endOfFile;
383         }
384     }
385 
386     /**
387      * Tests if the specified character matches the <code>Char</code>
388      * production in the XML specification.
389      *
390      * @param ch    the character to test
391      * @return  <code>true</code> if the character is a <code>Char</code> and
392      * <code>false</code> otherwise
393      */
394     public abstract boolean isChar( final int ch );
395 
396     /**
397      * Tests if the specified character matches the <code>NameChar</code>
398      * production in the XML specification.
399      *
400      * @param ch    the character to test
401      * @return  <code>true</code> if the character is a <code>NameChar</code>
402      * and <code>false</code> otherwise
403      */
404     public abstract boolean isNameChar( final char ch );
405 
406     /**
407      * Tests if the specified character matches the <code>NameStartChar</code>
408      * production in the XML specification.
409      *
410      * @param ch    the character to test
411      * @return  <code>true</code> if the character is a
412      * <code>NameStartChar</code> and <code>false</code> otherwise
413      */
414     public abstract boolean isNameStartChar( final char ch );
415 
416     /**
417      * Tests if the specified character matches the <code>NCNameChar</code>
418      * production in the XML namespace specification.
419      *
420      * @param ch    the character to test
421      * @return  <code>true</code> if the character is a <code>NCNameChar</code>
422      * and <code>false</code> otherwise
423      */
424     public boolean isNCNameChar( final char ch ) {
425         return ch != ':' && this.isNameChar( ch );
426     }
427 
428     /**
429      * Tests if the specified character matches the
430      * <code>NCNameStartChar</code> production in the XML namespace
431      * specification.
432      *
433      * @param ch    the character to test
434      * @return  <code>true</code> if the character is a
435      * <code>NCNameStartChar</code> and <code>false</code> otherwise
436      */
437     public boolean isNCNameStartChar( final char ch ) {
438         return ch != ':' && this.isNameStartChar( ch );
439     }
440 
441     /**
442      * Tests if the specified character matches the <code>S</code> production
443      * in the XML specification.
444      * <pre>
445      * [3] S ::= (#x20 | #x9 | #xD | #xA)+
446      * </pre>
447      *
448      * @param ch    the character to test
449      * @return  <code>true</code> if the character is a <code>S</code> character
450      * and <code>false</code> otherwise
451      */
452     public boolean isS( final int ch ) {
453         return ch == 0x20 || ch == 0x09 || ch == 0x0D || ch == 0x0A;
454     }
455 
456     /**
457      * Tests that the next character is the specified one, otherwise it throws
458      * an Alert.
459      *
460      * @param testch    the character we must read next
461      */
462     public void mustRead( final char testch ) {
463         final char ch = this.getChar();
464         if ( ch != testch ) {
465             throw new XmlErrorAlert(
466                 "Unexpected character"
467             ).culprit(
468                 "found",
469                 new Character( ch )
470             ).culprit(
471                 "wanted",
472                 new Character( testch )
473             ).mishap();
474         }
475     }
476 
477     /**
478      * Tests if the next input sequence matches the <code>Eq</code> production
479      * in the XML specification, otherwise it throws an Alert. If the sequence
480      * matches, it will be dropped.
481      * <pre>
482      * [25] Eq ::= S? '=' S?
483      * </pre>
484      */
485     public void mustReadEq() {
486         // Drop any prefix space
487         this.dropS();
488         // Get the next character, which must be equals
489         char ch = this.getChar();
490         if ( ch != '=' ) {
491             throw new XmlWellFormednessAlert( "'=' character expected" ).culpritChar( ch ).mishap();
492         }
493         // Drop any postfix space
494         this.dropS();
495     }
496 
497     /**
498      * Tests if the next input sequence matches the <code>S</code> production
499      * in the XML specification, otherwise it throws an Alert. If the sequence
500      * matches, it will be dropped.
501      * <pre>
502      * [3] S ::= (#x20 | #x9 | #xD | #xA)+
503      * </pre>
504      */
505     public void mustReadS() {
506         final char ch = this.getChar();
507         if ( this.isS( ch ) ) {
508             this.dropS();
509         } else {
510             throw new XmlWellFormednessAlert( "White space expected" ).culpritChar( ch ).mishap();
511         }
512     }
513 
514     /**
515      * @see org.millscript.commons.xml.api.tokenizer.XmlTokenizer#nextToken()
516      */
517     public Token nextToken() {
518         if ( this.tryRead( '<' ) ) {
519             // Could be an EmptyElemTag, STag, ETag, CDSect, PI, Comment,
520             // doctypeDecl, elementDecl, AttListDecl, EntityDecl or
521             // NotationDecl
522             final char ch2 = this.getChar();
523             switch ( ch2 ) {
524                 case '/':
525                     // An ETag
526                     return this.readETag();
527                 case '!':
528                     // Could be either a Comment, CDSect, doctypeDecl,
529                     // elementDecl, AttListDecl, EntityDecl or NotationDecl
530                     if ( this.tryRead( '-', '-' ) ) {
531                         // A Comment
532                         return this.readComment();
533                     } else if ( this.tryRead( '[' ) ) {
534                         // A CDSect
535                         return this.readCDSect();
536                     } else {
537                         final String name = this.readName();
538                         if ( name.equals( "DOCTYPE" ) ) {
539                             // A doctypeDecl
540                             return this.readDoctypeDecl();
541                         } else if ( name.equals( "ELEMENT" ) ) {
542                             // An elementDecl
543                             return this.readElementDecl();
544                         } else if ( name.equals( "ATTLIST" ) ) {
545                             // An AttListDecl
546                             return this.readAttlistDecl();
547                         } else if ( name.equals( "ENTITY" ) ) {
548                             // An EntityDecl
549                             return this.readEntityDecl();
550                         } else if ( name.equals( "NOTATION" ) ) {
551                             // A NotationDecl
552                             return this.readNotationDecl();
553                         } else {
554                             // Whu?
555                             throw new XmlWellFormednessAlert(
556                                 "Expected doctype or markup declaration"
557                             ).culpritToken( "<!" + name ).mishap();
558                         }
559                     }
560                 case '?':
561                     // A PI
562                     return this.readPI();
563                 default:
564                     // An EmptyElemTag or STag
565                     this.pushBack( ch2 );
566                     return this.readSTag();
567             }
568         } else {
569             // Either a Reference or CharData, handle either the same way
570             return this.readCharData();
571         }
572     }
573 
574     /**
575      * Tests that the next character is the specified one.
576      *
577      * @param testch    the character to test for.
578      * @return  <code>true</code> if the character is the required one and
579      * <code>false</code> otherwise
580      */
581     public boolean peekRead( final char testch ) {
582         final char ch = this.getChar();
583         this.pushBack( ch );
584         if ( ch == testch ) {
585             return true;
586         }
587         return false;
588     }
589 
590     /**
591      * Tests if the next available character matches the <code>S</code>
592      * production in the XML specification.
593      * <pre>
594      * [3] S ::= (#x20 | #x9 | #xD | #xA)+
595      * </pre>
596      *
597      * @return  <code>true</code> if the next character is a <code>S</code>
598      * character and <code>false</code> otherwise
599      */
600     public boolean peekS() {
601         final char ch = this.getChar();
602         this.pushBack( ch );
603         return this.isS( ch );
604     }
605 
606     /**
607      * Pushes back the specified character so it will be the next one returned
608      * by the {@link #getChar()} method.
609      *
610      * @param ch    the <code>char</code> to push back
611      */
612     public void pushBack( final char ch ) {
613         if ( ++this.pushBackPos == this.pushBackBuffer.length ) {
614             final char[] oldBuffer = this.pushBackBuffer;
615             // We need a larger token buffer, just double it's size
616             this.pushBackBuffer = new char[ this.pushBackBuffer.length * 2 ];
617             // Copy the original characters into the new buffer
618             System.arraycopy( oldBuffer, 0, this.pushBackBuffer, 0, oldBuffer.length );
619         }
620         // Append the character to the push back buffer
621         this.pushBackBuffer[ this.pushBackPos ] = ch;
622     }
623 
624     /**
625      * Pushes back all the characters in the string, so they will be returned
626      * by subsequent calls to the {@link #getChar()} method. The characters are
627      * pushed in reverse order, so that the first character in the string will
628      * be the first character returned by {@link #getChar()}.
629      *
630      * @param s the <code>String</code> to push back
631      */
632     public void pushBack( final String s ) {
633         final int newSize = this.calcNewSize( this.pushBackBuffer.length, s.length() );
634         if ( this.pushBackBuffer.length != newSize ) {
635             final char[] oldBuffer = this.pushBackBuffer;
636             // We need a larger token buffer, just double it's size
637             this.pushBackBuffer = new char[ newSize ];
638             // Copy the original characters into the new buffer
639             System.arraycopy( oldBuffer, 0, this.pushBackBuffer, 0, oldBuffer.length );
640         }
641         // Append the string to the push back buffer in reverse order
642         for ( int i = s.length() - 1; i >= 0; i-- ) {
643             this.pushBack( s.charAt( i ) );
644         }
645     }
646 
647     /**
648      * Returns the next input sequence as an attribute list declaration token.
649      * This will generate an <code>Alert</code> if the input sequence doesn't
650      * match the <code>AttlistDecl</code> production in the XML specification.
651      * <pre>
652      * [52] AttlistDecl ::='<!ATTLIST' S Name AttDef* S? '>'
653      * [53] AttDef ::= S Name S AttType S DefaultDecl
654      * [54] AttType ::= StringType | TokenizedType | EnumeratedType
655      * [55] StringType ::= 'CDATA'
656      * [56] TokenizedType ::= 'ID' [VC: ID][VC: One ID per Element Type][VC: ID Attribute Default]
657      *                      | 'IDREF' [VC: IDREF]
658      *                      | 'IDREFS' [VC: IDREF]
659      *                      | 'ENTITY' [VC: Entity Name]
660      *                      | 'ENTITIES' [VC: Entity Name]
661      *                      | 'NMTOKEN' [VC: Name Token]
662      *                      | 'NMTOKENS' [VC: Name Token]
663      * [57] EnumeratedType ::= NotationType | Enumeration
664      * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' [VC: Notation Attributes][VC: One Notation Per Element Type][VC: No Notation on Empty Element][VC: No Duplicate Tokens]
665      * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens]
666      * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) [VC: Required Attribute][VC: Attribute Default Value Syntactically Correct][WFC: No < in Attribute Values][VC: Fixed Attribute Default]
667      * </pre>
668      * <p>
669      * When this method is called the identifying sequence, i.e.
670      * '<code>&lt;!ATTLIST</code>', has already been read and should NOT be expected.
671      * </p>
672      *
673      * @return  an <code>AttListDeclToken</code> for the attribute list
674      * declaration
675      */
676     public AttListDeclToken readAttlistDecl() {
677         this.mustReadS();
678         final Name elementName = this.nameTokenizer.readTagName();
679         this.dropS();
680         final EList< AttDef > attDefList = new ELinkedList< AttDef >();
681         while ( !this.tryRead( '>' ) ) {
682             final Name attributeName = this.nameTokenizer.readTagName();
683             if ( this.tryRead( '(' ) ) {
684                 // Ok, this must be an enumeration
685                 final EList< String > enumerationNmtokens = new ELinkedList< String >();
686                 // Read the first name
687                 enumerationNmtokens.addLast( this.readNmtoken() );
688                 // Drop any space after the Name
689                 this.dropS();
690                 // Are we at the end yet?
691                 while ( !this.tryRead( ')' ) ) {
692                     this.mustRead( '|' );
693                     this.dropS();
694                     enumerationNmtokens.addLast( this.readNmtoken() );
695                     this.dropS();
696                 }
697                 attDefList.addLast(
698                     new EnumerationAttDefImpl( attributeName, enumerationNmtokens, this )
699                 );
700             } else {
701                 final String attType = this.readNmtoken();
702                 if ( attType.equals( "CDATA" ) ) {
703                     attDefList.addLast(
704                         new CdataAttDefImpl( attributeName, this )
705                     );
706                 } else if ( attType.equals( "ID" ) ) {
707                     attDefList.addLast(
708                         new IdAttDefImpl( attributeName, this )
709                     );
710                 } else if ( attType.equals( "IDREF" ) ) {
711                     attDefList.addLast(
712                         new IdrefAttDefImpl( attributeName, this )
713                     );
714                 } else if ( attType.equals( "IDREFS" ) ) {
715                     attDefList.addLast(
716                         new IdrefsAttDefImpl( attributeName, this )
717                     );
718                 } else if ( attType.equals( "ENTITY" ) ) {
719                     attDefList.addLast(
720                         new EntityAttDefImpl( attributeName, this )
721                     );
722                 } else if ( attType.equals( "ENTITIES" ) ) {
723                     attDefList.addLast(
724                         new EntitiesAttDefImpl( attributeName, this )
725                     );
726                 } else if ( attType.equals( "NMTOKEN" ) ) {
727                     attDefList.addLast(
728                         new NmtokenAttDefImpl( attributeName, this )
729                     );
730                 } else if ( attType.equals( "NMTOKENS" ) ) {
731                     attDefList.addLast(
732                         new NmtokensAttDefImpl( attributeName, this )
733                     );
734                 } else if ( attType.equals( "NOTATION" ) ) {
735                     final EList< String > notationNames = new ELinkedList< String >();
736                     // Read the first name
737                     notationNames.addLast( this.readName() );
738                     // Drop any space after the Name
739                     this.dropS();
740                     // Are we at the end yet?
741                     while ( !this.tryRead( ')' ) ) {
742                         this.mustRead( '|' );
743                         this.dropS();
744                         notationNames.addLast( this.readName() );
745                         this.dropS();
746                     }
747                     attDefList.addLast(
748                         new NotationAttDefImpl( attributeName, notationNames, this )
749                     );
750                 }
751             }
752         }
753         // Set the current token to a new AttListDecl token
754         return new AttListDeclTokenImpl( elementName, attDefList );
755     }
756 
757     /**
758      * Returns the next input sequence as an attribute value string. This will
759      * generate an <code>Alert</code> if the input sequence doesn't match the
760      * <code>AttValue</code> production in the XML specification.
761      *
762      * @return  a <code>String</code> holding the attribute value
763      */
764     public String readAttValue() {
765         // Drop the equals
766         this.mustReadEq();
767         // Attribute value buffer
768         this.currentTokenData.setLength( 0 );
769         // Read opening quote
770         final char quoteChar = this.getQuoteChar();
771         // Read a character from the attribute
772         for ( char ch = this.getChar(); ch != quoteChar; ch = this.getChar() ) {
773             switch ( ch ) {
774                 case '<':
775                     throw new XmlWellFormednessAlert(
776                         "Attribute values cannot contain the '<' character"
777                     ).mishap();
778                 case '&':
779                     // We've got a Reference
780                     this.readReference().referenceInAttributeValue( quoteChar );
781                     break;
782                 case ' ':
783                 case '\n':
784                 case '\r':
785                 case '\t':
786                     // We've got a white space char, so append a space
787                     this.currentTokenData.append( ' ' );
788                     break;
789                 default:
790                     this.currentTokenData.append( ch );
791                     break;
792             }
793         }
794         // no need to push back the last character read, we can safely drop the
795         // closing quote character
796         return this.currentTokenData.toString();
797     }
798 
799     /**
800      * Returns the next input sequence as a CDATA section. This will generate
801      * an <code>Alert</code> if the input sequence doesn't match the
802      * <code>CDSect</code> production in the XML specification.
803      * <pre>
804      * [18] CDSect ::= CDStart CData CDEnd
805      * [19] CDStart ::= '<![CDATA['
806      * [20] CData ::= (Char* - (Char* ']]>' Char*))
807      * [21] CDEnd ::= ']]>'
808      * </pre>
809      * <p>
810      * When this method is called the first three characters
811      * '<code>&lt;![</code>' will have already been processed and should NOT be
812      * expected.
813      * </p>
814      *
815      * @return  a <code>CharDataToken</code> for the CDATA section
816      */
817     public CharDataToken readCDSect() {
818         // We are expecting to read "CDATA[" at this point
819         this.currentTokenData.setLength( 0 );
820         for ( int c = 0; c < 6; c++ ) {
821             this.currentTokenData.append( this.getChar() );
822         }
823         boolean justWhiteSpace = true;
824         if ( this.currentTokenData.lastIndexOf( "CDATA[" ) == 0 ) {
825             // Good. We've got the start of the CDATA section
826             this.currentTokenData.setLength( 0 );
827             for ( char ch = this.getChar();; ch = this.getChar() ) {
828                 if ( ch == ']' && this.tryRead( ']', '>' ) ) {
829                     // We're at the end of the CDATA section. Set the
830                     // current token to a new CDATA CharData token
831                     return new CharDataTokenImpl(
832                         this.currentTokenData.toString(),
833                         true,
834                         justWhiteSpace
835                     );
836                 } else {
837                     // Just a normal Char
838                     // Check if the current data is not just whitespace and the
839                     // current character is not whitespace...
840                     if ( justWhiteSpace && !this.isS( ch ) ) {
841                         // Ok, no longer just whitespace
842                         justWhiteSpace = false;
843                     }
844                     this.currentTokenData.append( ch );
845                 }
846             }
847         } else {
848             throw new XmlWellFormednessAlert(
849                 "Expected CDStart tag"
850             ).culpritToken(
851                 this.currentTokenData.insert( 0, "<!" ).toString()
852             ).mishap();
853         }
854     }
855 
856     /**
857      * Returns the next input sequence as a character data token. This will
858      * generate an <code>Alert</code> if the input sequence doesn't match the
859      * <code>CharData</code> production in the XML specification.
860      * <pre>
861      * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
862      * </pre>
863      *
864      * @return  a <code>CharDataToken</code> for the character data
865      */
866     public CharDataToken readCharData() {
867         this.currentTokenData.setLength( 0 );
868         boolean justWhiteSpace = true;
869         for ( int ch = this.getIntChar(); ch != -1; ch = this.getIntChar() ) {
870             if ( ch == '<' ) {
871                 // An element, CDSect, PI or Comment
872                 this.pushBack( (char) ch );
873                 // Set the current token to a new CharData token
874                 return new CharDataTokenImpl(
875                     this.currentTokenData.toString(),
876                     false,
877                     justWhiteSpace
878                 );
879             } else if ( ch == '&' ) {
880                 // A Reference
881                 this.readReference().referenceInContent();
882             } else if ( ch == ']' && this.tryRead( ']', '>' ) ) {
883                 // We've got the CDEnd. This is illegal in normal content
884                 throw new XmlWellFormednessAlert(
885                     "']]>' sequence is not allowed in CharData"
886                 ).mishap();
887             } else {
888                 // Normal CharData
889                 // Check if the current data is not just whitespace and the
890                 // current character is not whitespace...
891                 if ( justWhiteSpace & !this.isS( ch ) ) {
892                     // Ok, no longer just whitespace
893                     justWhiteSpace = false;
894                 }
895                 this.currentTokenData.append( (char) ch );
896             }
897         }
898         if ( this.currentTokenData.length() != 0 ) {
899             // end of file
900             // We must return a token for that last bit of character data
901             return new CharDataTokenImpl(
902                 this.currentTokenData.toString(),
903                 false,
904                 justWhiteSpace
905             );
906         } else {
907             // TODO - Should we return an EOF token here?
908             throw new Fault( "This message indicates we need an EOFToken" ).mishap();
909         }
910     }
911 
912     /**
913      * Returns the next input sequence as a children content particle model.
914      * This will generate an <code>Alert</code> if the input sequence doesn't
915      * match the <code>children</code> production in the XML specification.
916      * <pre>
917      * [47] children ::= (choice | seq) ('?' | '*' | '+')?
918      * [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' [VC: Proper Group/PE Nesting]
919      * </pre>
920      *
921      * @return  a <code>CpContentspec</code> for the children declaration
922      */
923     CpContentspec readChildren() {
924         if ( this.tryRead( '(' ) ) {
925             // Either a choice or seq.
926             // Drop any space
927             this.dropS();
928             // Read the first item in the choice or seq
929             CpContentspec cs = this.readCp();
930             // Drop any optional space
931             this.dropS();
932             // Determine if we're parsing a choice or seq
933             if ( this.tryRead( '|' ) ) {
934                 // A choice
935                 cs = this.readChoice( cs );
936             } else if ( this.tryRead( ',' ) ) {
937                 // A seq with multiple cp
938                 cs = this.readSeq( cs );
939             } else if ( this.tryRead( ')' ) ) {
940                 // A seq with a single cp
941                 cs = new SeqContentspecImpl( new ISingletonList< CpContentspec >( cs ) );
942             } else {
943                 // Not well-formed cp
944                 throw new XmlWellFormednessAlert(
945                     "Expected a choice('|') or sequence(',') in the contentspec"
946                 ).culpritChar( this.getChar() ).mishap();
947             }
948             // Look for any modifier
949             if ( this.tryRead( '?' ) ) {
950                 cs.setMatchMode( CpContentspec.ZERO_OR_ONE );
951             } else if ( this.tryRead( '*' ) ) {
952                 cs.setMatchMode( CpContentspec.ZERO_OR_MORE );
953             } else if ( this.tryRead( '+' ) ) {
954                 cs.setMatchMode( CpContentspec.ONE_OR_MORE );
955             }
956             // Return the new model
957             return cs;
958         } else {
959             throw new XmlWellFormednessAlert(
960                 "Expected '(' to start a choice or sequence in the element declaration"
961             ).culpritChar( this.getChar() ).mishap();
962         }
963     }
964 
965     /**
966      * Returns the next input sequence as a choice content particle model.
967      * This will generate an <code>Alert</code> if the input sequence doesn't
968      * match the <code>choice</code> production in the XML specification.
969      * <pre>
970      * [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' [VC: Proper Group/PE Nesting]
971      * </pre>
972      *
973      * @param first the first content particle in the choice
974      * @return  a <code>ChoiceContentspec</code> for the choice declaration
975      */
976     ChoiceContentspec readChoice( final CpContentspec first ) {
977         final EList< CpContentspec > list = new ELinkedList< CpContentspec >();
978         list.addLast( first );
979         // Drop any space after the '|'
980         this.dropS();
981         // Read the next cp
982         list.addLast( this.readCp() );
983         // Are we at the end yet?
984         while ( !this.tryRead( ')' ) ) {
985             this.mustRead( '|' );
986             this.dropS();
987             list.addLast( this.readCp() );
988             this.dropS();
989         }
990         return new ChoiceContentspecImpl( list );
991     }
992 
993     /**
994      * Returns the next input sequence as a comment token. This will generate
995      * an <code>Alert</code> if the input sequence doesn't match the
996      * <code>CharData</code> production in the XML specification.
997      * <pre>
998      * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
999      * </pre>
1000      * <p>
1001      * When this method is called the first four characters
1002      * '<code>&lt;!--</code>' will have already been processed and should NOT
1003      * be expected.
1004      * </p>
1005      *
1006      * @return  a <code>CommentToken</code> for the comment
1007      */
1008     public CommentToken readComment() {
1009         this.currentTokenData.setLength( 0 );
1010         for ( char ch = this.getChar();; ch = this.getChar() ) {
1011             if ( ch == '-' ) {
1012                 char ch2 = this.getChar();
1013                 if ( ch2 == '-' ) {
1014                     char ch3 = this.getChar();
1015                     if ( ch3 == '>' ) {
1016                         // We're at the end of the comment. Set the current
1017                         // token to a new CharData token
1018                         return new CommentTokenImpl(
1019                             this.currentTokenData.toString()
1020                         );
1021                     } else {
1022                         // We've read a double hypen not followed by a
1023                         // greater than. This is not well formed
1024                         throw new XmlWellFormednessAlert(
1025                             "The sequence '--' is not allowed in the body of a comment"
1026                         ).mishap();
1027                     }
1028                 } else {
1029                     // We've read a hypen followed by a non-hypen, which is
1030                     // ok
1031                     this.currentTokenData.append( ch );
1032                     this.currentTokenData.append( ch2 );
1033                 }
1034             } else {
1035                 this.currentTokenData.append( ch );
1036             }
1037         }
1038     }
1039 
1040     /**
1041      * Returns the next input sequence as an element content model. This will
1042      * generate an <code>Alert</code> if the input sequence doesn't match the
1043      * <code>contentspec</code> production in the XML specification.
1044      * <pre>
1045      * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1046      * </pre>
1047      *
1048      * @return  a <code>Contentspec</code> for the content model
1049      */
1050     Contentspec readContentspec() {
1051         if ( this.tryRead( '(' ) ) {
1052             // Either Mixed or children
1053             // Drop any space
1054             this.dropS();
1055             if ( this.tryRead( '#' ) ) {
1056                 // Mixed
1057                 final String name = this.readNmtoken();
1058                 if ( name.equals( "#PCDATA" ) ) {
1059                     // ok
1060                     return this.readMixed();
1061                 } else {
1062                     // This could only be "#PCDATA" so this is a syntax error
1063                     throw new XmlWellFormednessAlert(
1064                         "Expected '#PCDATA'"
1065                     ).culpritToken( name ).mishap();
1066                 }
1067             } else {
1068                 // children
1069                 return this.readChildren();
1070             }
1071         } else {
1072             // Either empty or any
1073             final String name = this.readNmtoken();
1074             if ( name.equals( "EMPTY" ) ) {
1075                 return new EmptyContentspecImpl();
1076             } else if ( name.equals( "ANY" ) ) {
1077                 return new AnyContentspecImpl();
1078             } else {
1079                 throw new XmlWellFormednessAlert(
1080                     "Expected 'EMPTY', 'ANY', Mixed or children"
1081                 ).culpritToken( name ).mishap();
1082             }
1083         }
1084     }
1085 
1086     /**
1087      * Returns the next input sequence as a content particle in a content
1088      * model. This will generate an <code>Alert</code> if the input sequence
1089      * doesn't match the <code>cp</code> production in the XML specification.
1090      * <pre>
1091      * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1092      * </pre>
1093      *
1094      * @return  a <code>CpContentspec</code> for the content particle
1095      */
1096     CpContentspec readCp() {
1097         CpContentspec cs = null;
1098         if ( this.peekRead( '(' ) ) {
1099             // Re-use the children parsing code
1100             cs = this.readChildren();
1101         } else {
1102             // just a Name
1103             cs = new NameContentspecImpl( this.nameTokenizer.readTagName() );
1104         }
1105         // Look for any modifier
1106         if ( this.tryRead( '?' ) ) {
1107             cs.setMatchMode( CpContentspec.ZERO_OR_ONE );
1108         } else if ( this.tryRead( '*' ) ) {
1109             cs.setMatchMode( CpContentspec.ZERO_OR_MORE );
1110         } else if ( this.tryRead( '+' ) ) {
1111             cs.setMatchMode( CpContentspec.ONE_OR_MORE );
1112         }
1113         // Return the new model
1114         return cs;
1115     }
1116 
1117     /**
1118      * Returns the next input sequence as an document type declaration token.
1119      * This will generate an <code>Alert</code> if the input sequence doesn't
1120      * match the <code>doctypedecl</code> production in the XML specification.
1121      * <pre>
1122      * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' [VC: Root Element Type] [WFC: External Subset]
1123      * [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
1124      * </pre>
1125      * <p>
1126      * When this method is called the identifying sequence, i.e.
1127      * '<code>&lt;!DOCTYPE</code>', and it should NOT be expected.
1128      * </p>
1129      *
1130      * @return  a <code>DTDToken</code> for the document type declaration
1131      */
1132     public DTDToken readDoctypeDecl() {
1133         this.dropS();
1134         final Name name = this.nameTokenizer.readTagName();
1135         String sysid = null;
1136         String pubid = null;
1137         this.dropS();
1138         if ( this.peekRead( 'S' ) || this.peekRead( 'P' ) ) {
1139             final String id = this.readNmtoken();
1140             this.dropS();
1141             if ( id.equals( "SYSTEM" ) ) {
1142                 sysid = this.readSystemLiteral();
1143             } else if ( id.equals( "PUBLIC" ) ) {
1144                 pubid = this.readPubidLiteral();
1145                 this.mustReadS();
1146                 sysid = this.readSystemLiteral();
1147             } else {
1148                 // huh. An EnternalID must start with either SYSTEM or
1149                 // PUBLIC
1150                 throw new XmlWellFormednessAlert(
1151                     "An ExternalID must start with 'SYSTEM' or 'PUBLIC'"
1152                 ).culpritToken( id ).mishap();
1153             }
1154             // Drop the optional space
1155             this.dropS();
1156         }
1157         // Ok, we should have parsed any ExternalID. Lets have a look
1158         // for any internal subset
1159         if ( this.tryRead( '[' ) ) {
1160             // TODO - We need to do something with the doctype model we generate
1161             this.readIntSubset();
1162         }
1163         // Ok, we should have parsed any intSubset. Lets have a look
1164         // for the end
1165         this.dropS();
1166         this.mustRead( '>' );
1167         // Set the current token to a new DTD token
1168         return new DTDTokenImpl( name, pubid, sysid );
1169     }
1170 
1171     /**
1172      * Returns the next input sequence as an element declaration token. This
1173      * will generate an <code>Alert</code> if the input sequence doesn't match
1174      * the <code>elementdecl</code> production in the XML specification.
1175      * <pre>
1176      * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' [VC: Unique Element Type Declaration]
1177      * </pre>
1178      * <p>
1179      * When this method is called the identifying sequence, i.e.
1180      * '<code>&lt;!ELEMENT</code>', and it should NOT be expected.
1181      * </p>
1182      *
1183      * @return  an <code>ElementDeclToken</code> for the element declaration
1184      */
1185     public ElementDeclToken readElementDecl() {
1186         // Must read space
1187         this.mustReadS();
1188         final Name name = this.nameTokenizer.readTagName();
1189         this.mustReadS();
1190         // Parse the contentspec into a content model
1191         final Contentspec contentspec = this.readContentspec();
1192         // Now look for the end of the element declaration
1193         this.dropS();
1194         if ( this.tryRead( '>' ) ) {
1195             // Set the current token to a new ElementDecl token
1196             return new ElementDeclTokenImpl( name, contentspec );
1197         } else {
1198             throw new XmlWellFormednessAlert(
1199                 "Expected '>' to end element declaration"
1200             ).culpritChar( this.getChar() ).mishap();
1201         }
1202     }
1203 
1204     /**
1205      * Returns the next input sequence as an encoding declaration. This will
1206      * generate an <code>Alert</code> if the input sequence doesn't match the
1207      * <code>encodingDecl</code> production in the XML specification.
1208      * <pre>
1209      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1210      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1211      * </pre>
1212      *
1213      * @return  a <code>String</code> holding the value of the encoding
1214      * declaration
1215      */
1216     public String readEncodingDecl() {
1217         // There must be space before the encoding declaration
1218         if ( this.tryReadS() ) {
1219             // NOTE - do NOT combine this with the previous condition using
1220             // "&&" as we can no longer infer if we should push back some space
1221             // if the following peekRead fails.
1222             if ( this.peekRead( 'e' ) ) {
1223                 final String attributeName = this.readName();
1224                 // Check the attribute is the version one
1225                 if ( !attributeName.equals( "encoding" ) ) {
1226                     throw new XmlWellFormednessAlert(
1227                         "Expected 'encoding' attribute in text declaration"
1228                     ).culprit( "attribute", attributeName ).mishap();
1229                 }
1230                 // Drop the equals
1231                 this.mustReadEq();
1232                 // Now read the version number
1233                 this.currentTokenData.setLength( 0 );
1234                 // Read opening quote
1235                 final char quoteChar = this.getQuoteChar();
1236                 // Read a character from the attribute value
1237                 for ( char ch = this.getChar(); ch != quoteChar; ch = this.getChar() ) {
1238                     if ( ( ch >= 'a' && ch <= 'z' ) || ( ch >= 'A' && ch <= 'Z' ) || ( ch >= '0' && ch <= '9' ) || ch == '-' || ch == '_' || ch == '.' ) {
1239                         this.currentTokenData.append( ch );
1240                     } else {
1241                         throw new XmlWellFormednessAlert(
1242                             "Illegal character in encoding name"
1243                         ).culpritChar( ch ).mishap();
1244                     }
1245                 }
1246                 // no need to push back the last character read, we can safely drop the
1247                 // closing quote character
1248                 // Return the encoding
1249                 return this.currentTokenData.toString();
1250             } else {
1251                 // We read a space at the begining but didn't read an encoding
1252                 // attribute, so we MUST push back a space before we carry on
1253                 this.pushBack( " " );
1254             }
1255         }
1256         // Must have been something else, not an encoding declaration
1257         return null;
1258     }
1259 
1260     /**
1261      * Returns the next input sequence as an entity declaration token. This
1262      * will generate an <code>Alert</code> if the input sequence doesn't match
1263      * the <code>EntityDecl</code> production in the XML specification.
1264      * <pre>
1265      * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
1266      *                   | "'" ([^%&'] | PEReference | Reference)* "'"
1267      * [70] EntityDecl ::= GEDecl | PEDecl
1268      * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1269      * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1270      * [73] EntityDef ::= EntityValue| (ExternalID NDataDecl?)
1271      * [74] PEDef ::= EntityValue | ExternalID
1272      * </pre>
1273      * <p>
1274      * When this method is called the identifying sequence, i.e.
1275      * '<code>&lt;!ENTITY</code>', and it should NOT be expected.
1276      * </p>
1277      *
1278      * @return  an <code>EntityDeclToken</code> for the entity declaration
1279      */
1280     public EntityDeclToken readEntityDecl() {
1281         // Drop the required space
1282         this.mustReadS();
1283         // Is it a parameter entity?
1284         final boolean pe = this.tryRead( '%' );
1285         // If it's a parameter entity, we expect space after the percent
1286         if ( pe ) {
1287             this.mustReadS();
1288         }
1289         // Read the entities name
1290         final String name = this.readName();
1291         // Drop the space
1292         this.mustReadS();
1293         // Check what kind of definition is provided for this entity
1294         if ( this.peekRead( '"' ) || this.peekRead( '\'' ) ) {
1295             // It's an entity value for a internal entity
1296             this.currentTokenData.setLength( 0 );
1297             // Read opening quote
1298             final char quoteChar = this.getQuoteChar();
1299             // Read a character from the attribute
1300             for ( char ch = this.getChar(); ch != quoteChar; ch = this.getChar() ) {
1301                 switch ( ch ) {
1302                     case '<':
1303                         throw new XmlWellFormednessAlert(
1304                             "Attribute values cannot contain the '<' character"
1305                         ).mishap();
1306                     case '&':
1307