1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.millscript.millscript.syntax;
24
25 import org.millscript.commons.util.list.ELinkedList;
26 import org.millscript.commons.util.map.EHashMap;
27 import org.millscript.millscript.alert.Alerts;
28 import org.millscript.millscript.conf.Configuration;
29
30 import java.io.IOException;
31 import java.io.LineNumberReader;
32 import java.io.Reader;
33 import java.util.regex.Pattern;
34
35 /**
36 * This class performs the basic tokenisation of the MillScript language. A
37 * single instance of this class would be used to tokenise one file/source.
38 */
39 public class TokenizerImpl implements Tokenizer {
40
41 /**
42 * The buffer used while constructing a token.
43 */
44 private StringBuffer buff = new StringBuffer();
45
46 /**
47 * The current configuration.
48 */
49 protected Configuration config;
50
51 /**
52 * Flag indicating if end of line comments are available in this tokenizer.
53 * This is required because <code>#</code> could be in use as the alternate
54 * string quote within a template.
55 */
56 private boolean eolc;
57
58 /**
59 * Used to store interpolation values, mapping the position in the
60 * interpolated string to the value to be interpolated.
61 */
62 protected EHashMap< Integer, CharSequence > interpolationMap = new EHashMap< Integer, CharSequence >();
63
64 /**
65 * The origin message for this tokenizers input.
66 */
67 private String origin;
68
69 /**
70 * The line number reader which provides the character stream to tokenize.
71 */
72 private LineNumberReader reader;
73
74 /**
75 * Storage for a saved character. This allows the tokenizer to
76 * <em>unread</em> a read character, as the read character may be the start
77 * of the next token.
78 *
79 * <dl>
80 * <dt>-2</dt>
81 * <dd>No pushback character available</dd>
82 * <dt>-1</dt>
83 * <dd>End of file</dd>
84 * <dt>>=0</dt>
85 * <dd>the saved character</dd>
86 * </dl>
87 */
88 private int savedChar = -2;
89
90 /**
91 * Used to indicate the type of token that has just been read.
92 */
93 private TokenType ttype = TokenType.NEED_NEW;
94
95 /**
96 * Used to determine where the tokenizer is during the tokenizing process.
97 * We have to be able to distinguish whether we are tokenizing an XML tag or
98 * attribute name, or the contents of an XML comment. In these situations
99 * the tokenizer must behave differently.
100 *
101 * <dl>
102 * <dt>'n'</dt>
103 * <dd>tag or attribute name</dd>
104 * <dt>'c'</dt>
105 * <dd>comment</dd>
106 * <dt>'?'</dt>
107 * <dd>somewhere else</dd>
108 * </dl>
109 *
110 * @todo this should probably be handled with different classes of
111 * tokenizer
112 */
113 private int where = '?';
114
/**
 * Constructs a new <code>Tokenizer</code> with the specified origin,
 * source, end of line comment status and configuration. The source is
 * wrapped in a <code>LineNumberReader</code>.
 *
 * @param o the origin message for this tokenizer
 * @param r the character source to tokenize
 * @param e flag indicating if end of line comments are supported
 * @param c the configuration
 */
TokenizerImpl( final String o, final Reader r, final boolean e, final Configuration c ) {
    this.config = c;
    this.eolc = e;
    this.origin = o;
    this.reader = new LineNumberReader( r );
}
130
131 /**
132 * @see org.millscript.millscript.syntax.Tokenizer#getOrigin()
133 */
134 public String getOrigin() {
135 return origin;
136 }
137
138 /**
139 * @see org.millscript.millscript.syntax.Tokenizer#getLineNumber()
140 */
141 public int getLineNumber() {
142
143 return reader.getLineNumber() + 1;
144 }
145
146 /**
147 * @see org.millscript.millscript.syntax.Tokenizer#setWhere(char)
148 */
149 public void setWhere( final char ch ) {
150 if ( "nc?".indexOf( ch ) >= 0 ) {
151 where = ch;
152 }
153 }
154
155 /**
156 * @see org.millscript.millscript.syntax.Tokenizer#checkWhere(char)
157 */
158 public boolean checkWhere( final char ch ) {
159 return where == ch;
160 }
161
162 /**
163 * Returns the integer value of the next character from this tokenizers
164 * input. If a character has previously been pushed back, it will be
165 * returned, otherwise another character will be read from this tokenizers
166 * reader.
167 *
168 * @return the <code>int</code> value of the next character
169 */
170 private int getIntChar() {
171 try {
172 if ( savedChar == -2 ) {
173 int c = reader.read();
174 return c;
175 } else {
176 int x = savedChar;
177 savedChar = -2;
178 return x;
179 }
180 } catch ( IOException ex ) {
181 throw(
182 Alerts.parse(
183 "Could not read from input source",
184 null
185 ).culprit( "message", ex.getMessage() ).mishap()
186 );
187 }
188 }
189
190 /**
191 * Returns the <code>char</code> value of the next character from this
192 * tokenizers input.
193 *
194 * @return the <code>char</code> value of the next character
195 */
196 private char getChar() {
197 int ch = getIntChar();
198 if ( ch == -1 ) {
199 throw(
200 Alerts.parse(
201 "Unexpected end of file",
202 null
203 ).mishap()
204 );
205 }
206 return (char)ch;
207 }
208
209 /**
210 * Pushes back the specified character. This will be the next character
211 * returned by <code>getIntChar</code> or <code>getChar</code>. This method
212 * would be used when a read character is not part of the current token.
213 *
214 * @param ch the character to push back
215 */
216 private void pushBackChar( final int ch ) {
217 savedChar = ch;
218 }
219
220 /**
221 * Adds the specified character to the token being constructed.
222 *
223 * @param ch the character to be appended to the current token
224 */
225 private void addChar( final int ch ) {
226 buff.append( (char)ch );
227 }
228
229 /**
230 * Accepts the specified character as part of the current token and returns
231 * the next character from the input.
232 *
233 * @param ch the character to append to the current token
234 * @return the <code>int</code> value of the next character
235 */
236 private int acceptChar( final int ch ) {
237 addChar( ch );
238 return getIntChar();
239 }
240
241 /**
242 * Accepts the specified character as part of the current token and returns
243 * the next character from the input.
244 *
245 * @param ch the character to append to the current token
246 * @return the <code>char</code> value of the next character
247 */
248 private char okChar( final int ch ) {
249 addChar( ch );
250 return getChar();
251 }
252
253 /**
254 * Determines if the specified character is a MillScript sign character. The
255 * following characters are MillScript signs:
256 *
257 * <ul>
258 * <li><>!$%^&*-+=|:?/~</li>
259 * </ul>
260 *
261 * @param ch the character to test.
262 * @return <code>true</code> if the character is a MillScript sign,
263 * <code>false</code> otherwise.
264 */
265 private static boolean isSign( final char ch ) {
266 return "<>!$%^&*-+=|:?/~".indexOf( ch ) >= 0;
267 }
268
269 /**
270 * Returns the <code>char</code> value of the last character in the current
271 * token, without removing it.
272 *
273 * @return the <code>char</code> value of the last character in the current
274 * token.
275 * @see #cantStick
276 */
277 private char lastCharInBuff() {
278 return buff.charAt( buff.length() - 1 );
279 }
280
281 /**
282 * Determines if the specified character cannot be stuck onto the current
283 * token. This is required to support the tokenization bodge for REXML i.e.
284 * to unstick sequences such as <code>></</code> into
285 * <code>></code> and <code></</code>
286 *
287 * @param ch the character to test
288 * @return <code>true</code> if the character cannot be stuck onto the
289 * last character in the buffer, <code>false</code> otherwise
290 */
291 private boolean cantStick( final char ch ) {
292 return (
293 ch == '<' && lastCharInBuff() == '>' ||
294 ch == '>' && lastCharInBuff() == '<'
295 );
296 }
297
298 /**
299 * Accepts repetitions of the specified character into the current token.
300 * This is currently used to read sequences of periods or <code>@</code>
301 * symbols.
302 *
303 * @param orig the character to read repetitions of
304 */
305 private void readRepetitions( final int orig ) {
306 int ich = orig;
307 while ( orig == ( ich = acceptChar( ich ) ) ) {
308 }
309 pushBackChar( ich );
310 }
311
312 /**
313 * Reads an interpolated section of a string. This method is called when an
314 * escaped <code>(</code> is found in an input string. This handles nested
315 * strings and parenthesis within the interpolated section.
316 *
317 * @return a CharSequence for the interpolated section
318 */
319 private CharSequence readInterpolatedExpr() {
320
321 final StringBuffer interp = new StringBuffer();
322 int state = 0;
323 int parenLevel = 0;
324 final ELinkedList< Character > nestingQuotes = new ELinkedList< Character >();
325 char quote = ' ';
326 for (;;) {
327 final char ch = this.getChar();
328 if ( ch == '\n' || ch == '\r' ) {
329 throw(
330 Alerts.parse(
331 "Line break before end of string",
332 null
333 ).mishap()
334 );
335 }
336 switch ( state ) {
337 case 0:
338 switch ( ch ) {
339 case '"':
340 case '\'':
341 case '`':
342 state = 1;
343 quote = ch;
344 break;
345 case ')':
346 if ( parenLevel == 0 ) {
347 if ( nestingQuotes.isEmtpy() ) {
348 return interp.toString();
349 } else {
350 quote = nestingQuotes.last().charValue();
351 nestingQuotes.deleteLast();
352 state = 1;
353 }
354 } else {
355 parenLevel -= 1;
356 }
357 break;
358 case '(':
359 parenLevel += 1;
360 break;
361 default :
362 break;
363 }
364 break;
365 case 1:
366 if ( ch == quote ) {
367 state = 0;
368 } else if ( ch == '\\' ) {
369 state = 2;
370 }
371 break;
372 case 2:
373 int count = 0;
374 switch ( ch ) {
375 case '(':
376 state = 0;
377 nestingQuotes.addLast( new Character( quote ) );
378 break;
379 case '^':
380 count = 1;
381 break;
382 case '0':
383 count = 3;
384 break;
385 case 'u':
386 count = 4;
387 break;
388 }
389 for ( int i = 0; i < count; i++ ) {
390 final char x = this.getChar();
391 if ( !Character.isLetterOrDigit( ch ) ) {
392 this.config.reportAlertAsWarning(
393 Alerts.parse(
394 "Unexpected backquote sequence",
395 null
396 ).culprit( "char", new Character( x ) )
397 );
398 }
399 }
400 break;
401 default :
402 break;
403 }
404 interp.append( ch );
405 }
406 }
407
408 /**
409 * Adds the specified character sequence as the interpolated section for
410 * the current buffer position. The current buffer position represents the
411 * position in the string that the interpolated section begins.
412 *
413 * @param cs the CharSequence to add as the interpolated section for the
414 * current buffer position.
415 */
416 private void addInterpolation( final CharSequence cs ) {
417 this.interpolationMap.insert( new Integer( this.buff.length() - 1 ), cs );
418 }
419
420 /**
421 * Tokenizes a fat string with the specified character as the string
422 * quotes. The string terminates at the next triple quote sequence.
423 *
424 * @param endChar <code>char</code> value of this strings string quotes.
425 */
426 private void readFatString( final char endChar ) {
427 addChar( endChar );
428 char ch = getChar();
429 while( true ) {
430 if ( ch == '\r' || ch == '\n' ) {
431
432 while ( Character.isWhitespace( ch ) ) {
433 ch = getChar();
434 }
435 } else if ( ch == '|' ) {
436
437
438 ch = getChar();
439 while ( ch != '\r' && ch != '\n' ) {
440 ch = okChar( ch );
441 }
442 } else if ( ch == endChar ) {
443 final char ch2 = getChar();
444 if ( ch2 == endChar ) {
445 final char ch3 = getChar();
446 if ( ch3 == endChar ) {
447
448
449 addChar( endChar );
450 break;
451 } else {
452 throw(
453 Alerts.compile(
454 "Unexpected fat string close quote",
455 "Closing triple-quote must appear on a line by itself"
456 ).culprit(
457 "partial string",
458 getErrorString()
459 ).culprit(
460 "Unexpected close quote",
461 String.copyValueOf( new char[] { ch, ch2, ch3 } )
462 ).mishap()
463 );
464 }
465 } else {
466 throw(
467 Alerts.compile(
468 "Unexpected fat string close quote",
469 "Closing triple-quote must appear on a line by itself"
470 ).culprit(
471 "partial string",
472 getErrorString()
473 ).culprit(
474 "Unexpected close quote",
475 String.copyValueOf( new char[] { ch, ch2 } )
476 ).mishap()
477 );
478 }
479 } else {
480 throw(
481 Alerts.compile(
482 "Unexpected character begining a fat string text line",
483 "Each text line must start with a vertical pipe or the end-string triple-quote"
484 ).culprit(
485 "partial string",
486 getErrorString()
487 ).culprit(
488 "Unexpected character",
489 ch
490 ).mishap()
491 );
492 }
493 }
494 }
495
496 /**
497 * Tokenizes a string with the specified character as the string quotes. The
498 * string terminates at the first unescaped quote character, i.e. by
499 * preceeding the quote character by <code>\</code> you can include it in
500 * the tokenized string.
501 *
502 * @param endChar <code>char</code> value of this strings string quotes.
503 */
504 private void readString( final char endChar ) {
505 addChar( endChar );
506 markReader();
507 char ch = getChar();
508
509
510
511 boolean keepReading = ( ch != endChar );
512
513 while ( keepReading ) {
514
515 if ( ch == '-' ) {
516 ch = okChar( ch );
517 if ( ch == '-' ) {
518 ch = okChar( ch );
519 if ( ch == '>' && where == 'c' ) {
520 resetReader();
521 keepReading = false;
522 }
523 }
524 } else if ( where != 'c' && ch == endChar ) {
525
526
527 keepReading = false;
528 } else if ( ( ch == '\r' || ch == '\n' ) && where != 'c' ) {
529
530 throw(
531 Alerts.compile(
532 "Unterminated string",
533 "A newline or carriage return was encountered before the closing quotes"
534 ).culprit( "partial string", getErrorString() ).mishap()
535 );
536 } else if ( ch == '\\' ) {
537 ch = getChar();
538 if ( ch == endChar ) {
539 ch = okChar( ch );
540 } else if ( ch == '\\' ) {
541 ch = okChar( '\\' );
542 } else if ( ch == 't' ) {
543 ch = okChar( '\t' );
544 } else if ( ch == 'n' ) {
545 ch = okChar( '\n' );
546 } else if ( ch == 'r' ) {
547 ch = okChar( '\r' );
548 } else if ( ch == '&' ) {
549 StringBuffer b = new StringBuffer();
550 ch = getChar();
551 if ( ch == '#' ) {
552 int radix = 10;
553 ch = getChar();
554 if ( ch == 'x' ) {
555
556 ch = getChar();
557 radix = 16;
558 }
559 for (;;) {
560 int n = Character.digit( ch, radix );
561 if ( n == -1 ) {
562 break;
563 }
564 b.append( ch );
565 ch = getChar();
566 }
567 buff.append( (char) Integer.parseInt( b.toString(), radix ) );
568 } else {
569 for (;;) {
570 if ( !Character.isLetter( ch ) ) {
571 break;
572 }
573 b.append( ch );
574 ch = getChar();
575 }
576 String s = b.toString().intern();
577 Character tmp = config.getHTMLCharacterEntity().getCharacterFor( s );
578 if ( tmp == null ) {
579 throw(
580 Alerts.parse(
581 "Unrecognized HTML entity in string",
582 "Not all entities are recognized yet"
583 ).culprit( "entity name", s ).mishap()
584 );
585 } else {
586 buff.append( tmp );
587 }
588 }
589 if ( ch != ';' ) {
590 b.append( ch );
591 String culprit = "&" + b.toString();
592 throw(
593 Alerts.parse(
594 "Unexpected HTML entity sequence in string",
595 "Entities are a sequence of letters terminated by a semi-colon"
596 ).culprit( "entity", culprit ).mishap()
597 );
598 }
599 ch = getChar();
600 } else if ( ch == '(' ) {
601
602 final CharSequence cs = this.readInterpolatedExpr();
603 this.addInterpolation( cs );
604 ch = this.getChar();
605 } else {
606 throw(
607 Alerts.parse(
608 "Unexpected escape sequence in string",
609 null
610 ).
611 culprit( "sequence", "\\" + Character.toString( ch ) ).
612 mishap()
613 );
614 }
615 } else {
616 addChar( ch );
617
618 markReader();
619 ch = getChar();
620 }
621 }
622 addChar( ch );
623 }
624
625 /**
626 * Tokenizes a traditional regular expression with the specified character
627 * as the terminator character. The regular expression terminates at the
628 * first unescaped end character, i.e. by preceeding the quote character by
629 * <code>\</code> you can include it in the tokenized string.
630 *
631 * @param endChar <code>char</code> value of this traditional regular
632 * expression end quote
633 */
634 private void readTraditionalRegex( final char endChar ) {
635
636 addChar( endChar );
637 addChar( endChar );
638
639 char ch = getChar();
640
641 while ( ch != endChar ) {
642 if ( ch == '\r' || ch == '\n' ) {
643
644
645 throw(
646 Alerts.compile(
647 "Unterminated regular expression",
648 "A newline or carriage return was encountered before the closing quote"
649 ).culprit( "partial regular expression", getErrorString() ).mishap()
650 );
651 } else if ( ch == endChar ) {
652
653 break;
654 } else if ( ch == '\\' ) {
655
656 ch = okChar( ch );
657 if ( ch == endChar ) {
658
659
660 ch = okChar( ch );
661 }
662 } else {
663
664
665 addChar( ch );
666 ch = getChar();
667 }
668 }
669
670 addChar( ch );
671
672
673 ch = getChar();
674 while ( "xiumsd".indexOf( ch ) != -1 ) {
675
676 addChar( ch );
677
678 ch = getChar();
679 }
680
681 this.pushBackChar( ch );
682 }
683
684 /**
685 * @see org.millscript.millscript.syntax.Tokenizer#makePattern()
686 */
687 public Pattern makePattern() {
688
689 final String s = this.buff.toString();
690
691 final int n = s.lastIndexOf( '/' );
692
693 final String regex = s.substring( 2, n );
694
695 final String flags = s.substring( n + 1 );
696
697
698 int iflags = 0;
699 for ( int i = 0; i < flags.length(); i++ ) {
700 final char f = flags.charAt( i );
701 switch ( f ) {
702 case 'i' :
703 iflags |= Pattern.CASE_INSENSITIVE;
704 break;
705 case 'x' :
706 iflags |= Pattern.COMMENTS;
707 break;
708 case 's' :
709 iflags |= Pattern.DOTALL;
710 break;
711 case 'm' :
712 iflags |= Pattern.MULTILINE;
713 break;
714 case 'u' :
715 iflags |= Pattern.UNICODE_CASE;
716 break;
717 case 'd' :
718 iflags |= Pattern.UNIX_LINES;
719 break;
720 default :
721 throw(
722 Alerts.fault(
723 "Invalid flag for traditional regex"
724 ).culprit( "flag", new Character( f ) ).mishap()
725 );
726 }
727 }
728
729 return Pattern.compile( regex, iflags );
730 }
731
732 /**
733 * Checks if the next character is the same as the specified one.
734 *
735 * @param ch the character to compare with the next one from the input
736 * @return <code>true</code> if the next character in the input is the
737 * same as the specified one
738 */
739 private boolean tryChar( final char ch ) {
740 final char nch = this.getChar();
741 if ( ch == nch ) {
742 return true;
743 } else {
744 this.pushBackChar( nch );
745 return false;
746 }
747 }
748
749 /**
750 * @see org.millscript.millscript.syntax.Tokenizer#nextToken()
751 */
752 public TokenType nextToken() {
753 if ( ttype == TokenType.NEED_NEW ) {
754 buff.setLength( 0 );
755 this.interpolationMap.removeAll();
756 int ch = getIntChar();
757
758
759
760 if ( where == 'c' && ch != '-' ) {
761 readString( (char)ch );
762 return TokenType.STRING;
763 }
764 while ( Character.isWhitespace( (char)ch ) ) {
765 ch = getIntChar();
766 }
767 if ( ch == -1 ) {
768 return TokenType.EOF;
769 } else if ( ch == '#' && eolc ) {
770
771 for (;;) {
772 ch = getIntChar();
773 if ( ch == '\n' || ch == -1 ) {
774 break;
775 }
776 }
777 return nextToken();
778 } else if ( Character.isLetter( (char)ch ) ) {
779 markReader();
780 ch = acceptChar( ch );
781 while ( Character.isLetterOrDigit( (char)ch ) || ch == '_' || ( ch == '-' && where == 'n' ) ) {
782 markReader();
783 ch = acceptChar( ch );
784 }
785 if ( ch == ':' ) {
786 ch = acceptChar( ch );
787 if ( ch == ':' ) {
788 resetReader();
789 buff.deleteCharAt( buff.length() - 1 );
790 return TokenType.NAME;
791 }
792 while ( Character.isLetterOrDigit( (char)ch ) || ch == '_' || ( ch == '-' && where == 'n' ) ) {
793 ch = acceptChar( ch );
794 }
795 }
796 pushBackChar( ch );
797 return TokenType.NAME;
798 } else if ( Character.isDigit( (char)ch ) || ( ch == '-' && where != 'c' ) ) {
799 ch = acceptChar( ch );
800 while ( Character.isDigit( (char)ch ) ) {
801 ch = acceptChar( ch );
802 }
803 pushBackChar( ch );
804 if ( buff.length() == 1 && buff.charAt( 0 ) == '-' ) {
805 return TokenType.NAME;
806 } else {
807 return TokenType.INTEGER;
808 }
809 } else if ( ch == '"' ) {
810 markReader();
811 ch = getChar();
812 if ( ch == '"' ) {
813 final char ch2 = getChar();
814 if ( ch2 == '"' ) {
815
816 readFatString( (char) ch );
817 } else {
818 resetReader();
819 readString( (char) ch );
820 }
821 } else {
822 resetReader();
823 readString( '"' );
824 }
825 return TokenType.STRING;
826 } else if ( ch == '"' || ch == '\'' || ch == '`' || ch == '#' && !eolc ) {
827 readString( (char)ch );
828 return TokenType.STRING;
829 } else if ( ch == '/' && this.tryChar( '/') ) {
830 readTraditionalRegex( (char) ch );
831 return TokenType.TRADITIONAL_REGEX;
832 } else if ( ch == '.' || ch == '@' ) {
833 readRepetitions( ch );
834 return TokenType.NAME;
835 } else if ( isSign( (char)ch ) ) {
836 ch = acceptChar( ch );
837 while ( isSign( (char)ch ) && !( cantStick( (char)ch ) ) ) {
838 ch = acceptChar( ch );
839 }
840 pushBackChar( ch );
841 return TokenType.NAME;
842 } else {
843 addChar( ch );
844 return TokenType.NAME;
845 }
846 } else {
847 final TokenType tt = ttype;
848 ttype = TokenType.NEED_NEW;
849 return tt;
850 }
851 }
852
853 /**
854 * @see org.millscript.millscript.syntax.Tokenizer#peekToken()
855 */
856 public TokenType peekToken() {
857 return ttype = nextToken();
858 }
859
860 /**
861 * @see org.millscript.millscript.syntax.Tokenizer#dropToken()
862 */
863 public void dropToken() {
864 ttype = TokenType.NEED_NEW;
865 }
866
867 /**
868 * @see org.millscript.millscript.syntax.Tokenizer#tryRead(java.lang.String)
869 */
870 public boolean tryRead( final String sym ) {
871 if ( peekToken() == TokenType.NAME && getName() == sym ) {
872 dropToken();
873 return true;
874 } else {
875 return false;
876 }
877 }
878
879 /**
880 * @see org.millscript.millscript.syntax.Tokenizer#markReader()
881 */
882 public void markReader() {
883 try {
884 reader.mark( 2048 );
885 } catch ( IOException ex ) {
886 throw(
887 Alerts.parse(
888 "Problem with input source",
889 null
890 ).culprit( "message", ex.getMessage() ).mishap()
891 );
892 }
893 }
894
895 /**
896 * @see org.millscript.millscript.syntax.Tokenizer#resetReader()
897 */
898 public void resetReader() {
899 try {
900 reader.reset();
901 } catch ( IOException ex ) {
902 throw(
903 Alerts.parse(
904 "Problem with input source",
905 null
906 ).culprit( "message", ex.getMessage() ).mishap()
907 );
908 }
909 }
910
911 /**
912 * @see org.millscript.millscript.syntax.Tokenizer#peekRead(java.lang.String)
913 */
914 public boolean peekRead( final String sym ) {
915 return peekToken() == TokenType.NAME && getName() == sym;
916 }
917
918 /**
919 * @see org.millscript.millscript.syntax.Tokenizer#mustRead(java.lang.String)
920 */
921 public void mustRead( final String sym ) {
922 if ( !tryRead( sym ) ) {
923 throw(
924 Alerts.parse(
925 "Unexpected symbol",
926 null
927 ).
928 culprit( "found", getErrorString() ).
929 culprit( "wanted", sym ).
930 origin( this ).
931 mishap()
932 );
933 }
934 }
935
936 /**
937 * @see org.millscript.millscript.syntax.Tokenizer#getString()
938 */
939 public String getString() {
940 return buff.toString();
941 }
942
943 /**
944 * @see org.millscript.millscript.syntax.Tokenizer#getErrorString()
945 */
946 public String getErrorString() {
947 return (
948 ttype == TokenType.EOF ? "<end of file>" :
949 buff.toString()
950 );
951
952 }
953
954 /**
955 * @see org.millscript.millscript.syntax.Tokenizer#getAttributeName()
956 */
957 public String getAttributeName() {
958 int size = buff.length();
959 for ( int i = 0; i < size; i++ ) {
960 char ch = buff.charAt( i );
961 if ( !(
962 Character.isLetterOrDigit( ch ) ||
963 "_:-".indexOf( ch ) >= 0
964 ) ) {
965 throw(
966 Alerts.parse(
967 "Invalid attribute name",
968 null
969 ).culprit( "name", buff.toString() ).mishap()
970 );
971 }
972 }
973 return buff.toString();
974 }
975
976 /**
977 * @see org.millscript.millscript.syntax.Tokenizer#getTagName()
978 */
979 public String getTagName() {
980 int size = buff.length();
981 for ( int i = 0; i < size; i++ ) {
982 char ch = buff.charAt( i );
983 if ( !(
984 Character.isLetterOrDigit( ch ) ||
985 "_:-".indexOf( ch ) >= 0
986 ) ) {
987 throw(
988 Alerts.parse(
989 "Invalid tag name",
990 null
991 ).culprit( "name", buff.toString() ).mishap()
992 );
993 }
994 }
995 return buff.toString();
996 }
997
998
999 /**
1000 * @see org.millscript.millscript.syntax.Tokenizer#getStringNoQuotes()
1001 */
1002 public String getStringNoQuotes() {
1003 return this.getStringNoQuotes( 1, 1 );
1004 }
1005
1006 /**
1007 * @see org.millscript.millscript.syntax.Tokenizer#getStringNoQuotes(int, int)
1008 */
1009 public String getStringNoQuotes( final int a, final int b ) {
1010 String s = buff.toString();
1011 return s.substring( a, s.length() - b );
1012 }
1013
1014 /**
1015 * @see org.millscript.millscript.syntax.Tokenizer#getQuoteChar()
1016 */
1017 public char getQuoteChar() {
1018 return buff.charAt( 0 );
1019 }
1020
1021 /**
1022 * @see org.millscript.millscript.syntax.Tokenizer#getName()
1023 */
1024 public String getName() {
1025 return buff.toString().intern();
1026 }
1027
1028 /**
1029 * @see org.millscript.millscript.syntax.Tokenizer#getInt()
1030 */
1031 public int getInt() {
1032 return Integer.parseInt( buff.toString() );
1033 }
1034
1035 /**
1036 * @see org.millscript.commons.alert.AlertOrigin#setContext(java.lang.String, int)
1037 */
1038 public void setContext( final String s, final int n ) {
1039 this.origin = s;
1040
1041 }
1042
1043 /**
1044 * @see org.millscript.commons.alert.AlertOrigin#setLineNumber(int)
1045 */
1046 public void setLineNumber( final int n ) {
1047
1048 }
1049
1050 /**
1051 * @see org.millscript.commons.alert.AlertOrigin#setOrigin(java.lang.String)
1052 */
1053 public void setOrigin( final String o ) {
1054 this.origin = o;
1055 }
1056
1057 }