1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.millscript.millscript.syntax;
22
23 import org.millscript.commons.alert.AlertOrigin;
24
25 import java.util.regex.Pattern;
26
27 /**
28 * This defines the MillScript tokenizer interface.
29 */
30 public interface Tokenizer extends AlertOrigin {
31
32 /**
33 * Check if the tokenzier is using the specified strategy for reading names
34 * and strings.
35 *
36 * @param ch the test strategy
37 * @return <code>true</code> if the test and current strategy are the same,
38 * otherwise <code>false</code>
39 */
40 boolean checkWhere( final char ch );
41
42 /**
43 * Drops the current token, ready to read the next one.
44 */
45 void dropToken();
46
47 /**
48 * Returns the current token, checking it is a valid XML element attribute
49 * name.
50 *
51 * @return a string holding the current token, which will be a valid XML
52 * element attribute name
53 */
54 String getAttributeName();
55
56 /**
57 * Returns the current token for use as an error message. As such it will
58 * be the current token, or a special message if the end of file has been
59 * reached.
60 *
61 * @return a string holding the current token as an error message
62 */
63 String getErrorString();
64
65 /**
66 * Returns the current token as an int.
67 *
68 * @return the int value of the current token
69 */
70 int getInt();
71
72 /**
73 * Returns the line number of the current token.
74 *
75 * @return the line number of the current token
76 */
77 int getLineNumber();
78
79 /**
80 * Returns the current token as a name, or interned string.
81 *
82 * @return an interned string for the current token
83 */
84 String getName();
85
86 /**
87 * Returns the origin of this tokenizers character source.
88 *
89 * @return this tokenziers character source origin
90 */
91 String getOrigin();
92
93 /**
94 * Returns the first character of the current token, which will be the
95 * quote character for a string token.
96 *
97 * @return the first character of the current token
98 */
99 char getQuoteChar();
100
101 /**
102 * Returns the current token as a string.
103 *
104 * @return a string holding the current token
105 */
106 String getString();
107
108 /**
109 * Returns the current token as a string, without the surrounding quotes.
110 *
111 * @return a String with the current token, leading and trailing
112 * characters removed
113 */
114 String getStringNoQuotes();
115
116 /**
117 * Returns the current token as a string, with the specified number of
118 * characters removed from the start and end.
119 *
120 * @param a the number of character to remove from the start of the current
121 * token
122 * @param b the number of character to remove from the end of the current
123 * token
124 * @return a String with the current token, with the specified number of
125 * characters removed from the start and end
126 */
127 String getStringNoQuotes( final int a, final int b );
128
129 /**
130 * Returns the current token, checking it is a valid XML element name.
131 *
132 * @return a string holding the current token, which will be a valid XML
133 * element name
134 */
135 String getTagName();
136
137 /**
138 * Returns a new tradition regular expression <code>Pattern</code>, made
139 * from the contents of the current buffer. It looks for the last
140 * <code>/</code> in the string to see if there are any flags specified.
141 *
142 * @return a new <code>Pattern</code> for the current buffer
143 */
144 Pattern makePattern();
145
146 /**
147 * Marks the current position in the reader, so we can jump back to the
148 * marked position if required.
149 */
150 void markReader();
151
152 /**
153 * Tests if the next token is the same as the specified string. If
154 * successful, the token will be dropped, otherwise an alert will be
155 * raised.
156 *
157 * @param sym the token we want to try and read
158 */
159 void mustRead( final String sym );
160
161 /**
162 * Returns the type of the next token. It's unlikely that the
163 * {@linkplain TokenType#NEED_NEW} token will be returned by this method.
164 *
165 * @return the {@code TokenType} of the next token
166 */
167 TokenType nextToken();
168
169 /**
170 * Peeks a look at the next token and compares it to the specified value.
171 *
172 * @param sym the token we want to try and read
173 * @return <code>true</code> if the next token is the same as the one
174 * specified, <code>false</code> otherwise
175 */
176 boolean peekRead( final String sym );
177
178 /**
179 * Peeks a look at the type of the next token to be returned by
180 * <code>nextToken</code>.
181 *
182 * @return the {@code TokenType} of the next token to be returned by a
183 * call to {@linkplain #nextToken()}
184 */
185 TokenType peekToken();
186
187 /**
188 * Resets the reader to the previously marked position.
189 */
190 void resetReader();
191
192 /**
193 * Sets the tokenizers current strategy for reading names and strings. This
194 * is used to let the tokenizer know when it's reading an XML tag name,
195 * attribute name or comment.
196 *
197 * @param ch the new strategy
198 * @see #where
199 */
200 void setWhere( final char ch );
201
202 /**
203 * Tests if the next token is the same as the specified string. If
204 * successful, the token will be dropped.
205 *
206 * @param sym the token we want to try and read
207 * @return <code>true</code> if the next token is the same as the one
208 * specified, <code>false</code> otherwise
209 */
210 boolean tryRead( final String sym );
211
212 }