1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.millscript.office.excel;
22
23 import org.millscript.millscript.expr.Expr;
24 import org.millscript.office.alerts.OfficeAlert;
25 import org.millscript.office.excel.alerts.BIFFAlert;
26 import org.millscript.office.excel.records.formula.AddTokenSyntax;
27 import org.millscript.office.excel.records.formula.Area3dTokenSyntax;
28 import org.millscript.office.excel.records.formula.ArrayTokenSyntax;
29 import org.millscript.office.excel.records.formula.BoolTokenSyntax;
30 import org.millscript.office.excel.records.formula.ConcatTokenSyntax;
31 import org.millscript.office.excel.records.formula.DivTokenSyntax;
32 import org.millscript.office.excel.records.formula.ErrTokenSyntax;
33 import org.millscript.office.excel.records.formula.IntTokenSyntax;
34 import org.millscript.office.excel.records.formula.MissArgTokenSyntax;
35 import org.millscript.office.excel.records.formula.MulTokenSyntax;
36 import org.millscript.office.excel.records.formula.NumberTokenSyntax;
37 import org.millscript.office.excel.records.formula.PowerTokenSyntax;
38 import org.millscript.office.excel.records.formula.RefTokenSyntax;
39 import org.millscript.office.excel.records.formula.StrTokenSyntax;
40 import org.millscript.office.excel.records.formula.SubTokenSyntax;
41 import org.millscript.office.excel.records.formula.UMinusTokenSyntax;
42 import org.millscript.office.excel.records.formula.UPercentTokenSyntax;
43 import org.millscript.office.excel.records.formula.UPlusTokenSyntax;
44 import org.millscript.office.excel.records.substructures.CellRangeAddress;
45 import org.millscript.office.excel.records.substructures.ConstantCachedValue;
46 import org.millscript.office.excel.records.substructures.FormattingRun;
47 import org.millscript.office.excel.versions.BIFFVersion;
48 import org.millscript.office.spreadsheet.formula.AbsoluteCellAddress;
49 import org.millscript.office.spreadsheet.formula.OffsetCellAddress;
50
51 import java.util.HashMap;
52
53 /**
54 *
55 */
56 public class FormulaTokenizer {
57
58 private static HashMap< Integer, FormulaTokenSyntax > TABLE = new HashMap< Integer, FormulaTokenSyntax >();
59
60 static {
61 TABLE.put( 0x03, new AddTokenSyntax() );
62 TABLE.put( 0x3B, new Area3dTokenSyntax() );
63 TABLE.put( 0x5B, new Area3dTokenSyntax() );
64 TABLE.put( 0x7B, new Area3dTokenSyntax() );
65 TABLE.put( 0x20, new ArrayTokenSyntax() );
66 TABLE.put( 0x40, new ArrayTokenSyntax() );
67 TABLE.put( 0x60, new ArrayTokenSyntax() );
68 TABLE.put( 0x1D, new BoolTokenSyntax() );
69 TABLE.put( 0x08, new ConcatTokenSyntax() );
70 TABLE.put( 0x06, new DivTokenSyntax() );
71 TABLE.put( 0x1C, new ErrTokenSyntax() );
72 TABLE.put( 0x1E, new IntTokenSyntax() );
73 TABLE.put( 0x16, new MissArgTokenSyntax() );
74 TABLE.put( 0x05, new MulTokenSyntax() );
75 TABLE.put( 0x1F, new NumberTokenSyntax() );
76 TABLE.put( 0x07, new PowerTokenSyntax() );
77 TABLE.put( 0x24, new RefTokenSyntax() );
78 TABLE.put( 0x44, new RefTokenSyntax() );
79 TABLE.put( 0x64, new RefTokenSyntax() );
80 TABLE.put( 0x04, new SubTokenSyntax() );
81 TABLE.put( 0x17, new StrTokenSyntax() );
82 TABLE.put( 0x14, new UPercentTokenSyntax() );
83 TABLE.put( 0x12, new UPlusTokenSyntax() );
84 TABLE.put( 0x13, new UMinusTokenSyntax() );
85 }
86
87 private Expr< ? >[] actionStack = new Expr< ? >[ 128 ];
88
89 private int actionStackIndex = 0;
90
91 private final int additionalDataOffset;
92
93 private final RecordTokenizer recordTokenizer;
94
95 private int storedAdditionalDataOffset = -1;
96
97 private int storedFormulaDataOffset = -1;
98
99 public FormulaTokenizer( final RecordTokenizer tokenizer, final int formulaDataSize ) {
100 this.recordTokenizer = tokenizer;
101
102 this.additionalDataOffset = tokenizer.getCurrentRecordDataOffset() + formulaDataSize;
103 }
104
105 public String decode( final byte[] data ) {
106 return this.recordTokenizer.decode( data );
107 }
108
109 /**
110 * @see org.millscript.office.excel.RecordTokenizer#getBiffVersion()
111 */
112 public BIFFVersion getBiffVersion() {
113 return recordTokenizer.getBiffVersion();
114 }
115
116 public Expr< ? > parse() {
117
118 while ( this.recordTokenizer.getCurrentRecordDataOffset() != this.additionalDataOffset ) {
119 final int tokenId = this.recordTokenizer.readUnsignedByte();
120
121 final FormulaTokenSyntax syntax = TABLE.get( tokenId );
122
123
124 if ( syntax == null ) {
125
126 throw new BIFFAlert(
127 "This formula token is not supported"
128 ).culprit(
129 "identifier",
130 tokenId
131 ).mishap();
132 } else if ( this.recordTokenizer.checkBIFFVersionSupportsRecord( syntax ) ) {
133
134 syntax.newToken( this );
135 } else {
136
137 throw new BIFFAlert(
138 "This record is not supported by the current BIFF version"
139 ).culprit(
140 "identifier",
141 tokenId
142 ).culprit(
143 "record",
144 syntax
145 ).mishap();
146 }
147 }
148
149
150 if ( this.actionStackIndex == 1 ) {
151 return this.actionStack[ 0 ];
152 } else {
153 throw new OfficeAlert(
154 "There was an unexpected problem reading a formula"
155 ).mishap();
156 }
157 }
158
159 public Expr< ? > popExpr() {
160
161
162
163
164
165
166 return this.actionStack[ --this.actionStackIndex ];
167 }
168
169 public void pushExpr( final Expr< ? > expr ) {
170 if ( this.actionStackIndex >= this.actionStack.length ) {
171
172 final Expr< ? >[] newArray = new Expr< ? >[ (int) ( this.actionStack.length * 1.5 ) ];
173 System.arraycopy( this.actionStackIndex, 0, newArray, 0, this.actionStack.length );
174 this.actionStack = newArray;
175 }
176
177 this.actionStack[ this.actionStackIndex++ ] = expr;
178 }
179
180 /**
181 * @see org.millscript.office.excel.RecordTokenizer#read2ByteChar()
182 */
183 public char read2ByteChar() {
184 return recordTokenizer.read2ByteChar();
185 }
186
187 /**
188 * @see org.millscript.office.excel.RecordTokenizer#read2ByteInt()
189 */
190 public int read2ByteInt() {
191 return recordTokenizer.read2ByteInt();
192 }
193
194 /**
195 * @see org.millscript.office.excel.RecordTokenizer#read4ByteInt()
196 */
197 public int read4ByteInt() {
198 return recordTokenizer.read4ByteInt();
199 }
200
201 /**
202 * @see org.millscript.office.excel.RecordTokenizer#read8ByteDouble()
203 */
204 public double read8ByteDouble() {
205 return recordTokenizer.read8ByteDouble();
206 }
207
208 /**
209 * @see org.millscript.office.excel.RecordTokenizer#readAbsoluteCellAddress()
210 */
211 public AbsoluteCellAddress readAbsoluteCellAddress() {
212 return recordTokenizer.readAbsoluteCellAddress();
213 }
214
215 /**
216 * @see org.millscript.office.excel.RecordTokenizer#readByte()
217 */
218 public byte readByte() {
219 return recordTokenizer.readByte();
220 }
221
222 /**
223 * @see org.millscript.office.excel.RecordTokenizer#readBytes(int)
224 */
225 public byte[] readBytes( final int length ) {
226 return recordTokenizer.readBytes( length );
227 }
228
229 /**
230 * @see org.millscript.office.excel.RecordTokenizer#readBytesInto(byte[])
231 */
232 public byte[] readBytesInto( final byte[] bytes ) {
233 return recordTokenizer.readBytesInto( bytes );
234 }
235
236 /**
237 * @see org.millscript.office.excel.RecordTokenizer#readCellRangeAddress()
238 */
239 public CellRangeAddress readCellRangeAddress() {
240 return recordTokenizer.readCellRangeAddress();
241 }
242
243 /**
244 * @see org.millscript.office.excel.RecordTokenizer#readCharArray(int)
245 */
246 public char[] readCharArray( final int length ) {
247 return recordTokenizer.readCharArray( length );
248 }
249
250 /**
251 * @see org.millscript.office.excel.RecordTokenizer#readCompressedCharArray(int)
252 */
253 public char[] readCompressedCharArray( final int length ) {
254 return recordTokenizer.readCompressedCharArray( length );
255 }
256
257 /**
258 * @see org.millscript.office.excel.RecordTokenizer#readConstantCachedValueArray()
259 * <p>
260 * This method always reads the constant cached value array from the
261 * additional data section of the formula.
262 * </p>
263 */
264 public ConstantCachedValue[] readConstantCachedValueArray() {
265 this.setupToReadAdditionalData();
266 final ConstantCachedValue[] values = this.recordTokenizer.readConstantCachedValueArray();
267 this.setupToReadFormulaData();
268 return values;
269 }
270
271 /**
272 * @see org.millscript.office.excel.RecordTokenizer#readFormattingRun()
273 */
274 public FormattingRun readFormattingRun() {
275 return recordTokenizer.readFormattingRun();
276 }
277
278 /**
279 * @see org.millscript.office.excel.RecordTokenizer#readOffsetCellAddress()
280 */
281 public OffsetCellAddress readOffsetCellAddress() {
282 return recordTokenizer.readOffsetCellAddress();
283 }
284
285 /**
286 * @see org.millscript.office.excel.RecordTokenizer#readShortString()
287 */
288 public String readShortString() {
289 return recordTokenizer.readShortString();
290 }
291
292 /**
293 * @see org.millscript.office.excel.RecordTokenizer#readString(int)
294 */
295 public String readString( final int length ) {
296 return recordTokenizer.readString( length );
297 }
298
299 /**
300 * @see org.millscript.office.excel.RecordTokenizer#readUnsignedByte()
301 */
302 public int readUnsignedByte() {
303 return recordTokenizer.readUnsignedByte();
304 }
305
306 /**
307 * @see org.millscript.office.excel.RecordTokenizer#readUnsigned2Byte()
308 */
309 public char readUnsigned2Byte() {
310 return recordTokenizer.readUnsigned2Byte();
311 }
312
313 protected void setupToReadAdditionalData() {
314 if ( this.storedAdditionalDataOffset == -1 ) {
315 throw new BIFFAlert(
316 "Trying to return to additional data before formula data read"
317 ).mishap();
318 } else {
319
320 this.storedFormulaDataOffset = this.recordTokenizer.getCurrentRecordDataOffset();
321
322 this.recordTokenizer.setCurrentRecordDataOffset(
323 this.storedAdditionalDataOffset
324 );
325 }
326 }
327
328 protected void setupToReadFormulaData() {
329 if ( this.storedFormulaDataOffset == -1 ) {
330 throw new BIFFAlert(
331 "Trying to return to formula data before additional data read"
332 ).mishap();
333 } else {
334
335 this.storedAdditionalDataOffset = this.recordTokenizer.getCurrentRecordDataOffset();
336
337 this.recordTokenizer.setCurrentRecordDataOffset(
338 this.storedFormulaDataOffset
339 );
340 }
341 }
342
343 /**
344 * @see org.millscript.office.excel.RecordTokenizer#skipBytes(int)
345 */
346 public void skipBytes( int skip ) {
347 recordTokenizer.skipBytes( skip );
348 }
349
350 }