View Javadoc

1   ////////////////////////////////////////////////////////////////////////////////
2   // MillScript-Excel: an Open Spice interpreter and batch website creation tool
3   // Copyright (C) 2006 Open World Ltd, Kevin Rogers
4   //
5   // This file is part of MillScript-Excel.
6   //
7   // MillScript-Excel is free software; you can redistribute it and/or modify it under
8   // the terms of the GNU General Public License as published by the Free
9   // Software Foundation; either version 2 of the License, or (at your option)
10  // any later version.
11  //
12  // MillScript-Excel is distributed in the hope that it will be useful, but WITHOUT
13  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  // more details.
16  //
17  // You should have received a copy of the GNU General Public License along with
18  // MillScript-Excel; if not, write to the Free Software Foundation, Inc., 59 Temple
19  // Place, Suite 330, Boston, MA  02111-1307  USA
20  ////////////////////////////////////////////////////////////////////////////////
21  package org.millscript.office.excel;
22  
23  import org.millscript.millscript.expr.Expr;
24  import org.millscript.office.alerts.OfficeAlert;
25  import org.millscript.office.excel.alerts.BIFFAlert;
26  import org.millscript.office.excel.records.formula.AddTokenSyntax;
27  import org.millscript.office.excel.records.formula.Area3dTokenSyntax;
28  import org.millscript.office.excel.records.formula.ArrayTokenSyntax;
29  import org.millscript.office.excel.records.formula.BoolTokenSyntax;
30  import org.millscript.office.excel.records.formula.ConcatTokenSyntax;
31  import org.millscript.office.excel.records.formula.DivTokenSyntax;
32  import org.millscript.office.excel.records.formula.ErrTokenSyntax;
33  import org.millscript.office.excel.records.formula.IntTokenSyntax;
34  import org.millscript.office.excel.records.formula.MissArgTokenSyntax;
35  import org.millscript.office.excel.records.formula.MulTokenSyntax;
36  import org.millscript.office.excel.records.formula.NumberTokenSyntax;
37  import org.millscript.office.excel.records.formula.PowerTokenSyntax;
38  import org.millscript.office.excel.records.formula.RefTokenSyntax;
39  import org.millscript.office.excel.records.formula.StrTokenSyntax;
40  import org.millscript.office.excel.records.formula.SubTokenSyntax;
41  import org.millscript.office.excel.records.formula.UMinusTokenSyntax;
42  import org.millscript.office.excel.records.formula.UPercentTokenSyntax;
43  import org.millscript.office.excel.records.formula.UPlusTokenSyntax;
44  import org.millscript.office.excel.records.substructures.CellRangeAddress;
45  import org.millscript.office.excel.records.substructures.ConstantCachedValue;
46  import org.millscript.office.excel.records.substructures.FormattingRun;
47  import org.millscript.office.excel.versions.BIFFVersion;
48  import org.millscript.office.spreadsheet.formula.AbsoluteCellAddress;
49  import org.millscript.office.spreadsheet.formula.OffsetCellAddress;
50  
51  import java.util.HashMap;
52  
53  /**
54   * 
55   */
56  public class FormulaTokenizer {
57  
58      private static HashMap< Integer, FormulaTokenSyntax > TABLE = new HashMap< Integer, FormulaTokenSyntax >();
59  
60      static {
61          TABLE.put( 0x03, new AddTokenSyntax() );
62          TABLE.put( 0x3B, new Area3dTokenSyntax() );
63          TABLE.put( 0x5B, new Area3dTokenSyntax() );
64          TABLE.put( 0x7B, new Area3dTokenSyntax() );
65          TABLE.put( 0x20, new ArrayTokenSyntax() );
66          TABLE.put( 0x40, new ArrayTokenSyntax() );
67          TABLE.put( 0x60, new ArrayTokenSyntax() );
68          TABLE.put( 0x1D, new BoolTokenSyntax() );
69          TABLE.put( 0x08, new ConcatTokenSyntax() );
70          TABLE.put( 0x06, new DivTokenSyntax() );
71          TABLE.put( 0x1C, new ErrTokenSyntax() );
72          TABLE.put( 0x1E, new IntTokenSyntax() );
73          TABLE.put( 0x16, new MissArgTokenSyntax() );
74          TABLE.put( 0x05, new MulTokenSyntax() );
75          TABLE.put( 0x1F, new NumberTokenSyntax() );
76          TABLE.put( 0x07, new PowerTokenSyntax() );
77          TABLE.put( 0x24, new RefTokenSyntax() );
78          TABLE.put( 0x44, new RefTokenSyntax() );
79          TABLE.put( 0x64, new RefTokenSyntax() );
80          TABLE.put( 0x04, new SubTokenSyntax() );
81          TABLE.put( 0x17, new StrTokenSyntax() );
82          TABLE.put( 0x14, new UPercentTokenSyntax() );
83          TABLE.put( 0x12, new UPlusTokenSyntax() );
84          TABLE.put( 0x13, new UMinusTokenSyntax() );
85      }
86  
87      private Expr< ? >[] actionStack = new Expr< ? >[ 128 ];
88  
89      private int actionStackIndex = 0;
90  
91      private final int additionalDataOffset;
92  
93      private final RecordTokenizer recordTokenizer;
94  
95      private int storedAdditionalDataOffset = -1;
96  
97      private int storedFormulaDataOffset = -1;
98  
99      public FormulaTokenizer( final RecordTokenizer tokenizer, final int formulaDataSize ) {
100         this.recordTokenizer = tokenizer;
101         // Calculate the additional data offset
102         this.additionalDataOffset = tokenizer.getCurrentRecordDataOffset() + formulaDataSize;
103     }
104 
105     public String decode( final byte[] data ) {
106         return this.recordTokenizer.decode( data );
107     }
108 
109     /**
110      * @see org.millscript.office.excel.RecordTokenizer#getBiffVersion()
111      */
112     public BIFFVersion getBiffVersion() {
113         return recordTokenizer.getBiffVersion();
114     }
115 
116     public Expr< ? > parse() {
117         // Read all the formulas tokens
118         while ( this.recordTokenizer.getCurrentRecordDataOffset() != this.additionalDataOffset ) {
119             final int tokenId = this.recordTokenizer.readUnsignedByte();
120             // Get the formula token syntax for this record
121             final FormulaTokenSyntax syntax = TABLE.get( tokenId );
122             // Did we find a syntax for this record identifier and is that syntax
123             // supported by this BIFF file version?
124             if ( syntax == null ) {
125                 // Record is not known version
126                 throw new BIFFAlert(
127                     "This formula token is not supported"
128                 ).culprit(
129                     "identifier",
130                     tokenId
131                 ).mishap();
132             } else if ( this.recordTokenizer.checkBIFFVersionSupportsRecord( syntax ) ) {
133                 // Parse the formula token using this syntax
134                 syntax.newToken( this );
135             } else {
136                 // Record is not supported in this BIFF version
137                 throw new BIFFAlert(
138                     "This record is not supported by the current BIFF version"
139                 ).culprit(
140                     "identifier",
141                     tokenId
142                 ).culprit(
143                     "record",
144                     syntax
145                 ).mishap();
146             }
147         }
148         // There should be a single action left on the stack, otherwise there
149         // was a problem converting the RPN array
150         if ( this.actionStackIndex == 1 ) {
151             return this.actionStack[ 0 ];
152         } else {
153             throw new OfficeAlert(
154                 "There was an unexpected problem reading a formula"
155             ).mishap();
156         }
157     }
158 
159     public Expr< ? > popExpr() {
160         // Decrement the index before indexing the array. Remember that the
161         // index is the index for the next element and hence one greater
162         // than we need now.
163         // NOTE - we deliberately allow this code to blow up if you try to pop
164         // an object off an empty stack. You're going to get an exception
165         // anyway so it might as well be an index out of bounds.
166         return this.actionStack[ --this.actionStackIndex ];
167     }
168 
169     public void pushExpr( final Expr< ? > expr ) {
170         if ( this.actionStackIndex >= this.actionStack.length ) {
171             // We need to grow the array
172             final Expr< ? >[] newArray = new Expr< ? >[ (int) ( this.actionStack.length * 1.5 ) ];
173             System.arraycopy( this.actionStackIndex, 0, newArray, 0, this.actionStack.length );
174             this.actionStack = newArray;
175         }
176         // "push" the new value on the end
177         this.actionStack[ this.actionStackIndex++ ] = expr;
178     }
179 
180     /**
181      * @see org.millscript.office.excel.RecordTokenizer#read2ByteChar()
182      */
183     public char read2ByteChar() {
184         return recordTokenizer.read2ByteChar();
185     }
186 
187     /**
188      * @see org.millscript.office.excel.RecordTokenizer#read2ByteInt()
189      */
190     public int read2ByteInt() {
191         return recordTokenizer.read2ByteInt();
192     }
193 
194     /**
195      * @see org.millscript.office.excel.RecordTokenizer#read4ByteInt()
196      */
197     public int read4ByteInt() {
198         return recordTokenizer.read4ByteInt();
199     }
200 
201     /**
202      * @see org.millscript.office.excel.RecordTokenizer#read8ByteDouble()
203      */
204     public double read8ByteDouble() {
205         return recordTokenizer.read8ByteDouble();
206     }
207 
208     /**
209      * @see org.millscript.office.excel.RecordTokenizer#readAbsoluteCellAddress()
210      */
211     public AbsoluteCellAddress readAbsoluteCellAddress() {
212         return recordTokenizer.readAbsoluteCellAddress();
213     }
214 
215     /**
216      * @see org.millscript.office.excel.RecordTokenizer#readByte()
217      */
218     public byte readByte() {
219         return recordTokenizer.readByte();
220     }
221 
222     /**
223      * @see org.millscript.office.excel.RecordTokenizer#readBytes(int)
224      */
225     public byte[] readBytes( final int length ) {
226         return recordTokenizer.readBytes( length );
227     }
228 
229     /**
230      * @see org.millscript.office.excel.RecordTokenizer#readBytesInto(byte[])
231      */
232     public byte[] readBytesInto( final byte[] bytes ) {
233         return recordTokenizer.readBytesInto( bytes );
234     }
235 
236     /**
237      * @see org.millscript.office.excel.RecordTokenizer#readCellRangeAddress()
238      */
239     public CellRangeAddress readCellRangeAddress() {
240         return recordTokenizer.readCellRangeAddress();
241     }
242 
243     /**
244      * @see org.millscript.office.excel.RecordTokenizer#readCharArray(int)
245      */
246     public char[] readCharArray( final int length ) {
247         return recordTokenizer.readCharArray( length );
248     }
249 
250     /**
251      * @see org.millscript.office.excel.RecordTokenizer#readCompressedCharArray(int)
252      */
253     public char[] readCompressedCharArray( final int length ) {
254         return recordTokenizer.readCompressedCharArray( length );
255     }
256 
257     /**
258      * @see org.millscript.office.excel.RecordTokenizer#readConstantCachedValueArray()
259      * <p>
260      * This method always reads the constant cached value array from the
261      * additional data section of the formula.
262      * </p>
263      */
264     public ConstantCachedValue[] readConstantCachedValueArray() {
265         this.setupToReadAdditionalData();
266         final ConstantCachedValue[] values = this.recordTokenizer.readConstantCachedValueArray();
267         this.setupToReadFormulaData();
268         return values;
269     }
270 
271     /**
272      * @see org.millscript.office.excel.RecordTokenizer#readFormattingRun()
273      */
274     public FormattingRun readFormattingRun() {
275         return recordTokenizer.readFormattingRun();
276     }
277 
278     /**
279      * @see org.millscript.office.excel.RecordTokenizer#readOffsetCellAddress()
280      */
281     public OffsetCellAddress readOffsetCellAddress() {
282         return recordTokenizer.readOffsetCellAddress();
283     }
284 
285     /**
286      * @see org.millscript.office.excel.RecordTokenizer#readShortString()
287      */
288     public String readShortString() {
289         return recordTokenizer.readShortString();
290     }
291 
292     /**
293      * @see org.millscript.office.excel.RecordTokenizer#readString(int)
294      */
295     public String readString( final int length ) {
296         return recordTokenizer.readString( length );
297     }
298 
299     /**
300      * @see org.millscript.office.excel.RecordTokenizer#readUnsignedByte()
301      */
302     public int readUnsignedByte() {
303         return recordTokenizer.readUnsignedByte();
304     }
305 
306     /**
307      * @see org.millscript.office.excel.RecordTokenizer#readUnsigned2Byte()
308      */
309     public char readUnsigned2Byte() {
310         return recordTokenizer.readUnsigned2Byte();
311     }
312 
313     protected void setupToReadAdditionalData() {
314         if ( this.storedAdditionalDataOffset == -1 ) {
315             throw new BIFFAlert(
316                 "Trying to return to additional data before formula data read"
317             ).mishap();
318         } else {
319             // Store the current offset in the formula data
320             this.storedFormulaDataOffset = this.recordTokenizer.getCurrentRecordDataOffset();
321             // Move the offset to that in the additional data
322             this.recordTokenizer.setCurrentRecordDataOffset(
323                 this.storedAdditionalDataOffset
324             );
325         }
326     }
327 
328     protected void setupToReadFormulaData() {
329         if ( this.storedFormulaDataOffset == -1 ) {
330             throw new BIFFAlert(
331                 "Trying to return to formula data before additional data read"
332             ).mishap();
333         } else {
334             // Store the current offset in the additional data
335             this.storedAdditionalDataOffset = this.recordTokenizer.getCurrentRecordDataOffset();
336             // Move the offset to that in the formula data
337             this.recordTokenizer.setCurrentRecordDataOffset(
338                 this.storedFormulaDataOffset
339             );
340         }
341     }
342 
343     /**
344      * @see org.millscript.office.excel.RecordTokenizer#skipBytes(int)
345      */
346     public void skipBytes( int skip ) {
347         recordTokenizer.skipBytes( skip );
348     }
349 
350 }