View Javadoc

1   ////////////////////////////////////////////////////////////////////////////////
2   // MillScript: an Open Spice interpreter and batch website creation tool
3   // Copyright (C) 2001-2004 Open World Ltd
4   //
5   // This file is part of MillScript.
6   //
7   // MillScript is free software; you can redistribute it and/or modify it under
8   // the terms of the GNU General Public License as published by the Free
9   // Software Foundation; either version 2 of the License, or (at your option)
10  // any later version.
11  //
12  // MillScript is distributed in the hope that it will be useful, but WITHOUT
13  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  // more details.
16  //
17  // You should have received a copy of the GNU General Public License along with
18  // MillScript; if not, write to the Free Software Foundation, Inc., 59 Temple
19  // Place, Suite 330, Boston, MA  02111-1307  USA
20  ////////////////////////////////////////////////////////////////////////////////
21  package org.millscript.millscript.loaders;
22  
23  import org.millscript.commons.util.IList;
24  import org.millscript.commons.util.list.EArrayList;
25  import org.millscript.commons.vfs.VFile;
26  import org.millscript.millscript.alert.Alerts;
27  
28  import java.io.IOException;
29  
30  /**
31   * This class implements a separated file format loader for MillScript. The
32   * contents of the file are read as text, which is then broken up into lines.
33   * Each line is broken up into sections, based on the separator registered with
34   * this loader.
35   */
36  public class SVLoader extends AbsTextLoader {
37  
38      /**
39       * The separated file field delimiter. This is used to delimit
40       * fields that contain either the field or record separator.
41       */
42      private String delimiter = "\"";
43  
44      /**
45       * The escaped version of the delimiter character. This is used
46       * when reading a quoted value that contains the delimiter
47       * character.
48       */
49      private String escapedDelimiter = delimiter + delimiter;
50  
51      /**
52       * The separated file field separator. This is used to separate
53       * individual fields within a record.
54       */
55      private String separator = ",";
56  
57      /**
58       * Holds the complete contents of the SV file as a String.
59       */
60      private String svFileContents = "";
61  
62      /**
63       * Holds the current position in the sv file.
64       */
65      private int currentPos = 0;
66  
67      /**
68       * Flag to indicate if we must still trim leading white space
69       * from the field.
70       */
71      private boolean trimLeading = true;
72  
73      /**
74       * This holds the current field that we are reading.
75       */
76      private StringBuffer currentField;
77  
78      /**
79       * Temporarily holds the last unquoted section of the
80       * current field, so that we can remove trailing white space
81       * if necessary.
82       */
83      private StringBuffer lastUnquotedSection;
84  
85      /**
86       * This will hold each field in a record from the SV file
87       */
88      private EArrayList< String > currentRecord;
89  
90      /**
91       * @see org.millscript.millscript.loaders.Loader#loadValue()
92       */
93      @Override
94      public Object loadValue() throws IOException {
95  
96          // This string is used to separate records in the SV file.
97          String recordSeparator = System.getProperty( "line.separator" );
98  
99          if ( this.entry instanceof VFile && this.entry.exists() ) {
100             // Read the complete contents of the SV file into our
101             // storage variable.
102             svFileContents = readAll( ((VFile) entry).getReader() );
103         } else {
104             throw(
105                 Alerts.compile(
106                     "Inventory entry is not a file",
107                     "Separated value loader can only be used to load files"
108                 ).culprit( "entry", this.entry ).mishap()
109             );
110         }
111 
112         // This will hold each record of the SV file
113         // after we've processed it.
114         EArrayList< IList< String > > records = new EArrayList< IList< String > >();
115 
116         // Reset the current record holder.
117         currentRecord = new EArrayList< String >();
118 
119         // Reset the current field holder.
120         currentField = new StringBuffer();
121 
122         // Reset the last unquoted section.
123         lastUnquotedSection = new StringBuffer();
124 
125         // Reset the current position in the sv file
126         currentPos = 0;
127 
128         // Reset the trim leading whitespace flag.
129         trimLeading = true;
130 
131         while ( currentPos < svFileContents.length() ) {
132 
133             if ( svFileContents.startsWith( separator, currentPos ) ) {
134 
135                 // System.err.println( "ENDFIELD: Adding field to current record" );
136 
137                 addField();
138 
139                 // Increment the current position by the length
140                 // of the field separator.
141                 currentPos += separator.length();
142 
143             } else if ( svFileContents.startsWith( recordSeparator, currentPos ) ) {
144 
145                 // System.err.println( "ENDRECORD: Adding last field to current record" );
146 
147                 addField();
148 
149                 // System.err.println( "ENDRECORD: Adding current record to records" );
150 
151                 // Add the current record to the list of records
152                 // we've read from the SV file so far.
153                 records.addLast( currentRecord );
154 
155                 // Now reset the current record ready to read
156                 // the next one.
157                 currentRecord = new EArrayList< String >();
158 
159                 // Increment the current position by the length
160                 // of the field separator.
161                 currentPos += recordSeparator.length();
162 
163             } else if ( svFileContents.startsWith( delimiter, currentPos ) ) {
164 
165                 // System.err.println( "Reading quoted field/value" );
166 
167                 addLastUnquotedSectionToField();
168 
169                 // Read the quoted field/value, appending it to
170                 // the current field buffer.
171                 readQuotedField();
172 
173             } else {
174 
175                 // System.err.println( "Adding '" + svFileContents.charAt( currentPos ) + "' to current field" );
176 
177                 // This is just a normal character, so we append it
178                 // to the current field.
179                 lastUnquotedSection.append( svFileContents.charAt( currentPos ) );
180 
181                 // Increment the current position by one, so that we
182                 // move on to the next character.
183                 currentPos++;
184 
185             }
186 
187         }
188 
189         return records;
190 
191     }
192 
193     /**
194      * Adds the last unquoted section of a field/value to the
195      * current field buffer. This is done whenever we are about
196      * to start reading a quoted section or we have reached the
197      * end of the field.
198      */
199     private void addLastUnquotedSectionToField() {
200 
201         // Add the last unquoted section of the field
202         currentField.append( lastUnquotedSection );
203 
204         // Reset the last unquoted section buffer, so that
205         // we are ready to read the next field/value.
206         lastUnquotedSection = new StringBuffer();
207 
208         // Remove any leading whitespace from the front of this field, if
209         // appropriate.
210         trimLeadingWhitespace();
211 
212     }
213 
214     /**
215      * Adds the current field to the current record, resetting
216      * the important state information.
217      */
218     private void addField() {
219 
220         // System.err.println( "ADDFIELD: Adding field to current record" );
221 
222         // Remove any trailing whitespace from the last unquoted
223         // section.
224         trimTrailingWhitespace();
225 
226         // Add the last unquoted section.
227         addLastUnquotedSectionToField();
228 
229         // Add the current field to the current record.
230         currentRecord.addLast( currentField.toString() );
231 
232         // Now reset the current field ready to read
233         // the next one.
234         currentField = new StringBuffer();
235 
236         // Reset the trim leading white space flag, for the new field.
237         trimLeading = true;
238 
239     }
240 
241     /**
242      * Set the separator for this separated value file.
243      *
244      * @param s the separator for this file
245      */
246     public void setSeparator( final String s ) {
247 
248         separator = s;
249 
250     }
251 
252     /**
253      * Set the delimiter for quoted fields/values in the separated
254      * value file.
255      *
256      * @param d the delimiter for quoted values
257      */
258     public void setDelimeter( final String d ) {
259 
260         // Set the delimiter
261         delimiter = d;
262 
263         // Set the escaped version of the delimiter.
264         escapedDelimiter = delimiter + delimiter;
265 
266     }
267 
268     /**
269      * Reads a quoted value from the SV file. A quoted value starts
270      * with the delimiter and continues to the next single delimiter.
271      * Within a quoted field the delimiter is represented by a pair
272      * of the delimiter characters.
273      */
274     private void readQuotedField() {
275 
276         // Add the character at the current position
277         // (which is the delimiter at the start of the quoted value)
278         // currentField.append( delimiter );
279 
280         // Increment the current position, otherwise we won't read
281         // the quoted value properly.
282         // We might stop without reading the quoted value, as we
283         // might think the first delimiter is the last...
284         currentPos += delimiter.length();
285 
286         while ( currentPos < svFileContents.length() ) {
287 
288             if ( svFileContents.startsWith( escapedDelimiter, currentPos ) ) {
289 
290                 // It's just a delimiter character.
291                 currentField.append( delimiter );
292 
293                 // Increment the current position by the length
294                 // of the escaped delimiter.
295                 currentPos += escapedDelimiter.length();
296 
297             } else if ( svFileContents.startsWith( delimiter, currentPos ) ) {
298 
299                 // It's the end of the quoted field/value.
300                 // currentField.append( delimiter );
301 
302                 // Increment the current position
303                 currentPos += delimiter.length();
304 
305                 // Break out of the while loop.
306                 // Otherwise we'll read to the end of the file...
307                 break;
308 
309             } else {
310 
311                 // The character at the current position is just
312                 // a normal part of the field, so we just add it.
313                 currentField.append( svFileContents.charAt( currentPos ) );
314 
315                 // Increment the current position.
316                 currentPos++;
317 
318             }
319 
320         }
321 
322     }
323 
324     /**
325      * Tests is the supplied character is white space, according to our
326      * definition. We currently recognise only spaces and tabs as white space
327      * characters. This is deliberately different from the Java version
328      * which would also include newlines and more.
329      *
330      * @param   ch  the character to test
331      * @return  <code>true</code> if the supplied character is whitespace,
332      * <code>false</code> otherwise
333      */
334     private boolean isWhiteSpace( final char ch ) {
335         return ( ch == ' ' || ch == '\t' );
336     }
337 
338     /**
339      * Removes any white space from the start of the current field. This method will
340      * only remove characters once for any imported field. As this method is called
341      * before any quoted section is parsed it will only remove unquoted white space.
342      */
343     private void trimLeadingWhitespace() {
344         if ( trimLeading ) {
345 
346             // We start from the begining of the current field.
347             int pos = 0;
348 
349             // Keep moving one character at a time, from left to right, until
350             // we find the first character that is not white space, or we reach
351             // the end of the field.
352             while ( pos < currentField.length() && isWhiteSpace( currentField.charAt( pos ) ) ) {
353                 // System.err.println( "TRIMLEADING: pos(" + Integer.toString( pos ) + ") : '" + currentField.charAt( pos ) + "'" );
354                 pos++;
355             }
356 
357             // Remove any white space from the start of the current field
358             // to the character preceeding the first non-white space character.
359             currentField.delete( 0, pos );
360 
361             // We don't want to trim any more leading white space while we
362             // are processing this field.
363             trimLeading = false;
364         }
365     }
366 
367     /**
368      * Removes any white space from the end of unquoted sections of the current
369      * field. As this method is called just before the field is "added" to the
370      * current row, only white space at the end of the last unquoted section will
371      * be removed.
372      */
373     private void trimTrailingWhitespace() {
374 
375         // The length of the last unquoted section.
376         int length = lastUnquotedSection.length();
377 
378         // The index of the last character is the length - 1.
379         int pos = length - 1;
380 
381         // Keep moving one character at a time, from right to left, until
382         // we find the first character that is not white space, or we reach
383         // the start of the field.
384         while ( length > 0 && isWhiteSpace( lastUnquotedSection.charAt( pos ) ) ) {
385             // System.err.println( "TRIMTRAILING: pos(" + Integer.toString( pos ) + ") : '" + lastUnquotedSection.charAt( pos ) + "'" );
386             length--;
387             pos--;
388         }
389 
390         // Truncate the last unquoted section after the last non-white space
391         // character.
392         lastUnquotedSection.setLength( length );
393     }
394 
395 }