1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.millscript.millscript.loaders;
22
23 import org.millscript.commons.util.IList;
24 import org.millscript.commons.util.list.EArrayList;
25 import org.millscript.commons.vfs.VFile;
26 import org.millscript.millscript.alert.Alerts;
27
28 import java.io.IOException;
29
30 /**
31 * This class implements a separated file format loader for MillScript. The
32 * contents of the file are read as text, which is then broken up into lines.
33 * Each line is broken up into sections, based on the separator registered with
34 * this loader.
35 */
36 public class SVLoader extends AbsTextLoader {
37
38 /**
39 * The separated file field delimiter. This is used to delimit
40 * fields that contain either the field or record separator.
41 */
42 private String delimiter = "\"";
43
44 /**
45 * The escaped version of the delimiter character. This is used
46 * when reading a quoted value that contains the delimiter
47 * character.
48 */
49 private String escapedDelimiter = delimiter + delimiter;
50
51 /**
52 * The separated file field separator. This is used to separate
53 * individual fields within a record.
54 */
55 private String separator = ",";
56
57 /**
58 * Holds the complete contents of the SV file as a String.
59 */
60 private String svFileContents = "";
61
62 /**
63 * Holds the current position in the sv file.
64 */
65 private int currentPos = 0;
66
67 /**
68 * Flag to indicate if we must still trim leading white space
69 * from the field.
70 */
71 private boolean trimLeading = true;
72
73 /**
74 * This holds the current field that we are reading.
75 */
76 private StringBuffer currentField;
77
78 /**
79 * Temporarily holds the last unquoted section of the
80 * current field, so that we can remove trailing white space
81 * if necessary.
82 */
83 private StringBuffer lastUnquotedSection;
84
85 /**
86 * This will hold each field in a record from the SV file
87 */
88 private EArrayList< String > currentRecord;
89
90 /**
91 * @see org.millscript.millscript.loaders.Loader#loadValue()
92 */
93 @Override
94 public Object loadValue() throws IOException {
95
96
97 String recordSeparator = System.getProperty( "line.separator" );
98
99 if ( this.entry instanceof VFile && this.entry.exists() ) {
100
101
102 svFileContents = readAll( ((VFile) entry).getReader() );
103 } else {
104 throw(
105 Alerts.compile(
106 "Inventory entry is not a file",
107 "Separated value loader can only be used to load files"
108 ).culprit( "entry", this.entry ).mishap()
109 );
110 }
111
112
113
114 EArrayList< IList< String > > records = new EArrayList< IList< String > >();
115
116
117 currentRecord = new EArrayList< String >();
118
119
120 currentField = new StringBuffer();
121
122
123 lastUnquotedSection = new StringBuffer();
124
125
126 currentPos = 0;
127
128
129 trimLeading = true;
130
131 while ( currentPos < svFileContents.length() ) {
132
133 if ( svFileContents.startsWith( separator, currentPos ) ) {
134
135
136
137 addField();
138
139
140
141 currentPos += separator.length();
142
143 } else if ( svFileContents.startsWith( recordSeparator, currentPos ) ) {
144
145
146
147 addField();
148
149
150
151
152
153 records.addLast( currentRecord );
154
155
156
157 currentRecord = new EArrayList< String >();
158
159
160
161 currentPos += recordSeparator.length();
162
163 } else if ( svFileContents.startsWith( delimiter, currentPos ) ) {
164
165
166
167 addLastUnquotedSectionToField();
168
169
170
171 readQuotedField();
172
173 } else {
174
175
176
177
178
179 lastUnquotedSection.append( svFileContents.charAt( currentPos ) );
180
181
182
183 currentPos++;
184
185 }
186
187 }
188
189 return records;
190
191 }
192
193 /**
194 * Adds the last unquoted section of a field/value to the
195 * current field buffer. This is done whenever we are about
196 * to start reading a quoted section or we have reached the
197 * end of the field.
198 */
199 private void addLastUnquotedSectionToField() {
200
201
202 currentField.append( lastUnquotedSection );
203
204
205
206 lastUnquotedSection = new StringBuffer();
207
208
209
210 trimLeadingWhitespace();
211
212 }
213
214 /**
215 * Adds the current field to the current record, resetting
216 * the important state information.
217 */
218 private void addField() {
219
220
221
222
223
224 trimTrailingWhitespace();
225
226
227 addLastUnquotedSectionToField();
228
229
230 currentRecord.addLast( currentField.toString() );
231
232
233
234 currentField = new StringBuffer();
235
236
237 trimLeading = true;
238
239 }
240
241 /**
242 * Set the separator for this separated value file.
243 *
244 * @param s the separator for this file
245 */
246 public void setSeparator( final String s ) {
247
248 separator = s;
249
250 }
251
252 /**
253 * Set the delimiter for quoted fields/values in the separated
254 * value file.
255 *
256 * @param d the delimiter for quoted values
257 */
258 public void setDelimeter( final String d ) {
259
260
261 delimiter = d;
262
263
264 escapedDelimiter = delimiter + delimiter;
265
266 }
267
268 /**
269 * Reads a quoted value from the SV file. A quoted value starts
270 * with the delimiter and continues to the next single delimiter.
271 * Within a quoted field the delimiter is represented by a pair
272 * of the delimiter characters.
273 */
274 private void readQuotedField() {
275
276
277
278
279
280
281
282
283
284 currentPos += delimiter.length();
285
286 while ( currentPos < svFileContents.length() ) {
287
288 if ( svFileContents.startsWith( escapedDelimiter, currentPos ) ) {
289
290
291 currentField.append( delimiter );
292
293
294
295 currentPos += escapedDelimiter.length();
296
297 } else if ( svFileContents.startsWith( delimiter, currentPos ) ) {
298
299
300
301
302
303 currentPos += delimiter.length();
304
305
306
307 break;
308
309 } else {
310
311
312
313 currentField.append( svFileContents.charAt( currentPos ) );
314
315
316 currentPos++;
317
318 }
319
320 }
321
322 }
323
324 /**
325 * Tests is the supplied character is white space, according to our
326 * definition. We currently recognise only spaces and tabs as white space
327 * characters. This is deliberately different from the Java version
328 * which would also include newlines and more.
329 *
330 * @param ch the character to test
331 * @return <code>true</code> if the supplied character is whitespace,
332 * <code>false</code> otherwise
333 */
334 private boolean isWhiteSpace( final char ch ) {
335 return ( ch == ' ' || ch == '\t' );
336 }
337
338 /**
339 * Removes any white space from the start of the current field. This method will
340 * only remove characters once for any imported field. As this method is called
341 * before any quoted section is parsed it will only remove unquoted white space.
342 */
343 private void trimLeadingWhitespace() {
344 if ( trimLeading ) {
345
346
347 int pos = 0;
348
349
350
351
352 while ( pos < currentField.length() && isWhiteSpace( currentField.charAt( pos ) ) ) {
353
354 pos++;
355 }
356
357
358
359 currentField.delete( 0, pos );
360
361
362
363 trimLeading = false;
364 }
365 }
366
367 /**
368 * Removes any white space from the end of unquoted sections of the current
369 * field. As this method is called just before the field is "added" to the
370 * current row, only white space at the end of the last unquoted section will
371 * be removed.
372 */
373 private void trimTrailingWhitespace() {
374
375
376 int length = lastUnquotedSection.length();
377
378
379 int pos = length - 1;
380
381
382
383
384 while ( length > 0 && isWhiteSpace( lastUnquotedSection.charAt( pos ) ) ) {
385
386 length--;
387 pos--;
388 }
389
390
391
392 lastUnquotedSection.setLength( length );
393 }
394
395 }