View Javadoc

1   ////////////////////////////////////////////////////////////////////////////////
2   // MillScript-Excel: an Open Spice interpreter and batch website creation tool
3   // Copyright (C) 2006 Open World Ltd, Kevin Rogers
4   //
5   // This file is part of MillScript-Excel.
6   //
7   // MillScript-Excel is free software; you can redistribute it and/or modify it under
8   // the terms of the GNU General Public License as published by the Free
9   // Software Foundation; either version 2 of the License, or (at your option)
10  // any later version.
11  //
12  // MillScript-Excel is distributed in the hope that it will be useful, but WITHOUT
13  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  // more details.
16  //
17  // You should have received a copy of the GNU General Public License along with
18  // MillScript-Excel; if not, write to the Free Software Foundation, Inc., 59 Temple
19  // Place, Suite 330, Boston, MA  02111-1307  USA
20  ////////////////////////////////////////////////////////////////////////////////
21  package org.millscript.office.excel.records.syntax;
22  
23  import org.millscript.office.excel.RecordSyntax;
24  import org.millscript.office.excel.RecordTokenizer;
25  import org.millscript.office.excel.alerts.BIFFAlert;
26  import org.millscript.office.excel.records.CodePage;
27  import org.millscript.office.excel.versions.BIFF2;
28  import org.millscript.office.excel.versions.BIFF3;
29  import org.millscript.office.excel.versions.BIFF4S;
30  import org.millscript.office.excel.versions.BIFF4W;
31  import org.millscript.office.excel.versions.BIFF5;
32  import org.millscript.office.excel.versions.BIFF7;
33  import org.millscript.office.excel.versions.BIFF8;
34  import org.millscript.office.excel.versions.BIFF8X;
35  
36  import java.nio.charset.Charset;
37  import java.nio.charset.IllegalCharsetNameException;
38  import java.nio.charset.UnsupportedCharsetException;
39  
40  /**
41   * 
42   */
43  public class CodePageRecordSyntax extends RecordSyntax implements BIFF2, BIFF3, BIFF4S, BIFF4W, BIFF5, BIFF7, BIFF8, BIFF8X {
44  
45      public String getCharsetName( final int code ) {
46          switch ( code ) {
47              case 0x016F:
48                  return "US-ASCII";
49              case 0x01B5:
50                  return "IBM437";        // US
51              case 0x02D0:
52                  return "CP-720";        // OEM Arabic
53              case 0x02E1:
54                  return "x-IBM737";      // Greek
55              case 0x0307:
56                  return "IBM775";        // Baltic
57              case 0x0352:
58                  return "IBM850";        // Latin I
59              case 0x0354:
60                  return "IBM852";        // Latin II (Central European)
61              case 0x0357:
62                  return "IBM855";        // Cryllic
63              case 0x0359:
64                  return "IBM857";        // Turkish
65              case 0x035A:
66                  return "IBM00858";      // Multilingual Latin I with Euro
67              case 0x035C:
68                  return "IBM860";        // Portuguese
69              case 0x035D:
70                  return "IBM861";        // Icelandic
71              case 0x035E:
72                  return "IBM862";        // Hebrew
73              case 0x035F:
74                  return "IBM863";        // Canadian (French)
75              case 0x0360:
76                  return "IBM864";        // Arabic
77              case 0x0361:
78                  return "IBM865";        // Nordic
79              case 0x0362:
80                  return "IBM866";        // Cryllic (Roman)
81              case 0x0365:
82                  return "IBM869";        // Greek (Modern)
83              case 0x036A:
84                  return "x-IBM874";      // Thai
85              case 0x03A4:
86                  return "windows-31j";   // Japanese Shift-JIS
87              case 0x03A8:
88                  return "x-mswin-936";   // Chinese Simplified GBK
89              case 0x03B5:
90                  return "x-IBM949";      // Korean (Wansung)
91              case 0x03B6:
92                  return "x-IBM950";      // Chinese Traditional BIG5
93              case 0x04B0:
94                  return "UTF-16LE";      // BIFF8
95              case 0x04E2:
96                  return "windows-1250";  // Latin II (Central European)
97              case 0x04E3:
98                  return "windows-1251";  // Cryllic
99              case 0x04E4:
100                 return "windows-1252";  // Latin I (BIFF4 - BIFF7)
101             case 0x04E5:
102                 return "windows-1253";  // Greek
103             case 0x04E6:
104                 return "windows-1254";  // Turkish
105             case 0x04E7:
106                 return "windows-1255";  // Hebrew
107             case 0x04E8:
108                 return "windows-1256";  // Arabic
109             case 0x04E9:
110                 return "windows-1257";  // Baltic
111             case 0x04EA:
112                 return "windows-1258";  // Vietnamese
113             case 0x0551:
114                 return "x-Johab";       // Korean (Johabi)
115             case 0x2710:
116                 return "x-MacRoman";
117             case 0x8000:
118                 return "x-MacRoman";
119             case 0x8001:
120                 return "windows-1252";  // Latin I (BIFF2 - BIFF3)
121             default:
122                 throw new BIFFAlert(
123                     "Unknown charset code for CODEPAGE record"
124                 ).culprit( "charset code", code ).mishap();
125         }
126     }
127 
128     /**
129      * @see org.millscript.office.excel.RecordSyntax#newRecord(RecordTokenizer)
130      */
131     @Override
132     public CodePage newRecord( final RecordTokenizer tokenizer ) {
133         final String name = this.getCharsetName( tokenizer.read2ByteInt() );
134         try {
135             // Set the CODEPAGE, so that the tokenizer can use it for decoding
136             // strings
137             tokenizer.setCodepage( Charset.forName( name ) );
138             return new CodePage( tokenizer.getCodepage() );
139         } catch ( IllegalCharsetNameException ex ) {
140             // Catch, but do handle after try..
141         } catch ( UnsupportedCharsetException ex ) {
142             // Catch, but do handle after try..
143         }
144         throw new BIFFAlert(
145             "Unsupported charset in CODEPAGE record"
146         ).culprit( "charset", name ).mishap();
147     }
148 
149 }