View Javadoc

1   ////////////////////////////////////////////////////////////////////////////////
2   // MillScript: an Open Spice interpreter and batch website creation tool
3   // Copyright (C) 2001-2004 Open World Ltd
4   //
5   // This file is part of MillScript.
6   //
7   // MillScript is free software; you can redistribute it and/or modify it under
8   // the terms of the GNU General Public License as published by the Free
9   // Software Foundation; either version 2 of the License, or (at your option)
10  // any later version.
11  //
12  // MillScript is distributed in the hope that it will be useful, but WITHOUT
13  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  // more details.
16  //
17  // You should have received a copy of the GNU General Public License along with
18  // MillScript; if not, write to the Free Software Foundation, Inc., 59 Temple
19  // Place, Suite 330, Boston, MA  02111-1307  USA
20  ////////////////////////////////////////////////////////////////////////////////
21  package millscript;
22  
23  import java.lang.*;
24  import java.util.*;
25  
public final class TextTools {

    /** First code point covered by {@link #LATIN1_FOLD}. */
    private static final int LATIN1_BASE = 0xC0;

    /**
     * Plain-ASCII stand-ins for the code points U+00C0 .. U+00FF, indexed by
     * <code>codePoint - LATIN1_BASE</code>.  A <code>null</code> entry means
     * the character has no stand-in and is copied through unchanged.
     */
    private static final String[] LATIN1_FOLD = {
        // U+00C0 .. U+00CF
        "A", "A", "A", "A", "A", "A",       // cap A: grave, acute, circumflex, tilde, diaeresis, ring
        "AE",                               // cap ligature AE
        "C",                                // cap C cedilla
        "E", "E", "E", "E",                 // cap E: grave, acute, circumflex, diaeresis
        "I", "I", "I", "I",                 // cap I: grave, acute, circumflex, diaeresis
        // U+00D0 .. U+00DF
        null,                               // U+00D0 is left alone
        "N",                                // cap N tilde
        "O", "O", "O", "O", "O",            // cap O: grave, acute, circumflex, tilde, diaeresis
        null,                               // U+00D7 multiplication sign is left alone
        "O",                                // cap O slash
        "U", "U", "U", "U",                 // cap U: grave, acute, circumflex, diaeresis
        null, null, null,                   // U+00DD .. U+00DF are left alone
        // U+00E0 .. U+00EF
        "a", "a", "a", "a", "a", "a",       // small a: grave, acute, circumflex, tilde, diaeresis, ring
        "ae",                               // small ligature ae
        "c",                                // small c cedilla
        "e", "e", "e", "e",                 // small e: grave, acute, circumflex, diaeresis
        "i", "i", "i", "i",                 // small i: grave, acute, circumflex, diaeresis
        // U+00F0 .. U+00FF
        null,                               // U+00F0 is left alone
        "n",                                // small n tilde
        "o", "o", "o", "o", "o",            // small o: grave, acute, circumflex, tilde, diaeresis
        null,                               // U+00F7 division sign is left alone
        "o",                                // small o slash
        "u", "u", "u", "u",                 // small u: grave, acute, circumflex, diaeresis
        null, null, null                    // U+00FD .. U+00FF are left alone
    };

    /**
     * Returns a copy of <code>s</code> in which selected characters have been
     * replaced by ASCII text:
     * <ul>
     * <li><code>&lt;</code> and <code>&gt;</code> become the entities
     *     <code>&amp;lt;</code> and <code>&amp;gt;</code>;</li>
     * <li><code>&amp;</code> becomes the word <code>and</code> (it is not
     *     turned into an entity);</li>
     * <li>the accented Latin letters and ligatures listed in
     *     <code>LATIN1_FOLD</code>, plus the OE ligatures, lose their accents
     *     or are spelled out;</li>
     * <li>soft hyphen and middle dot become <code>-</code>, the curly single
     *     and double quotes become a back-tick (opening) or an apostrophe
     *     (closing), and the dagger becomes <code>+</code>;</li>
     * <li>U+FEFF (zero width no-break space) is removed.</li>
     * </ul>
     * Every other character is copied through unchanged.
     *
     * @param s the text to filter; a <code>null</code> argument results in a
     *          <code>NullPointerException</code>
     * @return the filtered text
     */
    public static String filter( String s ) {
        final StringBuffer out = new StringBuffer();
        final int len = s.length();
        int pos = 0;
        while ( pos < len ) {
            final char c = s.charAt( pos++ );
            final String sub = replacementFor( c );
            if ( sub != null ) {
                out.append( sub );
            } else {
                out.append( c );
            }
        }
        return out.toString();
    }

    /**
     * Looks up the replacement text for a single character.
     *
     * @param c the character to look up
     * @return the text to emit in place of <code>c</code> (the empty string
     *         when <code>c</code> is to be dropped), or <code>null</code> when
     *         <code>c</code> is to be kept as it is
     */
    private static String replacementFor( char c ) {
        // The dense run of accented letters is served by the table.
        if ( c >= LATIN1_BASE && c < LATIN1_BASE + LATIN1_FOLD.length ) {
            return LATIN1_FOLD[ c - LATIN1_BASE ];
        }

        // Everything else is a handful of isolated code points.
        switch ( c ) {
        case '&':
            return "and";
        case '<':
            return "&lt;";
        case '>':
            return "&gt;";
        case 0x00AD:                        // soft hyphen
        case 0x00B7:                        // middle dot
            return "-";
        case 0x0152:                        // cap ligature OE
            return "OE";
        case 0x0153:                        // small ligature oe
            return "oe";
        case 0x2018:                        // left single quote
        case 0x201C:                        // left double quote
            return "`";
        case 0x2019:                        // right single quote
        case 0x201D:                        // right double quote
            return "'";
        case 0x2020:                        // dagger
            return "+";
        case 0xFEFF:                        // zero width no-break space: dropped
            return "";
        default:
            return null;
        }
    }

}