1 ////////////////////////////////////////////////////////////////////////////////
2 // MillScript: an Open Spice interpreter and batch website creation tool
3 // Copyright (C) 2001-2004 Open World Ltd
4 //
5 // This file is part of MillScript.
6 //
7 // MillScript is free software; you can redistribute it and/or modify it under
8 // the terms of the GNU General Public License as published by the Free
9 // Software Foundation; either version 2 of the License, or (at your option)
10 // any later version.
11 //
12 // MillScript is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 // more details.
16 //
17 // You should have received a copy of the GNU General Public License along with
18 // MillScript; if not, write to the Free Software Foundation, Inc., 59 Temple
19 // Place, Suite 330, Boston, MA 02111-1307 USA
20 ////////////////////////////////////////////////////////////////////////////////
21 package millscript;
22
23 import java.lang.*;
24 import java.util.*;
25
/**
 * Static text helpers.
 */
public final class TextTools {

    /**
     * Returns a copy of <code>s</code> in which selected characters have been
     * replaced by plain ASCII stand-ins.
     *
     * <ul>
     * <li><code>&amp;</code> becomes the word <code>and</code>.</li>
     * <li>The soft hyphen (0xAD) and the middle dot (0xB7) become <code>-</code>.</li>
     * <li>Accented Latin-1 forms of A, C, E, I, N, O and U (both cases) become
     *     the bare letter; the AE and OE ligatures become two letters.</li>
     * <li>The curly quotes 0x2018 and 0x201C become a backtick, 0x2019 and
     *     0x201D become an apostrophe, and the dagger 0x2020 becomes
     *     <code>+</code>.</li>
     * <li>The zero width no-break space (0xFEFF) is removed.</li>
     * </ul>
     *
     * Every other character, including <code>&lt;</code> and <code>&gt;</code>,
     * is copied through unchanged.
     *
     * @param s the text to filter
     * @return the filtered text
     */
    public static String filter( String s ) {
        final StringBuffer out = new StringBuffer();
        final int length = s.length();
        int pos = 0;
        while ( pos < length ) {
            final char original = s.charAt( pos++ );
            final String substitute = substituteFor( original );
            if ( substitute == null ) {
                // No mapping for this character: keep it as it is.
                out.append( original );
            } else {
                // An empty substitute drops the character altogether.
                out.append( substitute );
            }
        }
        return out.toString();
    }

    /**
     * Looks up the replacement text for one character.
     *
     * @return the replacement (possibly empty), or <code>null</code> when the
     *         character has no mapping and must be kept unchanged
     */
    private static String substituteFor( char ch ) {
        if ( ch < 0x80 ) {
            // The ampersand is the only ASCII character that is rewritten.
            return ch == '&' ? "and" : null;
        }
        if ( ch <= 0xFF ) {
            return latin1SubstituteFor( ch );
        }
        switch ( ch ) {
            case 0x152:  return "OE";   // latin capital ligature OE
            case 0x153:  return "oe";   // latin small ligature oe
            case 0x2018: return "`";    // left single quote
            case 0x2019: return "'";    // right single quote
            case 0x201C: return "`";    // left double quote
            case 0x201D: return "'";    // right double quote
            case 0x2020: return "+";    // dagger
            case 0xFEFF: return "";     // zero width no-break space
            default:     return null;
        }
    }

    /**
     * Replacement lookup for the range 0x80 to 0xFF. Code points that are not
     * listed here (for example 0xD7, the multiplication sign, and 0xF7, the
     * division sign) have no mapping.
     */
    private static String latin1SubstituteFor( char ch ) {
        if ( ch == 0xAD || ch == 0xB7 ) {
            return "-";                 // soft hyphen, middle dot
        }

        // Capital letters.
        if ( within( ch, 0xC0, 0xC5 ) ) {
            return "A";                 // grave, acute, circumflex, tilde, diaeresis, ring
        }
        if ( ch == 0xC6 ) {
            return "AE";                // capital ligature AE
        }
        if ( ch == 0xC7 ) {
            return "C";                 // C cedilla
        }
        if ( within( ch, 0xC8, 0xCB ) ) {
            return "E";                 // grave, acute, circumflex, diaeresis
        }
        if ( within( ch, 0xCC, 0xCF ) ) {
            return "I";                 // grave, acute, circumflex, diaeresis
        }
        if ( ch == 0xD1 ) {
            return "N";                 // N tilde
        }
        if ( within( ch, 0xD2, 0xD6 ) || ch == 0xD8 ) {
            return "O";                 // grave, acute, circumflex, tilde, diaeresis, slash
        }
        if ( within( ch, 0xD9, 0xDC ) ) {
            return "U";                 // grave, acute, circumflex, diaeresis
        }

        // Small letters.
        if ( within( ch, 0xE0, 0xE5 ) ) {
            return "a";                 // grave, acute, circumflex, tilde, diaeresis, ring
        }
        if ( ch == 0xE6 ) {
            return "ae";                // small ligature ae
        }
        if ( ch == 0xE7 ) {
            return "c";                 // c cedilla
        }
        if ( within( ch, 0xE8, 0xEB ) ) {
            return "e";                 // grave, acute, circumflex, diaeresis
        }
        if ( within( ch, 0xEC, 0xEF ) ) {
            return "i";                 // grave, acute, circumflex, diaeresis
        }
        if ( ch == 0xF1 ) {
            return "n";                 // n tilde
        }
        if ( within( ch, 0xF2, 0xF6 ) || ch == 0xF8 ) {
            return "o";                 // grave, acute, circumflex, tilde, diaeresis, slash
        }
        if ( within( ch, 0xF9, 0xFC ) ) {
            return "u";                 // grave, acute, circumflex, diaeresis
        }
        return null;
    }

    /** Tells whether <code>ch</code> lies in the inclusive range first..last. */
    private static boolean within( char ch, int first, int last ) {
        return ch >= first && ch <= last;
    }

}