1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.millscript.millscript.render;
23
24 import org.millscript.commons.util.EMap;
25 import org.millscript.commons.util.MapIterator;
26 import org.millscript.commons.vfs.VFile;
27 import org.millscript.commons.xml.api.Name;
28 import org.millscript.millscript.alert.Alerts;
29 import org.millscript.millscript.conf.Configuration;
30 import org.millscript.millscript.conf.functions.WarnAboutIllegalXML10CharactersFunction;
31 import org.millscript.millscript.datatypes.CDATA;
32 import org.millscript.millscript.datatypes.XmlElement;
33
34 import java.io.IOException;
35
36 /**
37 * This class implements MillScripts XML renderer. It renders to the XML 1.0
38 * specification, but may not always render valid XML 1.0.
39 * <p>
40 * The renderers ability to render valid XML depends on the input it's
41 * provided, e.g. supported tags. The main issue with the renderer is that
42 * database content can contain XML tags, as a result we cannot automatically
43 * encode "<" characters that occur in string content. When we can parse
44 * database content into strings, XmlElement, etc, our ability to render valid
45 * documents will greatly improve.
46 * </p>
47 */
48 public class XML10Renderer extends AbstractRenderer {
49
50 /**
51 * This renderer is used to render CDATA sections, i.e. the CDATA datatype.
52 */
53 private final XMLCDATARenderer CDATA_RENDERER;
54
55 private EMap< String, String > prefixToNamespace = new PrefixToNamespaceMap();
56
57 /**
58 * Indicates if this renderer should issue a warning when it encounters
59 * illegal XML 1.0 characters. By default this is false and MillScript will
60 * silently remove illegal characters from the document.
61 */
62 private final boolean warnAboutIllegalCharacters;
63
64 /**
65 * Constructs a new XML renderer, to render to the specified virtual file
66 * using the given confguration.
67 *
68 * @param conf the configuration to get rendering parameters from
69 * @param file the virtual output file
70 */
71 public XML10Renderer( final Configuration conf, final VFile file ) {
72 super( conf.getXMLCharacterEntity(), conf, conf.getOutputCharset(), file );
73 this.CDATA_RENDERER = new XMLCDATARenderer( this );
74 this.warnAboutIllegalCharacters = conf.getBooleanProperty(
75 WarnAboutIllegalXML10CharactersFunction.KEY
76 );
77 }
78
79 /**
80 * @see org.millscript.millscript.render.Renderer#append(char)
81 */
82 public final void append( final char ch ) throws IOException {
83
84 if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
85 if ( !this.canEncode( ch ) || ch == '&' || ch == '<' || ch == '>' ) {
86
87
88 this.appendEscapeFor( ch );
89 } else {
90
91 this.outputWriter.write( ch );
92 }
93 } else if ( this.warnAboutIllegalCharacters ) {
94 this.config.reportAlertAsWarning(
95 Alerts.eval(
96 "XML 1.0 does not support this character",
97 "XML 1.0 only allows a specific subset of Unicode characters"
98 ).
99 culprit( "character entity", this.availableEntities.getEntityFor( ch ) )
100 );
101 }
102 }
103
104 /**
105 * @see org.millscript.millscript.render.Renderer#appendEscapeFor(char)
106 */
107 public final void appendEscapeFor( final char ch ) throws IOException {
108 this.appendNoEscape( this.availableEntities.getEntityFor( ch ) );
109 }
110
111 /**
112 * @see org.millscript.millscript.render.Renderer#appendNoEscape(char)
113 */
114 public final void appendNoEscape( final char ch ) throws IOException {
115
116 if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
117
118 if ( this.canEncode( ch ) ) {
119 this.outputWriter.write( ch );
120 } else {
121 throw(
122 Alerts.eval(
123 "XML does not allow an entity at this point",
124 "The character is not supported by this character set"
125 ).
126 culprit( "character", new Character( ch ) ).
127 mishap()
128 );
129 }
130 } else if ( this.warnAboutIllegalCharacters ) {
131 this.config.reportAlertAsWarning(
132 Alerts.eval(
133 "XML 1.0 does not support this character",
134 "XML 1.0 only allows a specific subset of Unicode characters"
135 ).
136 culprit( "character entity", this.availableEntities.getEntityFor( ch ) )
137 );
138 }
139 }
140
141 /**
142 * @see org.millscript.millscript.render.Renderer#renderCDATA(org.millscript.millscript.datatypes.CDATA)
143 */
144 @Override
145 public final void renderCDATA( final CDATA c ) throws IOException {
146 this.CDATA_RENDERER.renderCDATA( c );
147 }
148
149 /**
150 * @see org.millscript.millscript.render.Renderer#renderDocumentFooter()
151 */
152 public final void renderDocumentFooter() throws IOException {
153
154 this.outputWriter.write( '\n' );
155 }
156
157 /**
158 * @see org.millscript.millscript.render.Renderer#renderDocumentHeader()
159 */
160 public final void renderDocumentHeader() throws IOException {
161 this.appendNoEscape( "<?xml version=\"1.0\" encoding=\"" );
162 this.appendNoEscape( this.outputCharset.name() );
163 this.appendNoEscape( "\"?>\n" );
164 }
165
166 /**
167 * @see org.millscript.millscript.render.Renderer#renderObject(java.lang.Object)
168 */
169 public final void renderObject( final Object o ) throws IOException {
170 String s = o.toString();
171 int len = s.length();
172 for ( int i = 0; i < len; i++ ) {
173 char ch = s.charAt( i );
174
175 if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
176
177 if ( !this.canEncode( ch ) || ch == '&' ) {
178 this.appendEscapeFor( ch );
179 } else {
180 this.outputWriter.write( ch );
181 }
182 } else if ( this.warnAboutIllegalCharacters ) {
183 this.config.reportAlertAsWarning(
184 Alerts.eval(
185 "XML 1.0 does not support this character",
186 "XML 1.0 only allows a specific subset of Unicode characters"
187 ).
188 culprit( "character entity", this.availableEntities.getEntityFor( ch ) ).
189 culprit( "element content", s )
190 );
191 }
192 }
193 }
194
195 /**
196 * @see org.millscript.millscript.render.Renderer#renderXMLElement(org.millscript.millscript.datatypes.XmlElement)
197 */
198 public final void renderXMLElement( final XmlElement x ) throws IOException {
199
200 final EMap< String, String > previousPrefixToNamespace = this.prefixToNamespace;
201
202
203 this.prefixToNamespace = new PrefixToNamespaceMap( previousPrefixToNamespace );
204
205 this.appendNoEscape( '<' );
206
207 final Name tagName = x.getName();
208
209
210 final String tagPrefixNamespace = this.prefixToNamespace.get( tagName.getPrefix() );
211 if ( tagPrefixNamespace == null ) {
212
213
214
215 this.prefixToNamespace.insert( tagName.getPrefix(), tagName.getNamespace() );
216
217 this.appendNoEscape( tagName.getQName() );
218
219 if ( "".equals( tagName.getPrefix() ) ) {
220 this.appendNoEscape( " xmlns" );
221 } else {
222 this.appendNoEscape( " xmlns:" );
223 this.appendNoEscape( tagName.getPrefix() );
224 }
225 this.renderXMLElementAttributeValue( null, tagName.getNamespace() );
226 } else {
227
228
229 if ( tagPrefixNamespace.equals( tagName.getNamespace() ) ) {
230
231
232 this.appendNoEscape( tagName.getQName() );
233 } else if ( "".equals( tagName.getPrefix() ) ) {
234
235
236
237 this.prefixToNamespace.insert( tagName.getPrefix(), tagName.getNamespace() );
238
239 this.appendNoEscape( tagName.getQName() );
240
241 this.appendNoEscape( " xmlns" );
242 this.renderXMLElementAttributeValue( null, tagName.getNamespace() );
243 } else {
244
245
246
247
248
249 final String uniquePrefix = this.getUniquePrefix( tagName );
250
251 this.appendNoEscape( uniquePrefix );
252 this.appendNoEscape( ':' );
253 this.appendNoEscape( tagName.getLocalName() );
254
255 this.appendNoEscape( " xmlns:" );
256 this.appendNoEscape( uniquePrefix );
257 this.renderXMLElementAttributeValue( null, tagName.getNamespace() );
258 }
259 }
260 MapIterator< Name, String > it = x.getAttributes().iterator( true );
261 while ( it.hasNext() ) {
262 Name key = it.nextKey();
263 if ( key != null ) {
264 this.appendNoEscape( ' ' );
265
266 if ( "".equals( key.getPrefix() ) ) {
267
268
269 this.appendNoEscape( key.getLocalName() );
270
271 this.renderXMLElementAttributeValue( key, it.currentValue() );
272 } else if ( key.getPrefix() == null ) {
273
274 throw(
275 Alerts.fault(
276 "XML attribute prefix cannot be null"
277 ).
278 culprit( "attribute name", key ).
279 culprit( "attribute value", it.currentValue() ).
280 mishap()
281 );
282 } else {
283
284
285
286 final String attrPrefixNamespace = this.prefixToNamespace.get( key.getPrefix() );
287 if ( attrPrefixNamespace == null ) {
288
289
290
291 this.prefixToNamespace.insert( key.getPrefix(), key.getNamespace() );
292
293 this.appendNoEscape( key.getQName() );
294
295 this.renderXMLElementAttributeValue( key, it.currentValue() );
296
297 if ( "".equals( key.getPrefix() ) ) {
298 this.appendNoEscape( " xmlns" );
299 } else {
300 this.appendNoEscape( " xmlns:" );
301 this.appendNoEscape( key.getPrefix() );
302 }
303 this.renderXMLElementAttributeValue( null, key.getNamespace() );
304 } else {
305
306
307 if ( attrPrefixNamespace.equals( key.getNamespace() ) ) {
308
309
310 this.appendNoEscape( key.getQName() );
311
312 this.renderXMLElementAttributeValue( key, it.currentValue() );
313 } else if ( "".equals( key.getPrefix() ) ) {
314
315
316
317 this.prefixToNamespace.insert( key.getPrefix(), key.getNamespace() );
318
319 this.appendNoEscape( key.getQName() );
320
321 this.renderXMLElementAttributeValue( key, it.currentValue() );
322
323 this.appendNoEscape( " xmlns" );
324 this.renderXMLElementAttributeValue( null, key.getNamespace() );
325 } else {
326
327
328
329
330
331 final String uniquePrefix = this.getUniquePrefix( key );
332
333 this.appendNoEscape( uniquePrefix );
334 this.appendNoEscape( ':' );
335 this.appendNoEscape( key.getLocalName() );
336
337 this.renderXMLElementAttributeValue( key, it.currentValue() );
338
339 this.appendNoEscape( " xmlns:" );
340 this.appendNoEscape( uniquePrefix );
341 this.renderXMLElementAttributeValue( null, key.getNamespace() );
342 }
343 }
344 }
345 }
346 }
347
348 final Object[] kids = x.getChildren();
349 final int nkids = kids.length;
350
351 if ( nkids == 0 ) {
352 this.appendNoEscape( " />" );
353 } else {
354 this.appendNoEscape( '>' );
355 for ( int i = 0; i < nkids; i++ ) {
356 this.render( kids[ i ] );
357 }
358 this.appendNoEscape( "</" );
359 this.appendNoEscape( x.tagName() );
360 this.appendNoEscape( '>' );
361 }
362
363
364 this.prefixToNamespace = previousPrefixToNamespace;
365 }
366
367 public void renderXMLElementAttributeValue( final Name name, final String value ) throws IOException {
368
369
370 this.appendNoEscape( "=\"" );
371 if ( value != null ) {
372
373
374
375 int len = value.length();
376 for ( int i = 0; i < len; i++ ) {
377 char ch = value.charAt( i );
378
379 if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
380
381 if ( !this.canEncode( ch ) || ch == '&' || ch == '<' || ch == '>' || ch == '"' || ch == '\n' || ch == '\r' || ch == '\t' ) {
382 this.appendEscapeFor( ch );
383 } else {
384 this.outputWriter.write( ch );
385 }
386 } else if ( this.warnAboutIllegalCharacters ) {
387 this.config.reportAlertAsWarning(
388 Alerts.eval(
389 "XML 1.0 does not support this character",
390 "XML 1.0 only allows a specific subset of Unicode characters"
391 ).
392 culprit( "character entity", this.availableEntities.getEntityFor( ch ) ).
393 culprit( "attribute name", name ).
394 culprit( "attribute value", value )
395 );
396 }
397 }
398 }
399 this.appendNoEscape( '"' );
400 }
401
402 /**
403 * Returns a new unique prefix based on the specified elements. This method
404 * should only be used in situations where the elements namespace is not
405 * declared, but the prefix is in use. By definition when this method
406 * returns a new namepsace will have been declared and an
407 * <code>xmlns</code> attribute must be written.
408 *
409 * @param tagName the element to declare a new unique prefix/namespace
410 * for
411 * @return the unique prefix for the specified elements prefix/namespace
412 */
413 public String getUniquePrefix( final Name tagName ) {
414 for ( int k = 1;; k++ ) {
415 String uniquePrefix = (
416 tagName.getPrefix() +
417 Integer.toString( k )
418 );
419
420 final String uniquePrefixNamespace = this.prefixToNamespace.get( uniquePrefix );
421 if ( uniquePrefixNamespace == null ) {
422
423 this.prefixToNamespace.insert( uniquePrefix, tagName.getNamespace() );
424
425 return uniquePrefix;
426 }
427
428
429
430
431 }
432 }
433
434 }