View Javadoc

1   ////////////////////////////////////////////////////////////////////////////////
2   // MillScript: an Open Spice interpreter and batch website creation tool
3   // Copyright (C) 2001-2005 Open World Ltd
4   // Copyright (C) 2005 Kevin Rogers
5   //
6   // This file is part of MillScript.
7   //
8   // MillScript is free software; you can redistribute it and/or modify it under
9   // the terms of the GNU General Public License as published by the Free
10  // Software Foundation; either version 2 of the License, or (at your option)
11  // any later version.
12  //
13  // MillScript is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  // more details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // MillScript; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA  02111-1307  USA
21  ////////////////////////////////////////////////////////////////////////////////
22  package org.millscript.millscript.render;
23  
24  import org.millscript.commons.util.EMap;
25  import org.millscript.commons.util.MapIterator;
26  import org.millscript.commons.vfs.VFile;
27  import org.millscript.commons.xml.api.Name;
28  import org.millscript.millscript.alert.Alerts;
29  import org.millscript.millscript.conf.Configuration;
30  import org.millscript.millscript.conf.functions.WarnAboutIllegalXML10CharactersFunction;
31  import org.millscript.millscript.datatypes.CDATA;
32  import org.millscript.millscript.datatypes.XmlElement;
33  
34  import java.io.IOException;
35  
36  /**
37   * This class implements MillScripts XML renderer. It renders to the XML 1.0
38   * specification, but may not always render valid XML 1.0.
39   * <p>
40   * The renderers ability to render valid XML depends on the input it's
41   * provided, e.g. supported tags. The main issue with the renderer is that
42   * database content can contain XML tags, as a result we cannot automatically
43   * encode "&lt;" characters that occur in string content. When we can parse
44   * database content into strings, XmlElement, etc, our ability to render valid
45   * documents will greatly improve.
46   * </p>
47   */
48  public class XML10Renderer extends AbstractRenderer {
49  
50      /**
51       * This renderer is used to render CDATA sections, i.e. the CDATA datatype.
52       */
53      private final XMLCDATARenderer CDATA_RENDERER;
54  
55      private EMap< String, String > prefixToNamespace = new PrefixToNamespaceMap();
56  
57      /**
58       * Indicates if this renderer should issue a warning when it encounters
59       * illegal XML 1.0 characters. By default this is false and MillScript will
60       * silently remove illegal characters from the document.
61       */
62      private final boolean warnAboutIllegalCharacters;
63  
64      /**
65       * Constructs a new XML renderer, to render to the specified virtual file
66       * using the given confguration.
67       *
68       * @param conf  the configuration to get rendering parameters from
69       * @param file  the virtual output file
70       */
71      public XML10Renderer( final Configuration conf, final VFile file ) {
72          super( conf.getXMLCharacterEntity(), conf, conf.getOutputCharset(), file );
73          this.CDATA_RENDERER = new XMLCDATARenderer( this );
74          this.warnAboutIllegalCharacters = conf.getBooleanProperty(
75              WarnAboutIllegalXML10CharactersFunction.KEY
76          );
77      }
78  
79      /**
80       * @see org.millscript.millscript.render.Renderer#append(char)
81       */
82      public final void append( final char ch ) throws IOException {
83          // Check the character is a valid XML 1.0 char
84          if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
85              if ( !this.canEncode( ch ) || ch == '&' || ch == '<' || ch == '>' ) {
86                  // Ok, the character cannot be written in the current encoding OR
87                  // it must be escaped, so write the escape
88                  this.appendEscapeFor( ch );
89              } else {
90                  // The character is ok, so simply write out the byte sequence
91                  this.outputWriter.write( ch );
92              }
93          } else if ( this.warnAboutIllegalCharacters ) {
94              this.config.reportAlertAsWarning(
95                  Alerts.eval(
96                      "XML 1.0 does not support this character",
97                      "XML 1.0 only allows a specific subset of Unicode characters"
98                  ).
99                  culprit( "character entity", this.availableEntities.getEntityFor( ch ) )
100             );
101         }
102     }
103 
104     /**
105      * @see org.millscript.millscript.render.Renderer#appendEscapeFor(char)
106      */
107     public final void appendEscapeFor( final char ch ) throws IOException {
108         this.appendNoEscape( this.availableEntities.getEntityFor( ch ) );
109     }
110 
111     /**
112      * @see org.millscript.millscript.render.Renderer#appendNoEscape(char)
113      */
114     public final void appendNoEscape( final char ch ) throws IOException {
115         // Check the character is a valid XML 1.0 char
116         if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
117             // Check we can encode this character
118             if ( this.canEncode( ch ) ) {
119                 this.outputWriter.write( ch );
120             } else {
121                 throw(
122                     Alerts.eval(
123                         "XML does not allow an entity at this point",
124                         "The character is not supported by this character set"
125                     ).
126                     culprit( "character", new Character( ch ) ).
127                     mishap()
128                 );
129             }
130         } else if ( this.warnAboutIllegalCharacters ) {
131             this.config.reportAlertAsWarning(
132                 Alerts.eval(
133                     "XML 1.0 does not support this character",
134                     "XML 1.0 only allows a specific subset of Unicode characters"
135                 ).
136                 culprit( "character entity", this.availableEntities.getEntityFor( ch ) )
137             );
138         }
139     }
140 
141     /**
142      * @see org.millscript.millscript.render.Renderer#renderCDATA(org.millscript.millscript.datatypes.CDATA)
143      */
144     @Override
145     public final void renderCDATA( final CDATA c ) throws IOException {
146         this.CDATA_RENDERER.renderCDATA( c );
147     }
148 
149     /**
150      * @see org.millscript.millscript.render.Renderer#renderDocumentFooter()
151      */
152     public final void renderDocumentFooter() throws IOException {
153         // Put a newline at the end of the file to make it look a bit better
154         this.outputWriter.write( '\n' );
155     }
156 
157     /**
158      * @see org.millscript.millscript.render.Renderer#renderDocumentHeader()
159      */
160     public final void renderDocumentHeader() throws IOException {
161         this.appendNoEscape( "<?xml version=\"1.0\" encoding=\"" );
162         this.appendNoEscape( this.outputCharset.name() );
163         this.appendNoEscape( "\"?>\n" );
164     }
165 
166     /**
167      * @see org.millscript.millscript.render.Renderer#renderObject(java.lang.Object)
168      */
169     public final void renderObject( final Object o ) throws IOException {
170         String s = o.toString();
171         int len = s.length();
172         for ( int i = 0; i < len; i++ ) {
173             char ch = s.charAt( i );
174             // Check the character is a valid XML 1.0 char
175             if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
176                 // Check we can encode this character
177                 if ( !this.canEncode( ch ) || ch == '&' ) {
178                     this.appendEscapeFor( ch );
179                 } else {
180                     this.outputWriter.write( ch );
181                 }
182             } else if ( this.warnAboutIllegalCharacters ) {
183                 this.config.reportAlertAsWarning(
184                     Alerts.eval(
185                         "XML 1.0 does not support this character",
186                         "XML 1.0 only allows a specific subset of Unicode characters"
187                     ).
188                     culprit( "character entity", this.availableEntities.getEntityFor( ch ) ).
189                     culprit( "element content", s )
190                 );
191             }
192         }
193     }
194 
195     /**
196      * @see org.millscript.millscript.render.Renderer#renderXMLElement(org.millscript.millscript.datatypes.XmlElement)
197      */
198     public final void renderXMLElement( final XmlElement x ) throws IOException {
199         // Store the declared namespaces before we start rendering this element
200         final EMap< String, String > previousPrefixToNamespace = this.prefixToNamespace;
201         // Initialise storage for this elements namespaces, note we fill this
202         // in as we write out the start tag for this element
203         this.prefixToNamespace = new PrefixToNamespaceMap( previousPrefixToNamespace );
204         // Write the first part of the start tag
205         this.appendNoEscape( '<' );
206         // Get the tag name
207         final Name tagName = x.getName();
208         // First check if the prefix for the tag is currently bound to a
209         // namespace
210         final String tagPrefixNamespace = this.prefixToNamespace.get( tagName.getPrefix() );
211         if ( tagPrefixNamespace == null ) {
212             // This tags prefix is not bound yet, so let's declare it without
213             // bothering to check if the namespace is bound to a different
214             // prefix or not
215             this.prefixToNamespace.insert( tagName.getPrefix(), tagName.getNamespace() );
216             // Now we can write the whole qualified name
217             this.appendNoEscape( tagName.getQName() );
218             // and a namespace declaration
219             if ( "".equals( tagName.getPrefix() ) ) {
220                 this.appendNoEscape( " xmlns" );
221             } else {
222                 this.appendNoEscape( " xmlns:" );
223                 this.appendNoEscape( tagName.getPrefix() );
224             }
225             this.renderXMLElementAttributeValue( null, tagName.getNamespace() );
226         } else {
227             // This tags prefix is already bound to a namespace, we must check
228             // if it's bound to the same namespace.
229             if ( tagPrefixNamespace.equals( tagName.getNamespace() ) ) {
230                 // The declared prefix is bound to the same namespace, so we
231                 // can just write out the whole qualified name
232                 this.appendNoEscape( tagName.getQName() );
233             } else if ( "".equals( tagName.getPrefix() ) ) {
234                 // The tag is in the default namespace but the default
235                 // namespace in scope is different to this tag's namspace
236                 // We'll override the existing default namespace in this scope
237                 this.prefixToNamespace.insert( tagName.getPrefix(), tagName.getNamespace() );
238                 // Now we can write the local name
239                 this.appendNoEscape( tagName.getQName() );
240                 // and a namespace declaration
241                 this.appendNoEscape( " xmlns" );
242                 this.renderXMLElementAttributeValue( null, tagName.getNamespace() );
243             } else {
244                 // The declared prefix is bound to a different namespace. We
245                 // must either override this prefix in this scope or create a
246                 // new binding to a unique prefix. We'll use a unique prefix as
247                 // it will potentially keep the number of re-declarations to a
248                 // minimum.
249                 final String uniquePrefix = this.getUniquePrefix( tagName );
250                 // Now write out the tag name using the new unique prefix
251                 this.appendNoEscape( uniquePrefix );
252                 this.appendNoEscape( ':' );
253                 this.appendNoEscape( tagName.getLocalName() );
254                 // and a namespace declaration
255                 this.appendNoEscape( " xmlns:" );
256                 this.appendNoEscape( uniquePrefix );
257                 this.renderXMLElementAttributeValue( null, tagName.getNamespace() );
258             }
259         }
260         MapIterator< Name, String > it = x.getAttributes().iterator( true );
261         while ( it.hasNext() ) {
262             Name key = it.nextKey();
263             if ( key != null ) {
264                 this.appendNoEscape( ' ' );
265                 // First check if the attribute has a default prefix
266                 if ( "".equals( key.getPrefix() ) ) {
267                     // The attribute name has a default prefix, so there is
268                     // nothing special required.
269                     this.appendNoEscape( key.getLocalName() );
270                     // and the attribute value
271                     this.renderXMLElementAttributeValue( key, it.currentValue() );
272                 } else if ( key.getPrefix() == null ) {
273                     // The prefix must not be null, it violated the API
274                     throw(
275                         Alerts.fault(
276                             "XML attribute prefix cannot be null"
277                         ).
278                         culprit( "attribute name", key ).
279                         culprit( "attribute value", it.currentValue() ).
280                         mishap()
281                     );
282                 } else {
283                     // The attribute has a prefixed name, so we must handle it
284                     // accordingly. Step one, check if the prefix for the
285                     // attribute name is currently declared.
286                     final String attrPrefixNamespace = this.prefixToNamespace.get( key.getPrefix() );
287                     if ( attrPrefixNamespace == null ) {
288                         // This tags prefix is not bound yet, so let's declare it without
289                         // bothering to check if the namespace is bound to a different
290                         // prefix or not
291                         this.prefixToNamespace.insert( key.getPrefix(), key.getNamespace() );
292                         // Now we can write the whole qualified name
293                         this.appendNoEscape( key.getQName() );
294                         // and the attribute value
295                         this.renderXMLElementAttributeValue( key, it.currentValue() );
296                         // and a namespace declaration
297                         if ( "".equals( key.getPrefix() ) ) {
298                             this.appendNoEscape( " xmlns" );
299                         } else {
300                             this.appendNoEscape( " xmlns:" );
301                             this.appendNoEscape( key.getPrefix() );
302                         }
303                         this.renderXMLElementAttributeValue( null, key.getNamespace() );
304                     } else {
305                         // This tags prefix is already bound to a namespace, we must check
306                         // if it's bound to the same namespace.
307                         if ( attrPrefixNamespace.equals( key.getNamespace() ) ) {
308                             // The declared prefix is bound to the same namespace, so we
309                             // can just write out the whole qualified name
310                             this.appendNoEscape( key.getQName() );
311                             // and the attribute value
312                             this.renderXMLElementAttributeValue( key, it.currentValue() );
313                         } else if ( "".equals( key.getPrefix() ) ) {
314                             // The tag is in the default namespace but the default
315                             // namespace in scope is different to this tag's namspace
316                             // We'll override the existing default namespace in this scope
317                             this.prefixToNamespace.insert( key.getPrefix(), key.getNamespace() );
318                             // Now we can write the local name
319                             this.appendNoEscape( key.getQName() );
320                             // and the attribute value
321                             this.renderXMLElementAttributeValue( key, it.currentValue() );
322                             // and a namespace declaration
323                             this.appendNoEscape( " xmlns" );
324                             this.renderXMLElementAttributeValue( null, key.getNamespace() );
325                         } else {
326                             // The declared prefix is bound to a different namespace. We
327                             // must either override this prefix in this scope or create a
328                             // new binding to a unique prefix. We'll use a unique prefix as
329                             // it will potentially keep the number of re-declarations to a
330                             // minimum.
331                             final String uniquePrefix = this.getUniquePrefix( key );
332                             // Now write out the tag name using the new unique prefix
333                             this.appendNoEscape( uniquePrefix );
334                             this.appendNoEscape( ':' );
335                             this.appendNoEscape( key.getLocalName() );
336                             // and the attribute value
337                             this.renderXMLElementAttributeValue( key, it.currentValue() );
338                             // and a namespace declaration
339                             this.appendNoEscape( " xmlns:" );
340                             this.appendNoEscape( uniquePrefix );
341                             this.renderXMLElementAttributeValue( null, key.getNamespace() );
342                         }
343                     }
344                 }
345             }
346         }
347 
348         final Object[] kids = x.getChildren();
349         final int nkids = kids.length;
350 
351         if ( nkids == 0 ) {
352             this.appendNoEscape( " />" );
353         } else {
354             this.appendNoEscape( '>' );
355             for ( int i = 0; i < nkids; i++ ) {
356                 this.render( kids[ i ] );
357             }
358             this.appendNoEscape( "</" );
359             this.appendNoEscape( x.tagName() );
360             this.appendNoEscape( '>' );
361         }
362         // Restore the previos namespace scope now we've finished rendering
363         // this element
364         this.prefixToNamespace = previousPrefixToNamespace;
365     }
366 
367     public void renderXMLElementAttributeValue( final Name name, final String value ) throws IOException {
368         // All attributes have the equals and quotes even if there is
369         // no value.
370         this.appendNoEscape( "=\"" );
371         if ( value != null ) {
372             // For details on XML attribute rendering see:
373             // http://www.w3.org/TR/2000/REC-xml-20001006#syntax
374             // http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
375             int len = value.length();
376             for ( int i = 0; i < len; i++ ) {
377                 char ch = value.charAt( i );
378                 // Check the character is a valid XML 1.0 char
379                 if ( ch == 0x09 || ch == 0x0A || ch == 0x0D || ch >= 0x20 && ch <= 0xD7FF || ch >= 0xE000 && ch <= 0xFFFD ) {
380                     // Check we can encode this character
381                     if ( !this.canEncode( ch ) || ch == '&' || ch == '<' || ch == '>' || ch == '"' || ch == '\n' || ch == '\r' || ch == '\t' ) {
382                         this.appendEscapeFor( ch );
383                     } else {
384                         this.outputWriter.write( ch );
385                     }
386                 } else if ( this.warnAboutIllegalCharacters ) {
387                     this.config.reportAlertAsWarning(
388                         Alerts.eval(
389                             "XML 1.0 does not support this character",
390                             "XML 1.0 only allows a specific subset of Unicode characters"
391                         ).
392                         culprit( "character entity", this.availableEntities.getEntityFor( ch ) ).
393                         culprit( "attribute name", name ).
394                         culprit( "attribute value", value )
395                     );
396                 }
397             }
398         }
399         this.appendNoEscape( '"' );
400     }
401 
402     /**
403      * Returns a new unique prefix based on the specified elements. This method
404      * should only be used in situations where the elements namespace is not
405      * declared, but the prefix is in use. By definition when this method
406      * returns a new namepsace will have been declared and an
407      * <code>xmlns</code> attribute must be written.
408      *
409      * @param tagName   the element to declare a new unique prefix/namespace
410      * for
411      * @return  the unique prefix for the specified elements prefix/namespace
412      */
413     public String getUniquePrefix( final Name tagName ) {
414         for ( int k = 1;; k++ ) {
415             String uniquePrefix = (
416                 tagName.getPrefix() +
417                 Integer.toString( k )
418             );
419             // Is this unique prefix already declared?
420             final String uniquePrefixNamespace = this.prefixToNamespace.get( uniquePrefix );
421             if ( uniquePrefixNamespace == null ) {
422                 // We've found the first free prefix, so declare and use it
423                 this.prefixToNamespace.insert( uniquePrefix, tagName.getNamespace() );
424                 // Return the unique prefix for this elements namespace
425                 return uniquePrefix;
426             }
427             // We could also check if the namespace for the unique prefix
428             // matches the elements, however this should never happen if this
429             // method is used when the elements namespace is not declared but
430             // its prefix is
431         }
432     }
433 
434 }