001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *   http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    
020    package org.apache.myfaces.tobago.util;
021    
022    import java.io.IOException;
023    import java.io.Writer;
024    
025    /**
026     * User: weber
027     * Date: Jun 28, 2005
028     * Time: 2:07:29 PM
029     */
030    public final class HtmlWriterUtil {
031    
032      private static final char[][] CHARS_TO_ESCAPE;
033    
034      static {
035        // init lookup table
036        CHARS_TO_ESCAPE = new char[0xA0][];
037        CHARS_TO_ESCAPE['"'] = """.toCharArray();
038        CHARS_TO_ESCAPE['&'] = "&".toCharArray();
039        CHARS_TO_ESCAPE['<'] = "&lt;".toCharArray();
040        CHARS_TO_ESCAPE['>'] = "&gt;".toCharArray();
041      }
042    
043      private final Writer out;
044    
045      private final ResponseWriterBuffer buffer;
046    
047      private final boolean utf8;
048    
049      public HtmlWriterUtil(final Writer out, final String characterEncoding) {
050        this.out = out;
051        utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
052        buffer = new ResponseWriterBuffer(out);
053      }
054    
055      public void writeAttributeValue(final String text)
056          throws IOException {
057        writeEncodedValue(text.toCharArray(), 0, text.length(), true);
058      }
059    
060      public void writeText(final String text) throws IOException {
061        writeEncodedValue(text.toCharArray(), 0, text.length(), false);
062      }
063    
064      public void writeText(final char[] text, final int start, final int length)
065          throws IOException {
066        writeEncodedValue(text, start, length, false);
067      }
068    
069      private void writeEncodedValue(final char[] text, final int start,
070          final int length, final boolean isAttribute)
071          throws IOException {
072    
073        int localIndex = -1;
074    
075        final int end = start + length;
076        for (int i = start; i < end; i++) {
077          char ch = text[i];
078          if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
079            localIndex = i;
080            break;
081          }
082        }
083    
084        if (localIndex == -1) {
085          // no need to escape
086          out.write(text, start, length);
087        } else {
088          // write until localIndex and then encode the remainder
089          out.write(text, start, localIndex);
090    
091          for (int i = localIndex; i < end; i++) {
092            final char ch = text[i];
093    
094            // Tilde or less...
095            if (ch < CHARS_TO_ESCAPE.length) {
096              if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
097                // HTML 4.0, section B.7.1: ampersands followed by
098                // an open brace don't get escaped
099                buffer.addToBuffer('&');
100              } else if (CHARS_TO_ESCAPE[ch] != null) {
101                buffer.addToBuffer(CHARS_TO_ESCAPE[ch]);
102              } else {
103                buffer.addToBuffer(ch);
104              }
105            } else if (utf8) {
106              buffer.addToBuffer(ch);
107            } else if (ch <= 0xff) {
108              // ISO-8859-1 entities: encode as needed
109              buffer.flushBuffer();
110    
111              out.write('&');
112              char[] chars = ISO8859_1_ENTITIES[ch - 0xA0];
113              out.write(chars, 0, chars.length);
114              out.write(';');
115            } else {
116              buffer.flushBuffer();
117    
118              // Double-byte characters to encode.
119              // PENDING: when outputting to an encoding that
120              // supports double-byte characters (UTF-8, for example),
121              // we should not be encoding
122              writeDecRef(ch);
123            }
124          }
125    
126          buffer.flushBuffer();
127        }
128      }
129    
130    
131      /**
132       * Writes a character as a decimal escape.  Hex escapes are smaller than
133       * the decimal version, but Netscape didn't support hex escapes until
134       * 4.7.4.
135       */
136      private void writeDecRef(final char ch) throws IOException {
137        if (ch == '\u20ac') {
138          out.write("&euro;");
139          return;
140        }
141        out.write("&#");
142        // Formerly used String.valueOf().  This version tests out
143        // about 40% faster in a microbenchmark (and on systems where GC is
144        // going gonzo, it should be even better)
145        int i = (int) ch;
146        if (i > 10000) {
147          out.write('0' + (i / 10000));
148          i = i % 10000;
149          out.write('0' + (i / 1000));
150          i = i % 1000;
151          out.write('0' + (i / 100));
152          i = i % 100;
153          out.write('0' + (i / 10));
154          i = i % 10;
155          out.write('0' + i);
156        } else if (i > 1000) {
157          out.write('0' + (i / 1000));
158          i = i % 1000;
159          out.write('0' + (i / 100));
160          i = i % 100;
161          out.write('0' + (i / 10));
162          i = i % 10;
163          out.write('0' + i);
164        } else {
165          out.write('0' + (i / 100));
166          i = i % 100;
167          out.write('0' + (i / 10));
168          i = i % 10;
169          out.write('0' + i);
170        }
171    
172        out.write(';');
173      }
174    
175      public static boolean attributeValueMustEscaped(final String name) {
176        // this is 30% faster then the  .equals(name) version
177        // tested with 100 loops over 19871 names
178        //       (extracted from logfile over all demo pages)
179    
180        try {
181          switch (name.charAt(0)) {
182            case 'i': // 'id'
183              if (name.length() == 2 && name.charAt(1) == 'd') {
184                return false;
185              }
186              break;
187            case 'n': // 'name'
188              if (name.length() == 4 && name.charAt(1) == 'a' && name.charAt(2) == 'm'
189                  && name.charAt(3) == 'e') {
190                return false;
191              }
192              break;
193            case 'c': // 'class'
194              if (name.length() == 5 && name.charAt(1) == 'l' && name.charAt(2) == 'a'
195                  && name.charAt(3) == 's' && name.charAt(4) == 's') {
196                return false;
197              }
198              break;
199            default:
200              return true;
201          }
202        } catch (NullPointerException e) {
203          // ignore
204        } catch (StringIndexOutOfBoundsException e) {
205          // ignore
206        }
207        return true;
208      }
209    
210      //
211      // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF
212      //
213      private static final char [][] ISO8859_1_ENTITIES = new char [][]{
214          "nbsp".toCharArray(),
215          "iexcl".toCharArray(),
216          "cent".toCharArray(),
217          "pound".toCharArray(),
218          "curren".toCharArray(),
219          "yen".toCharArray(),
220          "brvbar".toCharArray(),
221          "sect".toCharArray(),
222          "uml".toCharArray(),
223          "copy".toCharArray(),
224          "ordf".toCharArray(),
225          "laquo".toCharArray(),
226          "not".toCharArray(),
227          "shy".toCharArray(),
228          "reg".toCharArray(),
229          "macr".toCharArray(),
230          "deg".toCharArray(),
231          "plusmn".toCharArray(),
232          "sup2".toCharArray(),
233          "sup3".toCharArray(),
234          "acute".toCharArray(),
235          "micro".toCharArray(),
236          "para".toCharArray(),
237          "middot".toCharArray(),
238          "cedil".toCharArray(),
239          "sup1".toCharArray(),
240          "ordm".toCharArray(),
241          "raquo".toCharArray(),
242          "frac14".toCharArray(),
243          "frac12".toCharArray(),
244          "frac34".toCharArray(),
245          "iquest".toCharArray(),
246          "Agrave".toCharArray(),
247          "Aacute".toCharArray(),
248          "Acirc".toCharArray(),
249          "Atilde".toCharArray(),
250          "Auml".toCharArray(),
251          "Aring".toCharArray(),
252          "AElig".toCharArray(),
253          "Ccedil".toCharArray(),
254          "Egrave".toCharArray(),
255          "Eacute".toCharArray(),
256          "Ecirc".toCharArray(),
257          "Euml".toCharArray(),
258          "Igrave".toCharArray(),
259          "Iacute".toCharArray(),
260          "Icirc".toCharArray(),
261          "Iuml".toCharArray(),
262          "ETH".toCharArray(),
263          "Ntilde".toCharArray(),
264          "Ograve".toCharArray(),
265          "Oacute".toCharArray(),
266          "Ocirc".toCharArray(),
267          "Otilde".toCharArray(),
268          "Ouml".toCharArray(),
269          "times".toCharArray(),
270          "Oslash".toCharArray(),
271          "Ugrave".toCharArray(),
272          "Uacute".toCharArray(),
273          "Ucirc".toCharArray(),
274          "Uuml".toCharArray(),
275          "Yacute".toCharArray(),
276          "THORN".toCharArray(),
277          "szlig".toCharArray(),
278          "agrave".toCharArray(),
279          "aacute".toCharArray(),
280          "acirc".toCharArray(),
281          "atilde".toCharArray(),
282          "auml".toCharArray(),
283          "aring".toCharArray(),
284          "aelig".toCharArray(),
285          "ccedil".toCharArray(),
286          "egrave".toCharArray(),
287          "eacute".toCharArray(),
288          "ecirc".toCharArray(),
289          "euml".toCharArray(),
290          "igrave".toCharArray(),
291          "iacute".toCharArray(),
292          "icirc".toCharArray(),
293          "iuml".toCharArray(),
294          "eth".toCharArray(),
295          "ntilde".toCharArray(),
296          "ograve".toCharArray(),
297          "oacute".toCharArray(),
298          "ocirc".toCharArray(),
299          "otilde".toCharArray(),
300          "ouml".toCharArray(),
301          "divide".toCharArray(),
302          "oslash".toCharArray(),
303          "ugrave".toCharArray(),
304          "uacute".toCharArray(),
305          "ucirc".toCharArray(),
306          "uuml".toCharArray(),
307          "yacute".toCharArray(),
308          "thorn".toCharArray(),
309          "yuml".toCharArray()
310      };
311    }