/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.myfaces.tobago.util;

import java.io.IOException;
import java.io.Writer;

/**
 * Writes HTML text and attribute values to a {@link Writer}, replacing the
 * markup-significant characters {@code "}, {@code &}, {@code <} and {@code >}
 * with entity references.
 * <p>
 * When the character encoding passed to the constructor is not UTF-8,
 * characters from 0xA0 to 0xFF are written as named ISO-8859-1 entities and
 * characters above 0xFF as decimal character references. Each {@code char} is
 * handled on its own, so the two halves of a surrogate pair are emitted as two
 * separate references in that mode.
 */
public final class HtmlWriterUtil {

  /**
   * Lookup table indexed by character code (0x00 to 0x9F). A non-null entry is
   * the replacement text for that character; a null entry means the character
   * is written unchanged.
   */
  private static final char[][] CHARS_TO_ESCAPE;

  static {
    // init lookup table
    CHARS_TO_ESCAPE = new char[0xA0][];
    CHARS_TO_ESCAPE['"'] = "&quot;".toCharArray();
    CHARS_TO_ESCAPE['&'] = "&amp;".toCharArray();
    CHARS_TO_ESCAPE['<'] = "&lt;".toCharArray();
    CHARS_TO_ESCAPE['>'] = "&gt;".toCharArray();
  }

  private final Writer out;

  // NOTE(review): constructed around the same writer as "out"; presumably
  // flushBuffer() forwards the collected characters to it - confirm in
  // ResponseWriterBuffer.
  private final ResponseWriterBuffer buffer;

  /** True if the target encoding is UTF-8, i.e. non-ASCII characters may be written as-is. */
  private final boolean utf8;

  /**
   * @param out the writer that receives the encoded output
   * @param characterEncoding name of the output encoding; only "utf-8"
   *     (compared case-insensitively) switches off entity encoding of
   *     characters at or above 0xA0
   */
  public HtmlWriterUtil(final Writer out, final String characterEncoding) {
    this.out = out;
    utf8 = "utf-8".equalsIgnoreCase(characterEncoding);
    buffer = new ResponseWriterBuffer(out);
  }

  /**
   * Writes the given text escaped for use inside an attribute value.
   * An ampersand directly followed by <code>{</code> is left unescaped.
   *
   * @param text the raw attribute value, must not be null
   * @throws IOException if the underlying writer fails
   */
  public void writeAttributeValue(final String text)
      throws IOException {
    writeEncodedValue(text.toCharArray(), 0, text.length(), true);
  }

  /**
   * Writes the given text escaped for use as element content.
   *
   * @param text the raw text, must not be null
   * @throws IOException if the underlying writer fails
   */
  public void writeText(final String text) throws IOException {
    writeEncodedValue(text.toCharArray(), 0, text.length(), false);
  }

  /**
   * Writes a section of the given character array escaped for use as element content.
   *
   * @param text the characters to read from
   * @param start index of the first character to write
   * @param length number of characters to write
   * @throws IOException if the underlying writer fails
   */
  public void writeText(final char[] text, final int start, final int length)
      throws IOException {
    writeEncodedValue(text, start, length, false);
  }

  /**
   * Writes {@code length} characters of {@code text}, beginning at
   * {@code start}, with HTML escaping applied.
   *
   * @param isAttribute true to apply the attribute-only rule that keeps
   *     an ampersand followed by an open brace unescaped
   */
  private void writeEncodedValue(final char[] text, final int start,
      final int length, final boolean isAttribute)
      throws IOException {

    final int end = start + length;

    // Locate the first character that cannot simply be copied through.
    int localIndex = -1;
    for (int i = start; i < end; i++) {
      final char ch = text[i];
      if (ch >= CHARS_TO_ESCAPE.length || CHARS_TO_ESCAPE[ch] != null) {
        localIndex = i;
        break;
      }
    }

    if (localIndex == -1) {
      // no need to escape
      out.write(text, start, length);
      return;
    }

    // Copy the clean prefix unchanged, then encode the remainder.
    // Writer.write(char[], off, len) expects a length, not an end index.
    out.write(text, start, localIndex - start);

    for (int i = localIndex; i < end; i++) {
      final char ch = text[i];

      if (ch < CHARS_TO_ESCAPE.length) {
        // Below 0xA0: the lookup table decides.
        if (isAttribute && ch == '&' && (i + 1 < end) && text[i + 1] == '{') {
          // HTML 4.0, section B.7.1: ampersands followed by
          // an open brace don't get escaped
          buffer.addToBuffer('&');
        } else if (CHARS_TO_ESCAPE[ch] != null) {
          buffer.addToBuffer(CHARS_TO_ESCAPE[ch]);
        } else {
          buffer.addToBuffer(ch);
        }
      } else if (utf8) {
        // UTF-8 output can represent the character directly.
        buffer.addToBuffer(ch);
      } else if (ch <= 0xff) {
        // ISO-8859-1 entities: encode as needed.
        // Flush first so buffered characters precede the direct writes below.
        buffer.flushBuffer();

        out.write('&');
        final char[] chars = ISO8859_1_ENTITIES[ch - 0xA0];
        out.write(chars, 0, chars.length);
        out.write(';');
      } else {
        buffer.flushBuffer();

        // Characters above 0xFF in a non-UTF-8 encoding are written
        // as decimal character references.
        writeDecRef(ch);
      }
    }

    buffer.flushBuffer();
  }

  /**
   * Writes a character as a decimal escape. Hex escapes are smaller than
   * the decimal version, but Netscape didn't support hex escapes until
   * 4.7.4. The euro sign is written as the named entity {@code &euro;}.
   * At least three digits are always written.
   */
  private void writeDecRef(final char ch) throws IOException {
    if (ch == '\u20ac') {
      out.write("&euro;");
      return;
    }
    out.write("&#");
    // Digits are emitted one by one instead of going through String.valueOf()
    // to avoid allocating a temporary String per character.
    // The comparisons must be inclusive: with "> 10000" / "> 1000" the values
    // 10000 and 1000 would produce '0' + 10, which is ':'.
    final int code = (int) ch;
    if (code >= 10000) {
      out.write('0' + (code / 10000));
    }
    if (code >= 1000) {
      out.write('0' + ((code / 1000) % 10));
    }
    out.write('0' + ((code / 100) % 10));
    out.write('0' + ((code / 10) % 10));
    out.write('0' + (code % 10));

    out.write(';');
  }

  /**
   * Tells whether the value of the attribute with the given name has to be escaped.
   *
   * @param name the attribute name, may be null
   * @return false for exactly "id", "name" and "class"; true for every other
   *     name, including null and the empty string
   */
  public static boolean attributeValueMustEscaped(final String name) {
    // this is 30% faster than the .equals(name) version
    // tested with 100 loops over 19871 names
    // (extracted from logfile over all demo pages)

    if (name == null || name.length() == 0) {
      return true;
    }

    switch (name.charAt(0)) {
      case 'i': // 'id'
        if (name.length() == 2 && name.charAt(1) == 'd') {
          return false;
        }
        break;
      case 'n': // 'name'
        if (name.length() == 4 && name.charAt(1) == 'a' && name.charAt(2) == 'm'
            && name.charAt(3) == 'e') {
          return false;
        }
        break;
      case 'c': // 'class'
        if (name.length() == 5 && name.charAt(1) == 'l' && name.charAt(2) == 'a'
            && name.charAt(3) == 's' && name.charAt(4) == 's') {
          return false;
        }
        break;
      default:
        return true;
    }
    return true;
  }

  //
  // Entities from HTML 4.0, section 24.2.1; character codes 0xA0 to 0xFF.
  // Index into this table with (character code - 0xA0).
  //
  private static final char [][] ISO8859_1_ENTITIES = new char [][]{
    "nbsp".toCharArray(),
    "iexcl".toCharArray(),
    "cent".toCharArray(),
    "pound".toCharArray(),
    "curren".toCharArray(),
    "yen".toCharArray(),
    "brvbar".toCharArray(),
    "sect".toCharArray(),
    "uml".toCharArray(),
    "copy".toCharArray(),
    "ordf".toCharArray(),
    "laquo".toCharArray(),
    "not".toCharArray(),
    "shy".toCharArray(),
    "reg".toCharArray(),
    "macr".toCharArray(),
    "deg".toCharArray(),
    "plusmn".toCharArray(),
    "sup2".toCharArray(),
    "sup3".toCharArray(),
    "acute".toCharArray(),
    "micro".toCharArray(),
    "para".toCharArray(),
    "middot".toCharArray(),
    "cedil".toCharArray(),
    "sup1".toCharArray(),
    "ordm".toCharArray(),
    "raquo".toCharArray(),
    "frac14".toCharArray(),
    "frac12".toCharArray(),
    "frac34".toCharArray(),
    "iquest".toCharArray(),
    "Agrave".toCharArray(),
    "Aacute".toCharArray(),
    "Acirc".toCharArray(),
    "Atilde".toCharArray(),
    "Auml".toCharArray(),
    "Aring".toCharArray(),
    "AElig".toCharArray(),
    "Ccedil".toCharArray(),
    "Egrave".toCharArray(),
    "Eacute".toCharArray(),
    "Ecirc".toCharArray(),
    "Euml".toCharArray(),
    "Igrave".toCharArray(),
    "Iacute".toCharArray(),
    "Icirc".toCharArray(),
    "Iuml".toCharArray(),
    "ETH".toCharArray(),
    "Ntilde".toCharArray(),
    "Ograve".toCharArray(),
    "Oacute".toCharArray(),
    "Ocirc".toCharArray(),
    "Otilde".toCharArray(),
    "Ouml".toCharArray(),
    "times".toCharArray(),
    "Oslash".toCharArray(),
    "Ugrave".toCharArray(),
    "Uacute".toCharArray(),
    "Ucirc".toCharArray(),
    "Uuml".toCharArray(),
    "Yacute".toCharArray(),
    "THORN".toCharArray(),
    "szlig".toCharArray(),
    "agrave".toCharArray(),
    "aacute".toCharArray(),
    "acirc".toCharArray(),
    "atilde".toCharArray(),
    "auml".toCharArray(),
    "aring".toCharArray(),
    "aelig".toCharArray(),
    "ccedil".toCharArray(),
    "egrave".toCharArray(),
    "eacute".toCharArray(),
    "ecirc".toCharArray(),
    "euml".toCharArray(),
    "igrave".toCharArray(),
    "iacute".toCharArray(),
    "icirc".toCharArray(),
    "iuml".toCharArray(),
    "eth".toCharArray(),
    "ntilde".toCharArray(),
    "ograve".toCharArray(),
    "oacute".toCharArray(),
    "ocirc".toCharArray(),
    "otilde".toCharArray(),
    "ouml".toCharArray(),
    "divide".toCharArray(),
    "oslash".toCharArray(),
    "ugrave".toCharArray(),
    "uacute".toCharArray(),
    "ucirc".toCharArray(),
    "uuml".toCharArray(),
    "yacute".toCharArray(),
    "thorn".toCharArray(),
    "yuml".toCharArray()
  };
}