001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.imaging.common;
018
019import java.io.ByteArrayInputStream;
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.PushbackInputStream;
024import java.util.Map;
025
026import org.apache.commons.imaging.ImageReadException;
027
028/**
029 * A rudimentary preprocessor and parser for the C programming
030 * language.
031 *
032 * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
033 */
034public class BasicCParser {
035    private final PushbackInputStream is;
036
037    public BasicCParser(final ByteArrayInputStream is) {
038        this.is = new PushbackInputStream(is);
039    }
040
041    public String nextToken() throws IOException, ImageReadException {
042        // I don't know how complete the C parsing in an XPM file
043        // is meant to be, this is just the very basics...
044
045        boolean inString = false;
046        boolean inIdentifier = false;
047        boolean hadBackSlash = false;
048        final StringBuilder token = new StringBuilder();
049        for (int c = is.read(); c != -1; c = is.read()) {
050            if (inString) {
051                if (c == '\\') {
052                    token.append('\\');
053                    hadBackSlash = !hadBackSlash;
054                } else if (c == '"') {
055                    token.append('"');
056                    if (!hadBackSlash) {
057                        return token.toString();
058                    }
059                    hadBackSlash = false;
060                } else if (c == '\r' || c == '\n') {
061                    throw new ImageReadException(
062                            "Unterminated string in XPM file");
063                } else {
064                    token.append((char) c);
065                    hadBackSlash = false;
066                }
067            } else if (inIdentifier) {
068                if (Character.isLetterOrDigit(c) || c == '_') {
069                    token.append((char) c);
070                } else {
071                    is.unread(c);
072                    return token.toString();
073                }
074            } else {
075                if (c == '"') {
076                    token.append('"');
077                    inString = true;
078                } else if (Character.isLetterOrDigit(c) || c == '_') {
079                    token.append((char) c);
080                    inIdentifier = true;
081                } else if (c == '{' || c == '}' || c == '[' || c == ']'
082                        || c == '*' || c == ';' || c == '=' || c == ',') {
083                    token.append((char) c);
084                    return token.toString();
085                } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
086                    // ignore
087                } else {
088                    throw new ImageReadException(
089                            "Unhandled/invalid character '" + ((char) c)
090                                    + "' found in XPM file");
091                }
092            }
093        }
094
095        if (inIdentifier) {
096            return token.toString();
097        }
098        if (inString) {
099            throw new ImageReadException("Unterminated string ends XMP file");
100        }
101        return null;
102    }
103
104    public static ByteArrayOutputStream preprocess(final InputStream is,
105            final StringBuilder firstComment, final Map<String, String> defines)
106            throws IOException, ImageReadException {
107        boolean inSingleQuotes = false;
108        boolean inString = false;
109        boolean inComment = false;
110        boolean inDirective = false;
111        boolean hadSlash = false;
112        boolean hadStar = false;
113        boolean hadBackSlash = false;
114        final ByteArrayOutputStream out = new ByteArrayOutputStream();
115        boolean seenFirstComment = (firstComment == null);
116        final StringBuilder directiveBuffer = new StringBuilder();
117        for (int c = is.read(); c != -1; c = is.read()) {
118            if (inComment) {
119                if (c == '*') {
120                    if (hadStar && !seenFirstComment) {
121                        firstComment.append('*');
122                    }
123                    hadStar = true;
124                } else if (c == '/') {
125                    if (hadStar) {
126                        hadStar = false;
127                        inComment = false;
128                        seenFirstComment = true;
129                    } else {
130                        if (!seenFirstComment) {
131                            firstComment.append((char) c);
132                        }
133                    }
134                } else {
135                    if (hadStar && !seenFirstComment) {
136                        firstComment.append('*');
137                    }
138                    hadStar = false;
139                    if (!seenFirstComment) {
140                        firstComment.append((char) c);
141                    }
142                }
143            } else if (inSingleQuotes) {
144                if (c == '\\') {
145                    if (hadBackSlash) {
146                        out.write('\\');
147                        out.write('\\');
148                        hadBackSlash = false;
149                    } else {
150                        hadBackSlash = true;
151                    }
152                } else if (c == '\'') {
153                    if (hadBackSlash) {
154                        out.write('\\');
155                        hadBackSlash = false;
156                    } else {
157                        inSingleQuotes = false;
158                    }
159                    out.write('\'');
160                } else if (c == '\r' || c == '\n') {
161                    throw new ImageReadException("Unterminated single quote in file");
162                } else {
163                    if (hadBackSlash) {
164                        out.write('\\');
165                        hadBackSlash = false;
166                    }
167                    out.write(c);
168                }
169            } else if (inString) {
170                if (c == '\\') {
171                    if (hadBackSlash) {
172                        out.write('\\');
173                        out.write('\\');
174                        hadBackSlash = false;
175                    } else {
176                        hadBackSlash = true;
177                    }
178                } else if (c == '"') {
179                    if (hadBackSlash) {
180                        out.write('\\');
181                        hadBackSlash = false;
182                    } else {
183                        inString = false;
184                    }
185                    out.write('"');
186                } else if (c == '\r' || c == '\n') {
187                    throw new ImageReadException("Unterminated string in file");
188                } else {
189                    if (hadBackSlash) {
190                        out.write('\\');
191                        hadBackSlash = false;
192                    }
193                    out.write(c);
194                }
195            } else if (inDirective) {
196                if (c == '\r' || c == '\n') {
197                    inDirective = false;
198                    final String[] tokens = tokenizeRow(directiveBuffer.toString());
199                    if (tokens.length < 2 || tokens.length > 3) {
200                        throw new ImageReadException("Bad preprocessor directive");
201                    }
202                    if (!tokens[0].equals("define")) {
203                        throw new ImageReadException("Invalid/unsupported "
204                                + "preprocessor directive '" + tokens[0] + "'");
205                    }
206                    defines.put(tokens[1], (tokens.length == 3) ? tokens[2]
207                            : null);
208                    directiveBuffer.setLength(0);
209                } else {
210                    directiveBuffer.append((char) c);
211                }
212            } else {
213                if (c == '/') {
214                    if (hadSlash) {
215                        out.write('/');
216                    }
217                    hadSlash = true;
218                } else if (c == '*') {
219                    if (hadSlash) {
220                        inComment = true;
221                        hadSlash = false;
222                    } else {
223                        out.write(c);
224                    }
225                } else if (c == '\'') {
226                    if (hadSlash) {
227                        out.write('/');
228                    }
229                    hadSlash = false;
230                    out.write(c);
231                    inSingleQuotes = true;
232                } else if (c == '"') {
233                    if (hadSlash) {
234                        out.write('/');
235                    }
236                    hadSlash = false;
237                    out.write(c);
238                    inString = true;
239                } else if (c == '#') {
240                    if (defines == null) {
241                        throw new ImageReadException("Unexpected preprocessor directive");
242                    }
243                    inDirective = true;
244                } else {
245                    if (hadSlash) {
246                        out.write('/');
247                    }
248                    hadSlash = false;
249                    out.write(c);
250                    // Only whitespace allowed before first comment:
251                    if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
252                        seenFirstComment = true;
253                    }
254                }
255            }
256        }
257        if (hadSlash) {
258            out.write('/');
259        }
260        if (hadStar) {
261            out.write('*');
262        }
263        if (inString) {
264            throw new ImageReadException("Unterminated string at the end of file");
265        }
266        if (inComment) {
267            throw new ImageReadException("Unterminated comment at the end of file");
268        }
269        return out;
270    }
271
272    public static String[] tokenizeRow(final String row) {
273        final String[] tokens = row.split("[ \t]");
274        int numLiveTokens = 0;
275        for (final String token : tokens) {
276            if (token != null && token.length() > 0) {
277                ++numLiveTokens;
278            }
279        }
280        final String[] liveTokens = new String[numLiveTokens];
281        int next = 0;
282        for (final String token : tokens) {
283            if (token != null && token.length() > 0) {
284                liveTokens[next++] = token;
285            }
286        }
287        return liveTokens;
288    }
289
290    public static void unescapeString(final StringBuilder stringBuilder, final String string)
291            throws ImageReadException {
292        if (string.length() < 2) {
293            throw new ImageReadException("Parsing XPM file failed, "
294                    + "string is too short");
295        }
296        if (string.charAt(0) != '"'
297                || string.charAt(string.length() - 1) != '"') {
298            throw new ImageReadException("Parsing XPM file failed, "
299                    + "string not surrounded by '\"'");
300        }
301        boolean hadBackSlash = false;
302        for (int i = 1; i < (string.length() - 1); i++) {
303            final char c = string.charAt(i);
304            if (hadBackSlash) {
305                if (c == '\\') {
306                    stringBuilder.append('\\');
307                } else if (c == '"') {
308                    stringBuilder.append('"');
309                } else if (c == '\'') {
310                    stringBuilder.append('\'');
311                } else if (c == 'x') {
312                    if (i + 2 >= string.length()) {
313                        throw new ImageReadException(
314                                "Parsing XPM file failed, "
315                                        + "hex constant in string too short");
316                    }
317                    final char hex1 = string.charAt(i + 1);
318                    final char hex2 = string.charAt(i + 2);
319                    i += 2;
320                    int constant;
321                    try {
322                        constant = Integer.parseInt(hex1 + Character.toString(hex2), 16);
323                    } catch (final NumberFormatException nfe) {
324                        throw new ImageReadException(
325                                "Parsing XPM file failed, "
326                                        + "hex constant invalid", nfe);
327                    }
328                    stringBuilder.append((char) constant);
329                } else if (c == '0' || c == '1' || c == '2' || c == '3'
330                        || c == '4' || c == '5' || c == '6' || c == '7') {
331                    int length = 1;
332                    if (i + 1 < string.length() && '0' <= string.charAt(i + 1)
333                            && string.charAt(i + 1) <= '7') {
334                        ++length;
335                    }
336                    if (i + 2 < string.length() && '0' <= string.charAt(i + 2)
337                            && string.charAt(i + 2) <= '7') {
338                        ++length;
339                    }
340                    int constant = 0;
341                    for (int j = 0; j < length; j++) {
342                        constant *= 8;
343                        constant += (string.charAt(i + j) - '0');
344                    }
345                    i += length - 1;
346                    stringBuilder.append((char) constant);
347                } else if (c == 'a') {
348                    stringBuilder.append((char) 0x07);
349                } else if (c == 'b') {
350                    stringBuilder.append((char) 0x08);
351                } else if (c == 'f') {
352                    stringBuilder.append((char) 0x0c);
353                } else if (c == 'n') {
354                    stringBuilder.append((char) 0x0a);
355                } else if (c == 'r') {
356                    stringBuilder.append((char) 0x0d);
357                } else if (c == 't') {
358                    stringBuilder.append((char) 0x09);
359                } else if (c == 'v') {
360                    stringBuilder.append((char) 0x0b);
361                } else {
362                    throw new ImageReadException("Parsing XPM file failed, "
363                            + "invalid escape sequence");
364                }
365                hadBackSlash = false;
366            } else {
367                if (c == '\\') {
368                    hadBackSlash = true;
369                } else if (c == '"') {
370                    throw new ImageReadException("Parsing XPM file failed, "
371                            + "extra '\"' found in string");
372                } else {
373                    stringBuilder.append(c);
374                }
375            }
376        }
377        if (hadBackSlash) {
378            throw new ImageReadException("Parsing XPM file failed, "
379                    + "unterminated escape sequence found in string");
380        }
381    }
382}