1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.mime.purifier;
19
20 import java.io.IOException;
21 import java.io.InputStream;
22
23 /**
24 * Implementation of {@link Purifier} that removes all the eventual blank characters at the header of a file that might
25 * prevents its <i>MIME Type</i> detection.
26 *
27 * @author Davide Palmisano ( dpalmisano@gmail.com )
28 */
29 public class WhiteSpacesPurifier implements Purifier {
30
31 /**
32 * {@inheritDoc}
33 */
34 public void purify(InputStream inputStream) throws IOException {
35 if (!inputStream.markSupported())
36 throw new IllegalArgumentException("Provided InputStream does not support marks");
37
38 // mark the current position
39 inputStream.mark(Integer.MAX_VALUE);
40 int byteRead = inputStream.read();
41 char charRead = (char) byteRead;
42 while (isBlank(charRead) && (byteRead != -1)) {
43 // if here means that the previos character must be removed, so mark.
44 inputStream.mark(Integer.MAX_VALUE);
45 byteRead = inputStream.read();
46 charRead = (char) byteRead;
47 }
48 // if exit go back to the last valid mark.
49 inputStream.reset();
50 }
51
52 private boolean isBlank(char c) {
53 return c == '\t' || c == '\n' || c == ' ' || c == '\r' || c == '\b' || c == '\f';
54 }
55 }