1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.cli;
19
import com.beust.jcommander.IStringConverter;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.http.DefaultHTTPClientConfiguration;
import org.apache.any23.http.HTTPClient;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.mime.TikaMIMETypeDetector;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.FileDocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.StringDocumentSource;

import java.io.File;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URISyntaxException;
import java.util.LinkedList;
import java.util.List;
39
40
41
42
43
44
45
46 @Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.")
47 public class MimeDetector extends BaseTool {
48
49 public static final String FILE_DOCUMENT_PREFIX = "file://";
50
51 public static final String INLINE_DOCUMENT_PREFIX = "inline://";
52
53 public static final String URL_DOCUMENT_RE = "^https?://.*";
54
55 @Parameter(arity = 1, description = "Input document URL, {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}", converter = MimeDetectorDocumentSourceConverter.class)
56 private List<DocumentSource> document = new LinkedList<DocumentSource>();
57
58 private PrintStream out = System.out;
59
60 @Override
61 PrintStream getOut() {
62 return out;
63 }
64
65 @Override
66 void setOut(PrintStream out) {
67 this.out = out;
68 }
69
70 public void run() throws Exception {
71 if (document.isEmpty()) {
72 throw new IllegalArgumentException("No input document URL specified");
73 }
74
75 final DocumentSource documentSource = document.get(0);
76 final MIMETypeDetector detector = new TikaMIMETypeDetector();
77 final MIMEType mimeType = detector.guessMIMEType(documentSource.getDocumentIRI(),
78 documentSource.openInputStream(), MIMEType.parse(documentSource.getContentType()));
79 out.println(mimeType);
80 }
81
82 public static final class MimeDetectorDocumentSourceConverter implements IStringConverter<DocumentSource> {
83
84 @Override
85 public DocumentSource convert(String document) {
86 if (document.startsWith(FILE_DOCUMENT_PREFIX)) {
87 return new FileDocumentSource(new File(document.substring(FILE_DOCUMENT_PREFIX.length())));
88 }
89 if (document.startsWith(INLINE_DOCUMENT_PREFIX)) {
90 return new StringDocumentSource(document.substring(INLINE_DOCUMENT_PREFIX.length()), "");
91 }
92 if (document.matches(URL_DOCUMENT_RE)) {
93 final HTTPClient client = new DefaultHTTPClient();
94 client.init(DefaultHTTPClientConfiguration.singleton());
95 try {
96 return new HTTPDocumentSource(client, document);
97 } catch (URISyntaxException e) {
98 throw new IllegalArgumentException("Invalid source IRI: '" + document + "'");
99 }
100 }
101 throw new IllegalArgumentException("Unsupported protocol for document " + document);
102 }
103
104 }
105
106 }