1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.cli;
19
20 import com.beust.jcommander.Parameter;
21 import com.beust.jcommander.Parameters;
22 import org.apache.any23.extractor.ExampleInputOutput;
23 import org.apache.any23.extractor.ExtractionException;
24 import org.apache.any23.extractor.Extractor;
25 import org.apache.any23.extractor.ExtractorRegistryImpl;
26 import org.apache.any23.extractor.Extractor.BlindExtractor;
27 import org.apache.any23.extractor.Extractor.ContentExtractor;
28 import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
29 import org.apache.any23.extractor.ExtractorFactory;
30 import org.apache.any23.extractor.ExtractorRegistry;
31
32 import java.io.IOException;
33 import java.io.PrintStream;
34 import java.util.Locale;
35
36
37
38
39 @Parameters(commandNames = {
40 "extractor" }, commandDescription = "Utility for obtaining documentation about metadata extractors.")
41 public class ExtractorDocumentation extends BaseTool {
42
43 @Parameter(names = { "-l", "--list" }, description = "shows the names of all available extractors")
44 private boolean showList;
45
46 @Parameter(names = { "-i", "--input" }, description = "shows example input for the given extractor")
47 private String input;
48
49 @Parameter(names = { "-o", "--output" }, description = "shows example output for the given extractor")
50 private String output;
51
52 @Parameter(names = { "-a", "--all" }, description = "shows a report about all available extractors")
53 private boolean showAll;
54
55 private PrintStream out = System.out;
56
57 @Override
58 PrintStream getOut() {
59 return out;
60 }
61
62 @Override
63 void setOut(PrintStream out) {
64 this.out = out;
65 }
66
67 @Override
68 public void run() throws Exception {
69 if (showList) {
70 printExtractorList(ExtractorRegistryImpl.getInstance());
71 } else if (input != null) {
72 printExampleInput(input, ExtractorRegistryImpl.getInstance());
73 } else if (output != null) {
74 printExampleOutput(output, ExtractorRegistryImpl.getInstance());
75 } else if (showAll) {
76 printReport(ExtractorRegistryImpl.getInstance());
77 }
78 }
79
80
81
82
83
84
85
86 public void printError(String msg) {
87 System.err.println(msg);
88 }
89
90
91
92
93
94
95
96 public void printExtractorList(ExtractorRegistry registry) {
97 for (ExtractorFactory factory : registry.getExtractorGroup()) {
98 out.println(
99 String.format(Locale.ROOT, "%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
100 }
101 }
102
103
104
105
106
107
108
109
110
111
112
113
114 public void printExampleInput(String extractorName, ExtractorRegistry registry) throws IOException {
115 ExtractorFactory<?> factory = getFactory(registry, extractorName);
116 ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
117 String input = example.getExampleInput();
118 if (input == null) {
119 throw new IllegalArgumentException("Extractor " + extractorName + " provides no example input");
120 }
121 out.println(input);
122 }
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137 public void printExampleOutput(String extractorName, ExtractorRegistry registry)
138 throws IOException, ExtractionException {
139 ExtractorFactory<?> factory = getFactory(registry, extractorName);
140 ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
141 String output = example.getExampleOutput();
142 if (output == null) {
143 throw new IllegalArgumentException("Extractor " + extractorName + " provides no example output");
144 }
145 out.println(output);
146 }
147
148
149
150
151
152
153
154
155
156
157
158
159 public void printReport(ExtractorRegistry registry) throws IOException, ExtractionException {
160 for (String extractorName : registry.getAllNames()) {
161 ExtractorFactory<?> factory = registry.getFactory(extractorName);
162 ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
163 out.println("Extractor: " + extractorName);
164 out.println("\ttype: " + getType(factory));
165 out.println();
166 final String exampleInput = example.getExampleInput();
167 if (exampleInput == null) {
168 out.println("(No Example Available)");
169 } else {
170 out.println("-------- Example Input --------");
171 out.println(exampleInput);
172 out.println("-------- Example Output --------");
173 String output = example.getExampleOutput();
174 out.println(output == null || output.trim().length() == 0 ? "(No Output Generated)" : output);
175 }
176 out.println("================================");
177 out.println();
178 }
179 }
180
181 private ExtractorFactory<?> getFactory(ExtractorRegistry registry, String name) {
182 if (!registry.isRegisteredName(name)) {
183 throw new IllegalArgumentException("Unknown extractor name: " + name);
184 }
185 return registry.getFactory(name);
186 }
187
188 private String getType(ExtractorFactory<?> factory) {
189 Extractor<?> extractor = factory.createExtractor();
190 if (extractor instanceof BlindExtractor) {
191 return BlindExtractor.class.getSimpleName();
192 }
193 if (extractor instanceof TagSoupDOMExtractor) {
194 return TagSoupDOMExtractor.class.getSimpleName();
195 }
196 if (extractor instanceof ContentExtractor) {
197 return ContentExtractor.class.getSimpleName();
198 }
199 return "?";
200 }
201
202 }