View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.cli;
19  
20  import com.beust.jcommander.Parameter;
21  import com.beust.jcommander.Parameters;
22  import org.apache.any23.extractor.ExampleInputOutput;
23  import org.apache.any23.extractor.ExtractionException;
24  import org.apache.any23.extractor.Extractor;
25  import org.apache.any23.extractor.ExtractorRegistryImpl;
26  import org.apache.any23.extractor.Extractor.BlindExtractor;
27  import org.apache.any23.extractor.Extractor.ContentExtractor;
28  import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
29  import org.apache.any23.extractor.ExtractorFactory;
30  import org.apache.any23.extractor.ExtractorRegistry;
31  
32  import java.io.IOException;
33  import java.io.PrintStream;
34  import java.util.Locale;
35  
36  /**
37   * This class provides some command-line documentation about available extractors and their usage.
38   */
39  @Parameters(commandNames = {
40          "extractor" }, commandDescription = "Utility for obtaining documentation about metadata extractors.")
41  public class ExtractorDocumentation extends BaseTool {
42  
43      @Parameter(names = { "-l", "--list" }, description = "shows the names of all available extractors")
44      private boolean showList;
45  
46      @Parameter(names = { "-i", "--input" }, description = "shows example input for the given extractor")
47      private String input;
48  
49      @Parameter(names = { "-o", "--output" }, description = "shows example output for the given extractor")
50      private String output;
51  
52      @Parameter(names = { "-a", "--all" }, description = "shows a report about all available extractors")
53      private boolean showAll;
54  
55      private PrintStream out = System.out;
56  
57      @Override
58      PrintStream getOut() {
59          return out;
60      }
61  
62      @Override
63      void setOut(PrintStream out) {
64          this.out = out;
65      }
66  
67      @Override
68      public void run() throws Exception {
69          if (showList) {
70              printExtractorList(ExtractorRegistryImpl.getInstance());
71          } else if (input != null) {
72              printExampleInput(input, ExtractorRegistryImpl.getInstance());
73          } else if (output != null) {
74              printExampleOutput(output, ExtractorRegistryImpl.getInstance());
75          } else if (showAll) {
76              printReport(ExtractorRegistryImpl.getInstance());
77          }
78      }
79  
80      /**
81       * Print an error message.
82       *
83       * @param msg
84       *            the error message to be printed
85       */
86      public void printError(String msg) {
87          System.err.println(msg);
88      }
89  
90      /**
91       * Prints the list of all the available extractors.
92       * 
93       * @param registry
94       *            the {@link org.apache.any23.extractor.ExtractorRegistry} containing all extractors
95       */
96      public void printExtractorList(ExtractorRegistry registry) {
97          for (ExtractorFactory factory : registry.getExtractorGroup()) {
98              out.println(
99                      String.format(Locale.ROOT, "%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
100         }
101     }
102 
103     /**
104      * Prints an example of input for the provided extractor.
105      *
106      * @param extractorName
107      *            the name of the extractor
108      * @param registry
109      *            the {@link org.apache.any23.extractor.ExtractorRegistry} containing all extractors
110      * 
111      * @throws IOException
112      *             raised if no extractor is found with that name
113      */
114     public void printExampleInput(String extractorName, ExtractorRegistry registry) throws IOException {
115         ExtractorFactory<?> factory = getFactory(registry, extractorName);
116         ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
117         String input = example.getExampleInput();
118         if (input == null) {
119             throw new IllegalArgumentException("Extractor " + extractorName + " provides no example input");
120         }
121         out.println(input);
122     }
123 
124     /**
125      * Prints an output example for the given extractor.
126      *
127      * @param extractorName
128      *            the extractor name
129      * @param registry
130      *            the {@link org.apache.any23.extractor.ExtractorRegistry} containing all extractors
131      * 
132      * @throws IOException
133      *             raised if no extractor is found with that name
134      * @throws ExtractionException
135      *             if there is an error duing extraction
136      */
137     public void printExampleOutput(String extractorName, ExtractorRegistry registry)
138             throws IOException, ExtractionException {
139         ExtractorFactory<?> factory = getFactory(registry, extractorName);
140         ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
141         String output = example.getExampleOutput();
142         if (output == null) {
143             throw new IllegalArgumentException("Extractor " + extractorName + " provides no example output");
144         }
145         out.println(output);
146     }
147 
148     /**
149      * Prints a complete report on all the available extractors.
150      *
151      * @param registry
152      *            the {@link org.apache.any23.extractor.ExtractorRegistry} containing all extractors
153      * 
154      * @throws IOException
155      *             raised if no extractor is found with that name
156      * @throws ExtractionException
157      *             if there is an error duing extraction
158      */
159     public void printReport(ExtractorRegistry registry) throws IOException, ExtractionException {
160         for (String extractorName : registry.getAllNames()) {
161             ExtractorFactory<?> factory = registry.getFactory(extractorName);
162             ExampleInputOutputtput.html#ExampleInputOutput">ExampleInputOutput example = new ExampleInputOutput(factory);
163             out.println("Extractor: " + extractorName);
164             out.println("\ttype: " + getType(factory));
165             out.println();
166             final String exampleInput = example.getExampleInput();
167             if (exampleInput == null) {
168                 out.println("(No Example Available)");
169             } else {
170                 out.println("-------- Example Input  --------");
171                 out.println(exampleInput);
172                 out.println("-------- Example Output --------");
173                 String output = example.getExampleOutput();
174                 out.println(output == null || output.trim().length() == 0 ? "(No Output Generated)" : output);
175             }
176             out.println("================================");
177             out.println();
178         }
179     }
180 
181     private ExtractorFactory<?> getFactory(ExtractorRegistry registry, String name) {
182         if (!registry.isRegisteredName(name)) {
183             throw new IllegalArgumentException("Unknown extractor name: " + name);
184         }
185         return registry.getFactory(name);
186     }
187 
188     private String getType(ExtractorFactory<?> factory) {
189         Extractor<?> extractor = factory.createExtractor();
190         if (extractor instanceof BlindExtractor) {
191             return BlindExtractor.class.getSimpleName();
192         }
193         if (extractor instanceof TagSoupDOMExtractor) {
194             return TagSoupDOMExtractor.class.getSimpleName();
195         }
196         if (extractor instanceof ContentExtractor) {
197             return ContentExtractor.class.getSimpleName();
198         }
199         return "?";
200     }
201 
202 }