View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor;
19  
20  import org.apache.any23.source.MemCopyFactory;
21  import org.apache.any23.source.StringDocumentSource;
22  import org.apache.any23.writer.TripleHandler;
23  import org.apache.any23.writer.TripleHandlerException;
24  import org.apache.any23.writer.TurtleWriter;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.IOException;
28  import java.io.InputStream;
29  
30  /**
31   * A reporter for example input and output of an extractor. Example input is part of every extractor's metadata; example
32   * output is obtained by running the extractor on its own example input. This is useful as a documentation device.
33   *
34   * @author Richard Cyganiak (richard@cyganiak.de)
35   */
36  public class ExampleInputOutput {
37  
38      private final ExtractorFactory<?> factory;
39  
40      public ExampleInputOutput(String extractorName) {
41          this(ExtractorRegistryImpl.getInstance().getFactory(extractorName));
42      }
43  
44      public ExampleInputOutput(ExtractorFactory<?> factory) {
45          this.factory = factory;
46      }
47  
48      public String getExampleInput() throws IOException {
49          if (factory.getExampleInput() == null) {
50              return null;
51          }
52          if (isBlindExtractor()) {
53              return null;
54          }
55          InputStream in = factory.createExtractor().getClass().getResourceAsStream(factory.getExampleInput());
56          if (in == null) {
57              throw new IllegalArgumentException("Example input resource not found for extractor "
58                      + factory.getExtractorName() + ": " + factory.getExampleInput());
59          }
60          return new String(MemCopyFactory.toByteArray(in), "utf-8");
61      }
62  
63      public String getExampleIRI() {
64          if (factory.getExampleInput() == null) {
65              return null;
66          }
67          if (isBlindExtractor()) {
68              return factory.getExampleInput(); // Should be a IRI.
69          }
70          return "http://example.com/";
71      }
72  
73      public String getExampleOutput() throws IOException, ExtractionException {
74          if (factory.getExampleInput() == null) {
75              return null;
76          }
77          ByteArrayOutputStream out = new ByteArrayOutputStream();
78          TripleHandler writer = new TurtleWriter(out);
79          new SingleDocumentExtraction(new StringDocumentSource(getExampleInput(), getExampleIRI()), factory, writer)
80                  .run();
81          try {
82              writer.close();
83          } catch (TripleHandlerException e) {
84              throw new ExtractionException("Error while closing the triple handler", e);
85          }
86          return out.toString("utf-8");
87      }
88  
89      private boolean isBlindExtractor() {
90          return factory.createExtractor() instanceof Extractor.BlindExtractor;
91      }
92  
93  }