1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.any23.extractor; 19 20 import org.apache.any23.mime.MIMEType; 21 22 import java.util.Collection; 23 24 /** 25 * Interface defining a factory for {@link Extractor}. 26 * 27 * @param <T> 28 * the type of the {@link Extractor} to be created by this factory. 29 */ 30 public interface ExtractorFactory<T extends Extractor<?>> extends ExtractorDescription { 31 32 /** 33 * Creates an extractor instance. 34 * 35 * @return an instance of the extractor associated to this factory. 36 */ 37 T createExtractor(); 38 39 /** 40 * Supports wildcards, e.g. <code>"*/*"</code> for blind extractors that merely call a web service. 41 * 42 * @return a {@link java.util.Collection} of supported mimetypes. 43 */ 44 Collection<MIMEType> getSupportedMIMETypes(); 45 46 /** 47 * An example input file for the extractor, to be used in auto-generated documentation. For the 48 * {@link Extractor.BlindExtractor}, this is an arbitrary IRI. For extractors that require content, it is the name 49 * of a file, relative to the factory's class file's location, it will be opened using 50 * factory.getClass().getResourceAsStream(filename). The example should be a short file that produces characteristic 51 * output if sent through the extractor. The file will be read as UTF-8, so it should either use that encoding or 52 * avoid characters outside of the US-ASCII range. 53 * 54 * @return a string representing sample input for a particular extractor. 55 */ 56 String getExampleInput(); 57 }