1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.any23.writer; 19 20 import org.apache.any23.extractor.ExtractionContext; 21 import org.eclipse.rdf4j.model.Resource; 22 import org.eclipse.rdf4j.model.IRI; 23 import org.eclipse.rdf4j.model.Value; 24 25 /** 26 * Defines a document based triple handler. 27 */ 28 public interface TripleHandler extends AutoCloseable { 29 30 void startDocument(IRI documentIRI) throws TripleHandlerException; 31 32 /** 33 * Informs the handler that a new context has been established. Contexts are not guaranteed to receive any triples, 34 * so they might be closed without any triple. 35 * 36 * @param context 37 * an instantiated {@link org.apache.any23.extractor.ExtractionContext} 38 * 39 * @throws TripleHandlerException 40 * if there is an errr opening the {@link org.apache.any23.extractor.ExtractionContext} 41 */ 42 void openContext(ExtractionContext context) throws TripleHandlerException; 43 44 /** 45 * Invoked with a currently open context, notifies the detection of a triple. 46 * 47 * @param s 48 * triple subject, cannot be <code>null</code>. 49 * @param p 50 * triple predicate, cannot be <code>null</code>. 51 * @param o 52 * triple object, cannot be <code>null</code>. 53 * @param g 54 * triple graph, can be <code>null</code>. 55 * @param context 56 * extraction context. 57 * 58 * @throws TripleHandlerException 59 * if there is an error receiving the triple. 60 */ 61 void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context) throws TripleHandlerException; 62 63 /** 64 * Invoked with a currently open context, notifies the detection of a namespace. 65 * 66 * @param prefix 67 * namespace prefix. 68 * @param uri 69 * namespace <i>IRI</i>. 70 * @param context 71 * namespace context. 72 * 73 * @throws TripleHandlerException 74 * if there is an error receiving the namespace. 75 */ 76 void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException; 77 78 /** 79 * Informs the handler that no more triples will come from a previously opened context. All contexts are guaranteed 80 * to be closed before the final close(). The document context for each document is guaranteed to be closed after 81 * all local contexts of that document. 82 * 83 * @param context 84 * the context to be closed. 85 * 86 * @throws TripleHandlerException 87 * if there is an error closing the {@link org.apache.any23.extractor.ExtractionContext}. 88 */ 89 void closeContext(ExtractionContext context) throws TripleHandlerException; 90 91 /** 92 * Informs the handler that the end of the document has been reached. 93 * 94 * @param documentIRI 95 * document IRI. 96 * 97 * @throws TripleHandlerException 98 * if there is an error ending the document. 99 */ 100 void endDocument(IRI documentIRI) throws TripleHandlerException; 101 102 /** 103 * Sets the length of the content to be processed. 104 * 105 * @param contentLength 106 * length of the content being processed. 107 */ 108 void setContentLength(long contentLength); 109 110 /** 111 * Will be called last and exactly once. 112 * 113 * @throws TripleHandlerException 114 * if there is an error closing the {@link org.apache.any23.writer.TripleHandler} implementation. 115 */ 116 void close() throws TripleHandlerException; 117 118 }