1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.filter;
19
20 import org.apache.any23.extractor.ExtractionContext;
21 import org.apache.any23.extractor.html.TitleExtractor;
22 import org.apache.any23.extractor.html.TitleExtractorFactory;
23 import org.apache.any23.writer.TripleHandler;
24 import org.apache.any23.writer.TripleHandlerException;
25 import org.eclipse.rdf4j.model.Resource;
26 import org.eclipse.rdf4j.model.IRI;
27 import org.eclipse.rdf4j.model.Value;
28
29
30
31
32
33
34
35
36 public class IgnoreTitlesOfEmptyDocuments implements TripleHandler {
37
38 private final ExtractionContextBlocker blocker;
39
40 public IgnoreTitlesOfEmptyDocuments(TripleHandler wrapped) {
41 blocker = new ExtractionContextBlocker(wrapped);
42 }
43
44 public void startDocument(IRI documentIRI) throws TripleHandlerException {
45 blocker.startDocument(documentIRI);
46 }
47
48 public void openContext(ExtractionContext context) throws TripleHandlerException {
49 blocker.openContext(context);
50 if (isTitleContext(context)) {
51 blocker.blockContext(context);
52 }
53 }
54
55 public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
56 throws TripleHandlerException {
57 if (!isTitleContext(context)) {
58 blocker.unblockDocument();
59 }
60 blocker.receiveTriple(s, p, o, g, context);
61 }
62
63 public void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException {
64 blocker.receiveNamespace(prefix, uri, context);
65 }
66
67 public void closeContext(ExtractionContext context) {
68 blocker.closeContext(context);
69 }
70
71 public void close() throws TripleHandlerException {
72 blocker.close();
73 }
74
75 private boolean isTitleContext(ExtractionContext context) {
76 return context.getExtractorName().equals(TitleExtractorFactory.NAME);
77 }
78
79 public void endDocument(IRI documentIRI) throws TripleHandlerException {
80 blocker.endDocument(documentIRI);
81 }
82
83 public void setContentLength(long contentLength) {
84
85 }
86 }