1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.filter;
19
20 import org.apache.any23.extractor.ExtractionContext;
21 import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
22 import org.apache.any23.vocab.XHTML;
23 import org.apache.any23.writer.TripleHandler;
24 import org.apache.any23.writer.TripleHandlerException;
25 import org.eclipse.rdf4j.model.Resource;
26 import org.eclipse.rdf4j.model.IRI;
27 import org.eclipse.rdf4j.model.Value;
28
29
30
31
32
33
34
35 public class IgnoreAccidentalRDFa implements TripleHandler {
36
37 private static final XHTML vXHTML = XHTML.getInstance();
38
39 private final ExtractionContextBlocker blocker;
40
41 private final boolean alwaysSuppressCSSTriples;
42
43
44
45
46
47
48
49
50
51
52 public IgnoreAccidentalRDFa(TripleHandler wrapped, boolean alwaysSuppressCSSTriples) {
53 this.blocker = new ExtractionContextBlocker(wrapped);
54 this.alwaysSuppressCSSTriples = alwaysSuppressCSSTriples;
55 }
56
57 public IgnoreAccidentalRDFa(TripleHandler wrapped) {
58 this(wrapped, false);
59 }
60
61 public void startDocument(IRI documentIRI) throws TripleHandlerException {
62 blocker.startDocument(documentIRI);
63 }
64
65 public void openContext(ExtractionContext context) throws TripleHandlerException {
66 blocker.openContext(context);
67 if (isRDFaContext(context)) {
68 blocker.blockContext(context);
69 }
70 }
71
72 public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
73 throws TripleHandlerException {
74
75 if (alwaysSuppressCSSTriples && p.stringValue().equals(vXHTML.stylesheet.stringValue())) {
76 return;
77 }
78 if (isRDFaContext(context)) {
79 blocker.unblockContext(context);
80 }
81 blocker.receiveTriple(s, p, o, g, context);
82 }
83
84 public void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException {
85 blocker.receiveNamespace(prefix, uri, context);
86 }
87
88 public void closeContext(ExtractionContext context) {
89 blocker.closeContext(context);
90 }
91
92 public void close() throws TripleHandlerException {
93 blocker.close();
94 }
95
96 private boolean isRDFaContext(ExtractionContext context) {
97 return context.getExtractorName().equals(RDFaExtractorFactory.NAME);
98 }
99
100 public void endDocument(IRI documentIRI) throws TripleHandlerException {
101 blocker.endDocument(documentIRI);
102 }
103
104 public void setContentLength(long contentLength) {
105
106 }
107 }