1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.rdf;
19
20 import org.apache.any23.extractor.IssueReport;
21 import org.apache.any23.extractor.ExtractionContext;
22 import org.apache.any23.extractor.ExtractionResult;
23 import org.apache.any23.rdf.Any23ValueFactoryWrapper;
24 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
25 import org.eclipse.rdf4j.rio.ParseErrorListener;
26 import org.eclipse.rdf4j.rio.RDFFormat;
27 import org.eclipse.rdf4j.rio.RDFHandlerException;
28 import org.eclipse.rdf4j.rio.RDFParseException;
29 import org.eclipse.rdf4j.rio.RDFParser;
30 import org.eclipse.rdf4j.rio.Rio;
31 import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
32 import org.eclipse.rdf4j.rio.helpers.RDFaParserSettings;
33 import org.eclipse.rdf4j.rio.helpers.RDFaVersion;
34 import org.eclipse.rdf4j.rio.helpers.XMLParserSettings;
35 import org.eclipse.rdf4j.rio.turtle.TurtleParser;
36 import org.semanticweb.owlapi.rio.OWLAPIRDFFormat;
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39
40 import java.io.IOException;
41 import java.io.InputStream;
42 import java.io.Reader;
43 import java.util.Collections;
44 import java.util.HashSet;
45
46
47
48
49
50
51
52 public class RDFParserFactory {
53
54 private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class);
55
56 private static class InstanceHolder {
57 private static final RDFParserFactory instance = new RDFParserFactory();
58 }
59
60 public static RDFParserFactory getInstance() {
61 return InstanceHolder.instance;
62 }
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 public RDFParser getTurtleParserInstance(final boolean verifyDataType, final boolean stopAtFirstError,
79 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
80 if (extractionResult == null) {
81 throw new NullPointerException("extractionResult cannot be null.");
82 }
83 final TurtleParser parser = new ExtendedTurtleParser();
84 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
85 return parser;
86 }
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102 public RDFParser getRDFa10Parser(final boolean verifyDataType, final boolean stopAtFirstError,
103 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
104 final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
105 parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_0);
106 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
107 return parser;
108 }
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124 public RDFParser getRDFa11Parser(final boolean verifyDataType, final boolean stopAtFirstError,
125 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
126 final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
127 parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_1);
128 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
129 return parser;
130 }
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146 public RDFParser getRDFXMLParser(final boolean verifyDataType, final boolean stopAtFirstError,
147 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
148 final RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
149 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
150 return parser;
151 }
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167 public RDFParser getNTriplesParser(final boolean verifyDataType, final boolean stopAtFirstError,
168 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
169 final RDFParser parser = Rio.createParser(RDFFormat.NTRIPLES);
170 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
171 return parser;
172 }
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188 public RDFParser getNQuadsParser(final boolean verifyDataType, final boolean stopAtFirstError,
189 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
190 final RDFParser parser = Rio.createParser(RDFFormat.NQUADS);
191 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
192 return parser;
193 }
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 public RDFParser getManchesterSyntaxParser(final boolean verifyDataType, final boolean stopAtFirstError,
210 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
211 final RDFParser parser = Rio.createParser(OWLAPIRDFFormat.MANCHESTER_OWL);
212 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
213 return parser;
214 }
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230 public RDFParser getFunctionalSyntaxParser(final boolean verifyDataType, final boolean stopAtFirstError,
231 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
232 final RDFParser parser = Rio.createParser(OWLAPIRDFFormat.OWL_FUNCTIONAL);
233 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
234 return parser;
235 }
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251 public RDFParser getTriXParser(final boolean verifyDataType, final boolean stopAtFirstError,
252 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
253 final RDFParser parser = Rio.createParser(RDFFormat.TRIX);
254 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
255 return parser;
256 }
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272 public RDFParser getJSONLDParser(final boolean verifyDataType, final boolean stopAtFirstError,
273 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
274 final RDFParser parser = Rio.createParser(RDFFormat.JSONLD);
275 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
276 return parser;
277 }
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295 private void configureParser(final RDFParser parser, final boolean verifyDataType, final boolean stopAtFirstError,
296 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
297 parser.getParserConfig().setNonFatalErrors(
298 stopAtFirstError ? Collections.emptySet() : new HashSet<>(parser.getSupportedSettings()));
299 parser.getParserConfig().set(XMLParserSettings.LOAD_EXTERNAL_DTD, false);
300 parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, verifyDataType);
301 parser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, verifyDataType);
302
303 parser.setParseErrorListener(new InternalParseErrorListener(extractionResult));
304 parser.setValueFactory(new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance(), extractionResult,
305 extractionContext.getDefaultLanguage()));
306 parser.setRDFHandler(new RDFHandlerAdapter(extractionResult));
307 }
308
309
310
311
312 private static class InternalParseErrorListener implements ParseErrorListener {
313
314 private final IssueReport extractionResult;
315
316 public InternalParseErrorListener(IssueReport er) {
317 extractionResult = er;
318 }
319
320 @Override
321 public void warning(String msg, long lineNo, long colNo) {
322 try {
323 extractionResult.notifyIssue(IssueReport.IssueLevel.WARNING, msg, lineNo, colNo);
324 } catch (Exception e) {
325 notifyExceptionInNotification(e);
326 }
327 }
328
329 @Override
330 public void error(String msg, long lineNo, long colNo) {
331 try {
332 extractionResult.notifyIssue(IssueReport.IssueLevel.ERROR, msg, lineNo, colNo);
333 } catch (Exception e) {
334 notifyExceptionInNotification(e);
335 }
336 }
337
338 @Override
339 public void fatalError(String msg, long lineNo, long colNo) {
340 try {
341 extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, msg, lineNo, colNo);
342 } catch (Exception e) {
343 notifyExceptionInNotification(e);
344 }
345 }
346
347 private void notifyExceptionInNotification(Exception e) {
348 if (logger != null) {
349 logger.error("An exception occurred while notifying an error.", e);
350 }
351 }
352 }
353
354
355
356
357 private static class ExtendedTurtleParser extends TurtleParser {
358 @Override
359 public void parse(Reader reader, String baseIRI) throws IOException, RDFParseException, RDFHandlerException {
360 setNamespace("", baseIRI);
361 super.parse(reader, baseIRI);
362 }
363
364 @Override
365 public void parse(InputStream in, String baseIRI) throws IOException, RDFParseException, RDFHandlerException {
366 setNamespace("", baseIRI);
367 super.parse(in, baseIRI);
368 }
369 }
370 }