1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.rdf;
19
20 import org.apache.any23.extractor.IssueReport;
21 import org.apache.any23.extractor.ExtractionContext;
22 import org.apache.any23.extractor.ExtractionResult;
23 import org.apache.any23.rdf.Any23ValueFactoryWrapper;
24 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
25 import org.eclipse.rdf4j.rio.ParseErrorListener;
26 import org.eclipse.rdf4j.rio.RDFFormat;
27 import org.eclipse.rdf4j.rio.RDFHandlerException;
28 import org.eclipse.rdf4j.rio.RDFParseException;
29 import org.eclipse.rdf4j.rio.RDFParser;
30 import org.eclipse.rdf4j.rio.Rio;
31 import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
32 import org.eclipse.rdf4j.rio.helpers.RDFaParserSettings;
33 import org.eclipse.rdf4j.rio.helpers.RDFaVersion;
34 import org.eclipse.rdf4j.rio.turtle.TurtleParser;
35 import org.semanticweb.owlapi.rio.OWLAPIRDFFormat;
36 import org.slf4j.Logger;
37 import org.slf4j.LoggerFactory;
38
39 import java.io.IOException;
40 import java.io.InputStream;
41 import java.io.Reader;
42 import java.util.Collections;
43 import java.util.HashSet;
44
45
46
47
48
49
50
51 public class RDFParserFactory {
52
53 private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class);
54
55 private static class InstanceHolder {
56 private static final RDFParserFactoryserFactory.html#RDFParserFactory">RDFParserFactory instance = new RDFParserFactory();
57 }
58
59 public static RDFParserFactory getInstance() {
60 return InstanceHolder.instance;
61 }
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77 public RDFParser getTurtleParserInstance(final boolean verifyDataType, final boolean stopAtFirstError,
78 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
79 if (extractionResult == null) {
80 throw new NullPointerException("extractionResult cannot be null.");
81 }
82 final TurtleParser parser = new ExtendedTurtleParser();
83 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
84 return parser;
85 }
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101 public RDFParser getRDFa10Parser(final boolean verifyDataType, final boolean stopAtFirstError,
102 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
103 final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
104 parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_0);
105 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
106 return parser;
107 }
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123 public RDFParser getRDFa11Parser(final boolean verifyDataType, final boolean stopAtFirstError,
124 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
125 final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
126 parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_1);
127 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
128 return parser;
129 }
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145 public RDFParser getRDFXMLParser(final boolean verifyDataType, final boolean stopAtFirstError,
146 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
147 final RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
148 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
149 return parser;
150 }
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166 public RDFParser getNTriplesParser(final boolean verifyDataType, final boolean stopAtFirstError,
167 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
168 final RDFParser parser = Rio.createParser(RDFFormat.NTRIPLES);
169 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
170 return parser;
171 }
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187 public RDFParser getNQuadsParser(final boolean verifyDataType, final boolean stopAtFirstError,
188 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
189 final RDFParser parser = Rio.createParser(RDFFormat.NQUADS);
190 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
191 return parser;
192 }
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208 public RDFParser getManchesterSyntaxParser(final boolean verifyDataType, final boolean stopAtFirstError,
209 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
210 final RDFParser parser = Rio.createParser(OWLAPIRDFFormat.MANCHESTER_OWL);
211 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
212 return parser;
213 }
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229 public RDFParser getFunctionalSyntaxParser(final boolean verifyDataType, final boolean stopAtFirstError,
230 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
231 final RDFParser parser = Rio.createParser(OWLAPIRDFFormat.OWL_FUNCTIONAL);
232 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
233 return parser;
234 }
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250 public RDFParser getTriXParser(final boolean verifyDataType, final boolean stopAtFirstError,
251 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
252 final RDFParser parser = Rio.createParser(RDFFormat.TRIX);
253 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
254 return parser;
255 }
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271 public RDFParser getJSONLDParser(final boolean verifyDataType, final boolean stopAtFirstError,
272 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
273 final RDFParser parser = Rio.createParser(RDFFormat.JSONLD);
274 configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
275 return parser;
276 }
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294 private void configureParser(final RDFParser parser, final boolean verifyDataType, final boolean stopAtFirstError,
295 final ExtractionContext extractionContext, final ExtractionResult extractionResult) {
296 parser.getParserConfig().setNonFatalErrors(
297 stopAtFirstError ? Collections.emptySet() : new HashSet<>(parser.getSupportedSettings()));
298 parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, verifyDataType);
299 parser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, verifyDataType);
300
301 parser.setParseErrorListener(new InternalParseErrorListener(extractionResult));
302 parser.setValueFactory(new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance(), extractionResult,
303 extractionContext.getDefaultLanguage()));
304 parser.setRDFHandler(new RDFHandlerAdapter(extractionResult));
305 }
306
307
308
309
310 private static class InternalParseErrorListener implements ParseErrorListener {
311
312 private final IssueReport extractionResult;
313
314 public InternalParseErrorListener(IssueReport er) {
315 extractionResult = er;
316 }
317
318 @Override
319 public void warning(String msg, long lineNo, long colNo) {
320 try {
321 extractionResult.notifyIssue(IssueReport.IssueLevel.WARNING, msg, lineNo, colNo);
322 } catch (Exception e) {
323 notifyExceptionInNotification(e);
324 }
325 }
326
327 @Override
328 public void error(String msg, long lineNo, long colNo) {
329 try {
330 extractionResult.notifyIssue(IssueReport.IssueLevel.ERROR, msg, lineNo, colNo);
331 } catch (Exception e) {
332 notifyExceptionInNotification(e);
333 }
334 }
335
336 @Override
337 public void fatalError(String msg, long lineNo, long colNo) {
338 try {
339 extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, msg, lineNo, colNo);
340 } catch (Exception e) {
341 notifyExceptionInNotification(e);
342 }
343 }
344
345 private void notifyExceptionInNotification(Exception e) {
346 if (logger != null) {
347 logger.error("An exception occurred while notifying an error.", e);
348 }
349 }
350 }
351
352
353
354
355 private static class ExtendedTurtleParser extends TurtleParser {
356 @Override
357 public void parse(Reader reader, String baseIRI) throws IOException, RDFParseException, RDFHandlerException {
358 setNamespace("", baseIRI);
359 super.parse(reader, baseIRI);
360 }
361
362 @Override
363 public void parse(InputStream in, String baseIRI) throws IOException, RDFParseException, RDFHandlerException {
364 setNamespace("", baseIRI);
365 super.parse(in, baseIRI);
366 }
367 }
368 }