/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.any23.extractor;

import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.encoding.EncodingDetector;
import org.apache.any23.encoding.TikaEncodingDetector;
import org.apache.any23.extractor.html.DocumentReport;
import org.apache.any23.extractor.html.HTMLDocument;
import org.apache.any23.extractor.html.MicroformatExtractor;
import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.LocalCopyFactory;
import org.apache.any23.source.MemCopyFactory;
import org.apache.any23.validator.EmptyValidationReport;
import org.apache.any23.validator.ValidatorException;
import org.apache.any23.vocab.SINDICE;
import org.apache.any23.writer.CompositeTripleHandler;
import org.apache.any23.writer.CountingTripleHandler;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.any23.extractor.Extractor.BlindExtractor;
import org.apache.any23.extractor.Extractor.ContentExtractor;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.apache.tika.mime.MimeTypes;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;

import static org.apache.any23.extractor.TagSoupExtractionResult.PropertyPath;
import static org.apache.any23.extractor.TagSoupExtractionResult.ResourceRoot;

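/**
 * This class acts as a facade for the extraction of a single document: it optionally detects the
 * MIME type of the {@link DocumentSource}, selects the matching {@link Extractor}s, runs them and
 * forwards every produced statement to the configured {@link TripleHandler}.
 * <p>
 * Typical usage (a sketch; assumes a {@code DocumentSource}, an {@code ExtractorGroup} and a
 * {@code TripleHandler} are already available):
 *
 * <pre>{@code
 * SingleDocumentExtraction extraction =
 *         new SingleDocumentExtraction(DefaultConfiguration.singleton(), source, extractors, handler);
 * SingleDocumentExtractionReport report = extraction.run();
 * }</pre>
 */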
public class SingleDocumentExtraction {

    private static final SINDICE vSINDICE = SINDICE.getInstance();

    private static final Logger log = LoggerFactory.getLogger(SingleDocumentExtraction.class);

    private final Configuration configuration;

    private final DocumentSource in;

    private IRI documentIRI;

    private final ExtractorGroup extractors;

    private final TripleHandler output;

    private final EncodingDetector encoderDetector;

    private LocalCopyFactory copyFactory = null;

    private DocumentSource localDocumentSource = null;

    private MIMETypeDetector detector = null;

    private ExtractorGroup matchingExtractors = null;

    private MIMEType detectedMIMEType = null;

    private DocumentReport documentReport = null;

    private ExtractionParameters tagSoupDOMRelatedParameters = null;

    private String parserEncoding = null;

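    /**
     * Builds an extractor for the given document source, using the provided configuration, the given
     * group of extractors and the output triple handler. A {@link CountingTripleHandler} is chained to
     * the output so that the number of produced triples can be reported.
     *
     * @param configuration the configuration applied during the extraction, must not be {@code null}
     * @param in the input document source, must not be {@code null}
     * @param extractors the extractors to be applied to the document
     * @param output the triple handler receiving the extracted statements
     */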
    public SingleDocumentExtraction(Configuration configuration, DocumentSource in, ExtractorGroup extractors,
            TripleHandler output) {
        if (configuration == null)
            throw new NullPointerException("configuration cannot be null.");
        if (in == null)
            throw new NullPointerException("in cannot be null.");
        this.configuration = configuration;
        this.in = in;
        this.extractors = extractors;

        List<TripleHandler> tripleHandlers = new ArrayList<>();
        tripleHandlers.add(output);
        tripleHandlers.add(new CountingTripleHandler());
        this.output = new CompositeTripleHandler(tripleHandlers);
        this.encoderDetector = new TikaEncodingDetector();
    }

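    /**
     * Builds an extractor for the given document source that applies a single extractor factory,
     * using the provided configuration. MIME type detection is disabled.
     */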
    public SingleDocumentExtraction(Configuration configuration, DocumentSource in, ExtractorFactory<?> factory,
            TripleHandler output) {
        this(configuration, in, new ExtractorGroup(Collections.<ExtractorFactory<?>> singletonList(factory)), output);
        this.setMIMETypeDetector(null);
    }

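    /**
     * Builds an extractor for the given document source that applies a single extractor factory,
     * using the {@link DefaultConfiguration}. MIME type detection is disabled.
     */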
    public SingleDocumentExtraction(DocumentSource in, ExtractorFactory<?> factory, TripleHandler output) {
        this(DefaultConfiguration.singleton(), in,
                new ExtractorGroup(Collections.<ExtractorFactory<?>> singletonList(factory)), output);
        this.setMIMETypeDetector(null);
    }

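    /**
     * Sets the factory used to obtain a local copy of the document content;
     * if none is set, a {@link MemCopyFactory} is used.
     */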
    public void setLocalCopyFactory(LocalCopyFactory copyFactory) {
        this.copyFactory = copyFactory;
    }

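    /**
     * Sets the detector used to filter the extractors by the document MIME type;
     * pass {@code null} to disable detection and keep all configured extractors.
     */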
    public void setMIMETypeDetector(MIMETypeDetector detector) {
        this.detector = detector;
    }

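    /**
     * Triggers the extraction of all the RDF statements in the document: every matching extractor is
     * run and its statements are sent to the output triple handler.
     *
     * @param extractionParameters the parameters applied to the extraction; if {@code null}, defaults
     *                             derived from the configuration are used
     *
     * @return a report describing the outcome of the extraction
     *
     * @throws ExtractionException if an error occurs during the extraction
     * @throws IOException if an error occurs while reading the document
     */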
    public SingleDocumentExtractionReport run(ExtractionParameters extractionParameters)
            throws ExtractionException, IOException {
        if (extractionParameters == null) {
            extractionParameters = ExtractionParameters.newDefault(configuration);
        }

        final String contextIRI = extractionParameters
                .getProperty(ExtractionParameters.EXTRACTION_CONTEXT_IRI_PROPERTY);
        ensureHasLocalCopy();
        try {
            this.documentIRI = new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance())
                    .createIRI("?".equals(contextIRI) ? in.getDocumentIRI() : contextIRI);
        } catch (Exception ex) {
            throw new IllegalArgumentException("Invalid IRI: " + in.getDocumentIRI(), ex);
        }
        if (log.isDebugEnabled()) {
            log.debug("Processing " + this.documentIRI);
        }
        filterExtractorsByMIMEType();

        if (log.isDebugEnabled()) {
            StringBuilder sb = new StringBuilder("Extractors ");
            for (ExtractorFactory<?> factory : matchingExtractors) {
                sb.append(factory.getExtractorName());
                sb.append(' ');
            }
            sb.append("match ").append(documentIRI);
            log.debug(sb.toString());
        }

        final List<ResourceRoot> resourceRoots = new ArrayList<>();
        final List<PropertyPath> propertyPaths = new ArrayList<>();
        final Map<String, Collection<IssueReport.Issue>> extractorToIssues = new HashMap<>();

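        // Open the output document before running the extractors, so their triples can be streamed to it.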
        try {
            output.startDocument(documentIRI);
        } catch (TripleHandlerException e) {
            log.error(String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI));
            throw new ExtractionException(
                    String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI), e);
        }
        try {
            output.setContentLength(in.getContentLength());

            final String documentLanguage;
            try {
                documentLanguage = extractDocumentLanguage(extractionParameters);
                ArrayList<ExtractorFactory<?>> filteredList = new ArrayList<>(matchingExtractors.getNumOfExtractors());
                final boolean mimeTypeIsTooGeneric = isTooGeneric(detectedMIMEType);
                ArrayList<String> intersectionOfRdfMimetypes = null;
                for (ExtractorFactory<?> factory : matchingExtractors) {
                    final Extractor<?> extractor = factory.createExtractor();
                    final SingleExtractionReport er = runExtractor(extractionParameters, documentLanguage, extractor);

                    if (mimeTypeIsTooGeneric) {
                        List<String> rdfMimetypes = factory.getSupportedMIMETypes().stream()
                                .filter(mt -> !isTooGeneric(mt)).map(MIMEType::getFullType)
                                .collect(Collectors.toList());
                        if (er.touched) {
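                            // This extractor produced triples: intersect its specific RDF MIME types with
                            // those of the other producing extractors, to later narrow the detected type.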
                            if (intersectionOfRdfMimetypes == null) {
                                intersectionOfRdfMimetypes = new ArrayList<>(rdfMimetypes);
                            } else {
                                intersectionOfRdfMimetypes.retainAll(rdfMimetypes);
                            }
                        } else if (!rdfMimetypes.isEmpty()) {
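                            // This extractor declares specific RDF MIME types but produced no triples:
                            // skip it so it is not kept among the matching extractors below.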
                            continue;
                        }
                    }
                    resourceRoots.addAll(er.resourceRoots);
                    propertyPaths.addAll(er.propertyPaths);
                    filteredList.add(factory);
                    extractorToIssues.put(factory.getExtractorName(), er.issues);
                }
                matchingExtractors = new ExtractorGroup(filteredList);
                if (intersectionOfRdfMimetypes != null && !intersectionOfRdfMimetypes.isEmpty()) {
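                    // The detected MIME type was too generic: narrow it to one of the specific RDF
                    // types shared by all the extractors that actually produced triples.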
                    detectedMIMEType = MIMEType.parse(intersectionOfRdfMimetypes.get(0));
                }
            } catch (ValidatorException ve) {
                throw new ExtractionException("An error occurred during the validation phase.", ve);
            }

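            // Consolidate the extracted resources and emit the document metadata triples.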
            final boolean addDomainTriples = extractionParameters
                    .getFlag(ExtractionParameters.METADATA_DOMAIN_PER_ENTITY_FLAG);
            final ExtractionContext consolidationContext;
            if (extractionParameters.getFlag(ExtractionParameters.METADATA_NESTING_FLAG)) {
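                // Consolidation including the nesting relationships between microformat resources.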
                consolidationContext = consolidateResources(resourceRoots, propertyPaths, addDomainTriples, output,
                        documentLanguage);
            } else {
                consolidationContext = consolidateResources(resourceRoots, addDomainTriples, output, documentLanguage);
            }

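            // Optionally add extraction time/size metadata triples.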
            if (extractionParameters.getFlag(ExtractionParameters.METADATA_TIMESIZE_FLAG)) {
                try {
                    addExtractionTimeSizeMetaTriples(consolidationContext);
                } catch (TripleHandlerException e) {
                    throw new ExtractionException(
                            String.format(Locale.ROOT,
                                    "Error while adding extraction metadata triples for document with IRI %s", documentIRI),
                            e);
                }
            }
        } finally {
            try {
                output.endDocument(documentIRI);
            } catch (TripleHandlerException e) {
                log.error(String.format(Locale.ROOT, "Error ending document with IRI %s", documentIRI));
                throw new ExtractionException(
                        String.format(Locale.ROOT, "Error ending document with IRI %s", documentIRI), e);
            }
        }

        return new SingleDocumentExtractionReport(
                documentReport == null ? EmptyValidationReport.getInstance() : documentReport.getReport(),
                extractorToIssues);
    }

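    /**
     * Tells whether the given MIME type is too generic (missing, wildcard subtype, plain text,
     * octet stream or generic XML) to be used for narrowing down the matching extractors.
     */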
    private static boolean isTooGeneric(MIMEType type) {
        if (type == null || type.isAnySubtype()) {
            return true;
        }
        String mt = type.getFullType();
        return mt.equals(MimeTypes.PLAIN_TEXT) || mt.equals(MimeTypes.OCTET_STREAM) || mt.equals(MimeTypes.XML);
    }

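    /**
     * Triggers the extraction using the default extraction parameters derived from the configuration.
     *
     * @return a report describing the outcome of the extraction
     */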
    public SingleDocumentExtractionReport run() throws IOException, ExtractionException {
        return run(ExtractionParameters.newDefault(configuration));
    }

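    /**
     * Returns the detected MIME type of the document, triggering the detection if needed,
     * or {@code null} if no MIME type detector is configured.
     */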
    public String getDetectedMIMEType() throws IOException {
        filterExtractorsByMIMEType();
        return detectedMIMEType == null ? null : detectedMIMEType.toString();
    }

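    /**
     * Tells whether at least one of the configured extractors matches the (detected) document MIME type.
     */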
    public boolean hasMatchingExtractors() throws IOException {
        filterExtractorsByMIMEType();
        return !matchingExtractors.isEmpty();
    }

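    /**
     * Returns a freshly created instance of every matching extractor. Note that the MIME type
     * filtering must already have been triggered (e.g. via {@link #hasMatchingExtractors()}),
     * otherwise the list of matching extractors is still unset.
     */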
    @SuppressWarnings("rawtypes")
    public List<Extractor> getMatchingExtractors() {
        final List<Extractor> extractorsList = new ArrayList<>();
        for (ExtractorFactory extractorFactory : matchingExtractors) {
            extractorsList.add(extractorFactory.createExtractor());
        }
        return extractorsList;
    }

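    /**
     * Returns the charset used to parse the document, detecting it from the content on first use.
     */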
    public String getParserEncoding() {
        if (this.parserEncoding == null) {
            this.parserEncoding = detectEncoding();
        }
        return this.parserEncoding;
    }

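    /**
     * Overrides the charset used to parse the document and invalidates the cached tag soup DOM,
     * so that the document is re-parsed with the new encoding.
     */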
    public void setParserEncoding(String encoding) {
        this.parserEncoding = encoding;
        documentReport = null;
    }

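    /**
     * Tells whether the document appears to be HTML, i.e. whether at least one matching extractor
     * supports the {@code text/html} MIME type.
     */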
    private boolean isHTMLDocument() throws IOException {
        filterExtractorsByMIMEType();
        return !matchingExtractors.filterByMIMEType(MIMEType.parse("text/html")).isEmpty();
    }

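    /**
     * Extracts the default language of an HTML document from its tag soup DOM, returning {@code null}
     * for non-HTML documents or when the language cannot be determined.
     */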
    private String extractDocumentLanguage(ExtractionParameters extractionParameters)
            throws IOException, ValidatorException {
        if (!isHTMLDocument()) {
            return null;
        }
        final HTMLDocument document;
        try {
            document = new HTMLDocument(getTagSoupDOM(extractionParameters).getDocument());
        } catch (IOException ioe) {
            log.debug("Cannot extract language from document.", ioe);
            return null;
        }
        return document.getDefaultLanguage();
    }

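    /**
     * Lazily filters the configured extractors against the detected document MIME type. If no detector
     * is configured, or every extractor supports every content type, all configured extractors are kept.
     */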
    private void filterExtractorsByMIMEType() throws IOException {
        if (matchingExtractors != null)
            return;

        if (detector == null || extractors.allExtractorsSupportAllContentTypes()) {
            matchingExtractors = extractors;
            return;
        }
        ensureHasLocalCopy();

        detectedMIMEType = detector.guessMIMEType(java.net.URI.create(in.getDocumentIRI()).getPath(),
                localDocumentSource.openInputStream(), MIMEType.parse(localDocumentSource.getContentType()));
        log.debug("detected media type: " + detectedMIMEType);
        matchingExtractors = extractors.filterByMIMEType(detectedMIMEType);
    }

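    /**
     * Runs a single extractor on the document and collects the issues, resource roots and property
     * paths it produced, together with a flag telling whether it emitted any triple.
     */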
    private SingleExtractionReport runExtractor(final ExtractionParameters extractionParameters,
            final String documentLanguage, final Extractor<?> extractor)
            throws ExtractionException, IOException, ValidatorException {
        if (log.isDebugEnabled()) {
            log.debug("Running {} on {}", extractor.getDescription().getExtractorName(), documentIRI);
        }
        long startTime = System.currentTimeMillis();
        final ExtractionContext extractionContext = new ExtractionContext(extractor.getDescription().getExtractorName(),
                documentIRI, documentLanguage);
        final ExtractionResultImpl extractionResult = new ExtractionResultImpl(extractionContext, extractor, output);
        try {
            if (extractor instanceof BlindExtractor) {
                final BlindExtractor blindExtractor = (BlindExtractor) extractor;
                blindExtractor.run(extractionParameters, extractionContext, documentIRI, extractionResult);
            } else if (extractor instanceof ContentExtractor) {
                ensureHasLocalCopy();
                final ContentExtractor contentExtractor = (ContentExtractor) extractor;
                contentExtractor.run(extractionParameters, extractionContext, localDocumentSource.openInputStream(),
                        extractionResult);
            } else if (extractor instanceof TagSoupDOMExtractor) {
                final TagSoupDOMExtractor tagSoupDOMExtractor = (TagSoupDOMExtractor) extractor;
                final DocumentReport documentReport = getTagSoupDOM(extractionParameters);
                tagSoupDOMExtractor.run(extractionParameters, extractionContext, documentReport.getDocument(),
                        extractionResult);
            } else {
                throw new IllegalStateException("Extractor type not supported: " + extractor.getClass());
            }
            return new SingleExtractionReport(extractionResult.getIssues(),
                    new ArrayList<ResourceRoot>(extractionResult.getResourceRoots()),
                    new ArrayList<PropertyPath>(extractionResult.getPropertyPaths()), extractionResult.wasTouched());
        } catch (ExtractionException ex) {
            if (log.isDebugEnabled()) {
                log.debug(extractor.getDescription().getExtractorName() + ": " + ex.getMessage());
            }
            throw ex;
        } finally {
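            // Dump any extraction issue to the debug log before closing the extraction result.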
            if (log.isDebugEnabled() && extractionResult.hasIssues()) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                extractionResult.printReport(new PrintStream(baos, true, "UTF-8"));
                log.debug(baos.toString("UTF-8"));
            }
            extractionResult.close();

            long elapsed = System.currentTimeMillis() - startTime;
            if (log.isDebugEnabled()) {
                log.debug("Completed " + extractor.getDescription().getExtractorName() + ", " + elapsed + "ms");
            }
        }
    }

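    /**
     * Makes sure a local copy of the document content is available, creating one through the
     * configured {@link LocalCopyFactory} (by default a {@link MemCopyFactory}) if the source is remote.
     */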
    private void ensureHasLocalCopy() throws IOException {
        if (localDocumentSource != null)
            return;
        if (in.isLocal()) {
            localDocumentSource = in;
            return;
        }
        if (copyFactory == null) {
            copyFactory = new MemCopyFactory();
        }
        localDocumentSource = copyFactory.createLocalCopy(in);
    }

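    /**
     * Returns the tag soup DOM of the document, re-parsing the local copy only when no DOM has been
     * built yet or the extraction parameters differ from those used for the cached report.
     */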
    private DocumentReport getTagSoupDOM(ExtractionParameters extractionParameters)
            throws IOException, ValidatorException {
        if (documentReport == null || !extractionParameters.equals(tagSoupDOMRelatedParameters)) {
            ensureHasLocalCopy();
            final InputStream is = new BufferedInputStream(localDocumentSource.openInputStream());
            is.mark(Integer.MAX_VALUE);
            final String candidateEncoding = getParserEncoding();
            is.reset();
            final TagSoupParser tagSoupParser = new TagSoupParser(is, documentIRI.stringValue(), candidateEncoding);
            if (extractionParameters.isValidate()) {
                documentReport = tagSoupParser.getValidatedDOM(extractionParameters.isFix());
            } else {
                documentReport = new DocumentReport(EmptyValidationReport.getInstance(), tagSoupParser.getDOM());
            }
            tagSoupDOMRelatedParameters = extractionParameters;
        }
        return documentReport;
    }

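    /**
     * Detects the charset of the local copy of the document through the configured {@link EncodingDetector}.
     */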
    private String detectEncoding() {
        try {
            ensureHasLocalCopy();
            InputStream is = new BufferedInputStream(localDocumentSource.openInputStream());
            String encoding = this.encoderDetector.guessEncoding(is, localDocumentSource.getContentType());
            is.close();
            return encoding;
        } catch (Exception e) {
            throw new RuntimeException("An error occurred while trying to detect the input encoding.", e);
        }
    }

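    /**
     * Tells whether {@code candidateSub} is a prefix of {@code list}, i.e. whether the candidate
     * path addresses an ancestor-or-self of the node addressed by {@code list}.
     */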
    private boolean subPath(String[] list, String[] candidateSub) {
        if (candidateSub.length > list.length) {
            return false;
        }
        for (int i = 0; i < candidateSub.length; i++) {
            if (!candidateSub[i].equals(list[i])) {
                return false;
            }
        }
        return true;
    }

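    /**
     * Adds a triple stating the source Web domain for every resource root, within the given context.
     */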
    private void addDomainTriplesPerResourceRoots(List<ResourceRoot> resourceRoots, ExtractionContext context)
            throws ExtractionException {
        try {
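            // Add a source Web domain triple to every resource root.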
            String domain;
            try {
                domain = new java.net.URI(in.getDocumentIRI()).getHost();
            } catch (URISyntaxException urise) {
                throw new IllegalArgumentException("An error occurred while extracting the host from the document IRI.",
                        urise);
            }
            if (domain != null) {
                for (ResourceRoot resourceRoot : resourceRoots) {
                    output.receiveTriple(resourceRoot.getRoot(), vSINDICE.getProperty(SINDICE.DOMAIN),
                            SimpleValueFactory.getInstance().createLiteral(domain), null, context);
                }
            }
        } catch (TripleHandlerException e) {
            throw new ExtractionException("Error while writing triple.", e);
        } finally {
            try {
                output.closeContext(context);
            } catch (TripleHandlerException e) {
                throw new ExtractionException("Error while closing context.", e);
            }
        }
    }

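    /**
     * Creates the consolidation {@link ExtractionContext} used to emit the document metadata triples.
     */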
    private ExtractionContext createExtractionContext(String defaultLanguage) {
        return new ExtractionContext("consolidation-extractor", documentIRI, defaultLanguage,
                UUID.randomUUID().toString());
    }

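    /**
     * Detects the nesting relationships among resource roots and property paths produced by different
     * microformat extractors and writes a nesting triple for each detected pair.
     */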
    private void addNestingRelationship(List<ResourceRoot> resourceRoots, List<PropertyPath> propertyPaths,
            ExtractionContext context) throws TripleHandlerException {
        ResourceRoot currentResourceRoot;
        PropertyPath currentPropertyPath;
        for (int r = 0; r < resourceRoots.size(); r++) {
            currentResourceRoot = resourceRoots.get(r);
            for (int p = 0; p < propertyPaths.size(); p++) {
                currentPropertyPath = propertyPaths.get(p);
                Class<? extends MicroformatExtractor> currentResourceRootExtractor = currentResourceRoot.getExtractor();
                Class<? extends MicroformatExtractor> currentPropertyPathExtractor = currentPropertyPath.getExtractor();

                if (currentResourceRootExtractor.equals(currentPropertyPathExtractor)) {
                    continue;
                }

                if (MicroformatExtractor.includes(currentPropertyPathExtractor, currentResourceRootExtractor)) {
                    continue;
                }
                if (subPath(currentResourceRoot.getPath(), currentPropertyPath.getPath())) {
                    createNestingRelationship(currentPropertyPath, currentResourceRoot, output, context);
                }
            }
        }
    }

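    /**
     * Consolidates the extracted resources within a dedicated consolidation context: optionally adds
     * the source domain triples and materializes the nesting relationships between resource roots and
     * property paths.
     *
     * @return the consolidation context used to write the metadata triples
     */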
    private ExtractionContext consolidateResources(List<ResourceRoot> resourceRoots, List<PropertyPath> propertyPaths,
            boolean addDomainTriples, TripleHandler output, String defaultLanguage) throws ExtractionException {
        final ExtractionContext context = createExtractionContext(defaultLanguage);

        try {
            output.openContext(context);
        } catch (TripleHandlerException e) {
            throw new ExtractionException(
                    String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI), e);
        }

        try {
            if (addDomainTriples) {
                addDomainTriplesPerResourceRoots(resourceRoots, context);
            }
            addNestingRelationship(resourceRoots, propertyPaths, context);
        } catch (TripleHandlerException the) {
            throw new ExtractionException("Error while writing triple.", the);
        } finally {
            try {
                output.closeContext(context);
            } catch (TripleHandlerException e) {
                throw new ExtractionException("Error while closing context.", e);
            }
        }

        return context;
    }

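    /**
     * Consolidates the extracted resources within a dedicated consolidation context, optionally adding
     * the source domain triples, without materializing nesting relationships.
     *
     * @return the consolidation context used to write the metadata triples
     */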
    private ExtractionContext consolidateResources(List<ResourceRoot> resourceRoots, boolean addDomainTriples,
            TripleHandler output, String defaultLanguage) throws ExtractionException {
        final ExtractionContext context = createExtractionContext(defaultLanguage);

        try {
            output.openContext(context);
        } catch (TripleHandlerException e) {
            throw new ExtractionException(
                    String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI), e);
        }

        try {
            if (addDomainTriples) {
                addDomainTriplesPerResourceRoots(resourceRoots, context);
            }
        } finally {
            try {
                output.closeContext(context);
            } catch (TripleHandlerException the) {
                throw new ExtractionException("Error while closing context.", the);
            }
        }

        return context;
    }

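    /**
     * Adds metadata triples describing the extraction itself: the extraction timestamp and the number
     * of extracted triples, both attached to the document IRI.
     */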
    private void addExtractionTimeSizeMetaTriples(ExtractionContext context) throws TripleHandlerException {
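        // Add the extraction timestamp to the document.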
        String xsdDateTimeNow = RDFUtils.toXSDDateTime(new Date());
        output.receiveTriple(SimpleValueFactory.getInstance().createIRI(documentIRI.toString()),
                vSINDICE.getProperty(SINDICE.DATE), SimpleValueFactory.getInstance().createLiteral(xsdDateTimeNow),
                null, context);

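        // Add the number of extracted triples, read from the CountingTripleHandler chained to the output.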
        int numberOfTriples = 0;
        CompositeTripleHandler cth = (CompositeTripleHandler) output;
        for (TripleHandler th : cth.getChilds()) {
            if (th instanceof CountingTripleHandler) {
                numberOfTriples = ((CountingTripleHandler) th).getCount();
            }
        }
        output.receiveTriple(SimpleValueFactory.getInstance().createIRI(documentIRI.toString()),
                vSINDICE.getProperty(SINDICE.SIZE), SimpleValueFactory.getInstance().createLiteral(numberOfTriples + 1),
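                // Note: the +1 presumably accounts for this size triple itself, which is not yet counted.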
                null, context);
    }

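    /**
     * Writes the triples expressing a single nesting relationship: a blank node ties the nesting
     * property to the nested (structured) value and is then attached to the embedding subject.
     */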
    private void createNestingRelationship(PropertyPath from, ResourceRoot to, TripleHandler th, ExtractionContext ec)
            throws TripleHandlerException {
        final BNode fromObject = from.getObject();
        final String bNodeHash = from.getProperty().stringValue() + (fromObject == null ? "" : fromObject.getID());
        BNode bnode = RDFUtils.getBNode(bNodeHash);
        th.receiveTriple(bnode, vSINDICE.getProperty(SINDICE.NESTING_ORIGINAL), from.getProperty(), null, ec);
        th.receiveTriple(bnode, vSINDICE.getProperty(SINDICE.NESTING_STRUCTURED),
                from.getObject() == null ? to.getRoot() : from.getObject(), null, ec);
        th.receiveTriple(from.getSubject(), vSINDICE.getProperty(SINDICE.NESTING), bnode, null, ec);
    }

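    /**
     * Value object collecting the outcome of a single extractor run: the reported issues, the produced
     * resource roots and property paths, and whether the extractor emitted any triple.
     */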
    private static class SingleExtractionReport {
        private final Collection<IssueReport.Issue> issues;
        private final List<ResourceRoot> resourceRoots;
        private final List<PropertyPath> propertyPaths;
        private final boolean touched;

        public SingleExtractionReport(Collection<IssueReport.Issue> issues, List<ResourceRoot> resourceRoots,
                List<PropertyPath> propertyPaths, boolean wasTouched) {
            this.issues = issues;
            this.resourceRoots = resourceRoots;
            this.propertyPaths = propertyPaths;
            this.touched = wasTouched;
        }
    }

}