View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.html;
19  
20  import org.apache.any23.AbstractAny23TestBase;
21  import org.apache.any23.extractor.IssueReport;
22  import org.apache.any23.extractor.IssueReport.Issue;
23  import org.apache.any23.extractor.IssueReport.IssueLevel;
24  import org.apache.any23.extractor.ExtractionException;
25  import org.apache.any23.extractor.ExtractorFactory;
26  import org.apache.any23.extractor.SingleDocumentExtraction;
27  import org.apache.any23.extractor.SingleDocumentExtractionReport;
28  import org.apache.any23.rdf.RDFUtils;
29  import org.apache.any23.vocab.SINDICE;
30  import org.apache.any23.writer.RepositoryWriter;
31  import org.junit.After;
32  import org.junit.Assert;
33  import org.junit.Before;
34  import org.eclipse.rdf4j.common.iteration.Iterations;
35  import org.eclipse.rdf4j.model.BNode;
36  import org.eclipse.rdf4j.model.Literal;
37  import org.eclipse.rdf4j.model.Resource;
38  import org.eclipse.rdf4j.model.Statement;
39  import org.eclipse.rdf4j.model.IRI;
40  import org.eclipse.rdf4j.model.Value;
41  import org.eclipse.rdf4j.repository.RepositoryConnection;
42  import org.eclipse.rdf4j.repository.RepositoryException;
43  import org.eclipse.rdf4j.repository.RepositoryResult;
44  import org.eclipse.rdf4j.repository.sail.SailRepository;
45  import org.eclipse.rdf4j.rio.RDFFormat;
46  import org.eclipse.rdf4j.rio.RDFHandlerException;
47  import org.eclipse.rdf4j.rio.RDFParseException;
48  import org.eclipse.rdf4j.rio.Rio;
49  import org.eclipse.rdf4j.sail.Sail;
50  import org.eclipse.rdf4j.sail.memory.MemoryStore;
51  import org.slf4j.Logger;
52  import org.slf4j.LoggerFactory;
53  
54  import java.io.ByteArrayOutputStream;
55  import java.io.IOException;
56  import java.io.PrintStream;
57  import java.io.StringWriter;
58  import java.lang.invoke.MethodHandles;
59  import java.nio.charset.StandardCharsets;
60  import java.util.ArrayList;
61  import java.util.Collection;
62  import java.util.Collections;
63  import java.util.List;
64  import java.util.Locale;
65  import java.util.Map;
66  
67  /**
68   * Abstract class used to write {@link org.apache.any23.extractor.Extractor} specific test cases.
69   */
70  public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
71  
72      private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
73  
74      /**
75       * Base test document.
76       */
77      protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/");
78  
79      /**
80       * Internal connection used to collect extraction results.
81       */
82      protected RepositoryConnection conn;
83  
84      /**
85       * The latest generated report.
86       */
87      private SingleDocumentExtractionReport report;
88  
89      private Sail store;
90  
91      private SailRepository repository;
92  
93      /**
94       * Constructor.
95       */
96      public AbstractExtractorTestCase() {
97          super();
98      }
99  
100     /**
101      * @return the factory of the extractor to be tested.
102      */
103     protected abstract ExtractorFactory<?> getExtractorFactory();
104 
105     /**
106      * Test case initialization.
107      * 
108      * @throws Exception
109      *             if there is an error constructing input objects
110      */
111     @Before
112     public void setUp() throws Exception {
113         super.setUp();
114         store = new MemoryStore();
115         repository = new SailRepository(store);
116         repository.init();
117         conn = repository.getConnection();
118     }
119 
120     /**
121      * Test case resources release.
122      *
123      * @throws RepositoryException
124      *             if an error is encountered whilst loading content from a storage connection
125      * 
126      */
127     @After
128     public void tearDown() throws RepositoryException {
129         try {
130             conn.close();
131         } finally {
132             repository.shutDown();
133         }
134         conn = null;
135         report = null;
136         store = null;
137         repository = null;
138     }
139 
140     /**
141      * @return the connection to the memory repository.
142      */
143     protected RepositoryConnection getConnection() {
144         return conn;
145     }
146 
147     /**
148      * @return the last generated report.
149      */
150     protected SingleDocumentExtractionReport getReport() {
151         return report;
152     }
153 
154     /**
155      * Returns the list of issues raised by a given extractor.
156      *
157      * @param extractorName
158      *            name of the extractor.
159      * 
160      * @return collection of issues.
161      */
162     protected Collection<IssueReport.Issue> getIssues(String extractorName) {
163         for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report.getExtractorToIssues().entrySet()) {
164             if (issueEntry.getKey().equals(extractorName)) {
165                 return issueEntry.getValue();
166             }
167         }
168         return Collections.emptyList();
169     }
170 
171     /**
172      * Returns the list of issues raised by the extractor under testing.
173      *
174      * @return collection of issues.
175      */
176     protected Collection<IssueReport.Issue> getIssues() {
177         return getIssues(getExtractorFactory().getExtractorName());
178     }
179 
180     /**
181      * Applies the extractor provided by the {@link #getExtractorFactory()} to the specified resource.
182      *
183      * @param resource
184      *            resource name.
185      * 
186      * @throws org.apache.any23.extractor.ExtractionException
187      *             if there is an exception during extraction
188      * @throws IOException
189      *             if there is an error processing the input data
190      */
191     // TODO: MimeType detector to null forces the execution of all extractors,
192     // but extraction
193     // tests should be based on mimetype detection.
194     protected void extract(String resource) throws ExtractionException, IOException {
195         SingleDocumentExtraction ex = new SingleDocumentExtraction(
196                 new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI.stringValue()),
197                 getExtractorFactory(), new RepositoryWriter(conn));
198         ex.setMIMETypeDetector(null);
199         report = ex.run();
200     }
201 
202     /**
203      * Performs data extraction over the content of a resource and assert that the extraction was fine.
204      *
205      * @param resource
206      *            resource name.
207      * @param assertNoIssues
208      *            if <code>true</code>invokes {@link #assertNoIssues()} after the extraction.
209      */
210     protected void assertExtract(String resource, boolean assertNoIssues) {
211         try {
212             extract(resource);
213             if (assertNoIssues)
214                 assertNoIssues();
215         } catch (ExtractionException ex) {
216             throw new RuntimeException(ex);
217         } catch (IOException ex) {
218             throw new RuntimeException(ex);
219         }
220     }
221 
222     /**
223      * Performs data extraction over the content of a resource and assert that the extraction was fine and raised no
224      * issues.
225      *
226      * @param resource
227      *            input resource to test extraction on.
228      */
229     protected void assertExtract(String resource) {
230         assertExtract(resource, true);
231     }
232 
233     /**
234      * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
235      *
236      * @param p
237      *            predicate
238      * @param o
239      *            object.
240      * 
241      * @throws RepositoryException
242      *             if an error is encountered whilst loading content from a storage connection
243      * 
244      */
245     protected void assertContains(IRI p, Resource o) throws RepositoryException {
246         assertContains(null, p, o);
247     }
248 
249     /**
250      * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
251      *
252      * @param p
253      *            predicate
254      * @param o
255      *            object.
256      * 
257      * @throws RepositoryException
258      *             if an error is encountered whilst loading content from a storage connection
259      * 
260      */
261     protected void assertContains(IRI p, String o) throws RepositoryException {
262         assertContains(null, p, RDFUtils.literal(o));
263     }
264 
265     /**
266      * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
267      *
268      * @param p
269      *            predicate
270      * @param o
271      *            object.
272      * 
273      * @throws RepositoryException
274      *             if an error is encountered whilst loading content from a storage connection
275      * 
276      */
277     protected void assertNotContains(IRI p, Resource o) throws RepositoryException {
278         assertNotContains(null, p, o);
279     }
280 
281     /**
282      * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
283      *
284      * @param s
285      *            subject.
286      * @param p
287      *            predicate.
288      * @param o
289      *            object.
290      * 
291      * @throws RepositoryException
292      *             if an error is encountered whilst loading content from a storage connection
293      * 
294      */
295     protected void assertContains(Resource s, IRI p, Value o) throws RepositoryException {
296         Assert.assertTrue(
297                 getFailedExtractionMessage() + String.format(Locale.ROOT, "Cannot find triple (%s %s %s)", s, p, o),
298                 conn.hasStatement(s, p, o, false));
299     }
300 
301     /**
302      * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
303      *
304      * @param s
305      *            subject.
306      * @param p
307      *            predicate.
308      * @param o
309      *            object.
310      * 
311      * @throws RepositoryException
312      *             if an error is encountered whilst loading content from a storage connection
313      * 
314      */
315     protected void assertNotContains(Resource s, IRI p, String o) throws RepositoryException {
316         Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, RDFUtils.literal(o), false));
317     }
318 
319     /**
320      * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
321      *
322      * @param s
323      *            subject.
324      * @param p
325      *            predicate.
326      * @param o
327      *            object.
328      * 
329      * @throws RepositoryException
330      *             if an error is encountered whilst loading content from a storage connection
331      * 
332      */
333     protected void assertNotContains(Resource s, IRI p, Resource o) throws RepositoryException {
334         Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
335     }
336 
337     /**
338      * Asserts that the model contains at least a statement.
339      *
340      * @throws RepositoryException
341      *             if an error is encountered whilst loading content from a storage connection
342      * 
343      */
344     protected void assertModelNotEmpty() throws RepositoryException {
345         Assert.assertFalse("The model is expected to not be empty." + getFailedExtractionMessage(), conn.isEmpty());
346     }
347 
348     /**
349      * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
350      *
351      * @param s
352      *            subject.
353      * @param p
354      *            predicate.
355      * @param o
356      *            object.
357      * 
358      * @throws RepositoryException
359      *             if an error is encountered whilst loading content from a storage connection
360      * 
361      */
362     protected void assertNotContains(Resource s, IRI p, Literal o) throws RepositoryException {
363         Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
364     }
365 
366     /**
367      * Asserts that the model is expected to contains no statements.
368      *
369      * @throws RepositoryException
370      *             if an error is encountered whilst loading content from a storage connection
371      * 
372      */
373     protected void assertModelEmpty() throws RepositoryException {
374         Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
375     }
376 
377     /**
378      * Asserts that the extraction generated no issues.
379      */
380     protected void assertNoIssues() {
381         for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report.getExtractorToIssues().entrySet()) {
382             if (entry.getValue().size() > 0) {
383                 log.debug("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
384             }
385             for (Issue nextIssue : entry.getValue()) {
386                 if (nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
387                     Assert.fail("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
388                 }
389             }
390         }
391     }
392 
393     /**
394      * Asserts that an issue has been produced by the processed {@link org.apache.any23.extractor.Extractor}.
395      *
396      * @param level
397      *            expected issue level
398      * @param issueRegex
399      *            regex matching the expected human readable issue message.
400      */
401     protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
402         final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory().getExtractorName());
403         boolean found = false;
404         for (IssueReport.Issue issue : issues) {
405             if (issue.getLevel() == level && issue.getMessage().matches(issueRegex)) {
406                 found = true;
407                 break;
408             }
409         }
410         Assert.assertTrue(String.format(Locale.ROOT, "Cannot find issue with level %s matching expression '%s'", level,
411                 issueRegex), found);
412     }
413 
414     /**
415      * Verifies that the current model contains all the given statements.
416      *
417      * @param statements
418      *            list of statements to be verified.
419      * 
420      * @throws RepositoryException
421      *             if an error is encountered whilst loading content from a storage connection
422      * 
423      */
424     public void assertContainsModel(Statement[] statements) throws RepositoryException {
425         for (Statement statement : statements) {
426             assertContains(statement);
427         }
428     }
429 
430     /**
431      * Verifies that the current model contains all the statements declared in the specified <code>modelFile</code>.
432      *
433      * @param modelResource
434      *            the resource containing the model.
435      * 
436      * @throws RDFHandlerException
437      *             if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler}
438      * @throws IOException
439      *             if there is an error processing the input data
440      * @throws RDFParseException
441      *             if there is an exception parsing the RDF stream
442      * @throws RepositoryException
443      *             if an error is encountered whilst loading content from a storage connection
444      * 
445      */
446     public void assertContainsModel(String modelResource)
447             throws RDFHandlerException, IOException, RDFParseException, RepositoryException {
448         getConnection().remove(null, SINDICE.getInstance().date, (Value) null, (Resource) null);
449         getConnection().remove(null, SINDICE.getInstance().size, (Value) null, (Resource) null);
450         assertContainsModel(RDFUtils.parseRDF(modelResource));
451     }
452 
453     /**
454      * Asserts that the given pattern <code>(s p o)</code> satisfies the expected number of statements.
455      *
456      * @param s
457      *            subject.
458      * @param p
459      *            predicate.
460      * @param o
461      *            object.
462      * @param expected
463      *            expected matches.
464      * 
465      * @throws RepositoryException
466      *             if an error is encountered whilst loading content from a storage connection
467      * 
468      */
469     protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
470             throws RDFHandlerException, RepositoryException {
471         int statementsSize = getStatementsSize(s, p, o);
472         if (statementsSize != expected) {
473             final ByteArrayOutputStream baos = new ByteArrayOutputStream();
474             PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8);
475             getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, ps));
476         }
477 
478         Assert.assertEquals("Unexpected number of matching statements.", expected, statementsSize);
479     }
480 
481     /**
482      * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
483      *
484      * @param p
485      *            predicate.
486      * @param o
487      *            object.
488      * @param expected
489      *            expected matches.
490      * 
491      * @throws RepositoryException
492      *             if an error is encountered whilst loading content from a storage connection
493      * 
494      */
495     protected void assertStatementsSize(IRI p, Value o, int expected) throws RDFHandlerException, RepositoryException {
496         assertStatementsSize(null, p, o, expected);
497     }
498 
499     /**
500      * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
501      *
502      * @param p
503      *            predicate.
504      * @param o
505      *            object.
506      * @param expected
507      *            expected matches.
508      * 
509      * @throws RepositoryException
510      *             if an error is encountered whilst loading content from a storage connection
511      * 
512      */
513     protected void assertStatementsSize(IRI p, String o, int expected) throws RDFHandlerException, RepositoryException {
514         assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), expected);
515     }
516 
517     /**
518      * Asserts that the given pattern <code>(s p _)</code> is not present.
519      *
520      * @param s
521      *            subject.
522      * @param p
523      *            predicate.
524      * 
525      * @throws RepositoryException
526      *             if an error is encountered whilst loading content from a storage connection
527      * 
528      */
529     protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
530         RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
531         try {
532             Assert.assertFalse("Expected no statements.", statements.hasNext());
533         } finally {
534             statements.close();
535         }
536     }
537 
538     /**
539      * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it is expected to exists and be just one.
540      *
541      * @param p
542      *            predicate.
543      * @param o
544      *            object.
545      * 
546      * @return the matching blank subject.
547      * 
548      * @throws RepositoryException
549      *             if an error is encountered whilst loading content from a storage connection
550      * 
551      */
552     protected Resource findExactlyOneBlankSubject(IRI p, Value o) throws RepositoryException {
553         RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
554         try {
555             Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
556             Statement stmt = it.next();
557             Resource result = stmt.getSubject();
558             Assert.assertTrue(getFailedExtractionMessage(), result instanceof BNode);
559             Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
560             return result;
561         } finally {
562             it.close();
563         }
564     }
565 
566     /**
567      * Returns the object matching the pattern <code>(s p o)</code>, it is expected to exists and be just one.
568      *
569      * @param s
570      *            subject.
571      * @param p
572      *            predicate.
573      * 
574      * @return the matching object.
575      * 
576      * @throws RepositoryException
577      *             if an error is encountered whilst loading content from a storage connection
578      * 
579      */
580     protected Value findExactlyOneObject(Resource s, IRI p) throws RepositoryException {
581         RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
582         try {
583             Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
584             return it.next().getObject();
585         } finally {
586             it.close();
587         }
588     }
589 
590     /**
591      * Returns all the subjects matching the pattern <code>(s? p o)</code>.
592      *
593      * @param p
594      *            predicate.
595      * @param o
596      *            object.
597      * 
598      * @return list of matching subjects.
599      * 
600      * @throws RepositoryException
601      *             if an error is encountered whilst loading content from a storage connection
602      * 
603      */
604     protected List<Resource> findSubjects(IRI p, Value o) throws RepositoryException {
605         RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
606         List<Resource> subjects = new ArrayList<Resource>();
607         try {
608             Statement statement;
609             while (it.hasNext()) {
610                 statement = it.next();
611                 subjects.add(statement.getSubject());
612             }
613         } finally {
614             it.close();
615         }
616         return subjects;
617     }
618 
619     /**
620      * Returns all the objects matching the pattern <code>(s p _)</code>.
621      *
622      * @param s
623      *            predicate.
624      * @param p
625      *            predicate.
626      * 
627      * @return list of matching objects.
628      * 
629      * @throws RepositoryException
630      *             if an error is encountered whilst loading content from a storage connection
631      * 
632      */
633     protected List<Value> findObjects(Resource s, IRI p) throws RepositoryException {
634         RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
635         List<Value> objects = new ArrayList<Value>();
636         try {
637             Statement statement;
638             while (it.hasNext()) {
639                 statement = it.next();
640                 objects.add(statement.getObject());
641             }
642         } finally {
643             it.close();
644         }
645         return objects;
646     }
647 
648     /**
649      * Finds the object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
650      *
651      * @param s
652      *            subject.
653      * @param p
654      *            predicate
655      * 
656      * @return matching object.
657      * 
658      * @throws org.eclipse.rdf4j.repository.RepositoryException
659      *             if an error is encountered whilst loading content from a storage connection
660      */
661     protected Value findObject(Resource s, IRI p) throws RepositoryException {
662         RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
663         try {
664             Assert.assertTrue("Expected at least a statement.", statements.hasNext());
665             return (statements.next().getObject());
666         } finally {
667             statements.close();
668         }
669     }
670 
671     /**
672      * Finds the resource object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
673      *
674      * @param s
675      *            subject.
676      * @param p
677      *            predicate.
678      * 
679      * @return matching object.
680      * 
681      * @throws RepositoryException
682      *             if an error is encountered whilst loading content from a storage connection
683      * 
684      */
685     protected Resource findObjectAsResource(Resource s, IRI p) throws RepositoryException {
686         final Value v = findObject(s, p);
687         try {
688             return (Resource) v;
689         } catch (ClassCastException cce) {
690             Assert.fail("Expected resource object, found: " + v.getClass().getSimpleName());
691             throw new IllegalStateException();
692         }
693     }
694 
695     /**
696      * Finds the literal object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
697      *
698      * @param s
699      *            subject.
700      * @param p
701      *            predicate.
702      * 
703      * @return matching object.
704      * 
705      * @throws RepositoryException
706      *             if an error is encountered whilst loading content from a storage connection
707      * 
708      */
709     protected String findObjectAsLiteral(Resource s, IRI p) throws RepositoryException {
710         return findObject(s, p).stringValue();
711     }
712 
713     /**
714      * Dumps the extracted model in <i>Turtle</i> format.
715      *
716      * @return a string containing the model in Turtle.
717      * 
718      * @throws RepositoryException
719      *             if an error is encountered whilst loading content from a storage connection
720      * 
721      */
722     protected String dumpModelToTurtle() throws RepositoryException {
723         StringWriter w = new StringWriter();
724         try {
725             conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
726             return w.toString();
727         } catch (RDFHandlerException ex) {
728             throw new RuntimeException(ex);
729         }
730     }
731 
732     /**
733      * Dumps the extracted model in <i>NQuads</i> format.
734      *
735      * @return a string containing the model in NQuads.
736      * 
737      * @throws RepositoryException
738      *             if an error is encountered whilst loading content from a storage connection
739      * 
740      */
741     protected String dumpModelToNQuads() throws RepositoryException {
742         StringWriter w = new StringWriter();
743         try {
744             conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
745             return w.toString();
746         } catch (RDFHandlerException ex) {
747             throw new RuntimeException(ex);
748         }
749     }
750 
751     /**
752      * Dumps the extracted model in <i>RDFXML</i> format.
753      *
754      * @return a string containing the model in RDFXML.
755      * 
756      * @throws RepositoryException
757      *             if an error is encountered whilst loading content from a storage connection
758      * 
759      */
760     protected String dumpModelToRDFXML() throws RepositoryException {
761         StringWriter w = new StringWriter();
762         try {
763             conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
764             return w.toString();
765         } catch (RDFHandlerException ex) {
766             throw new RuntimeException(ex);
767         }
768     }
769 
770     /**
771      * Dumps the list of statements contained in the extracted model.
772      *
773      * @return list of extracted statements.
774      * 
775      * @throws RepositoryException
776      *             if an error is encountered whilst loading content from a storage connection
777      * 
778      */
779     protected List<Statement> dumpAsListOfStatements() throws RepositoryException {
780         return Iterations.asList(conn.getStatements(null, null, null, false));
781     }
782 
783     /**
784      * @return string containing human readable statements.
785      * 
786      * @throws RepositoryException
787      *             if an error is encountered whilst loading content from a storage connection
788      * 
789      */
790     protected String dumpHumanReadableTriples() throws RepositoryException {
791         StringBuilder sb = new StringBuilder();
792         RepositoryResult<Statement> result = conn.getStatements(null, null, null, false);
793         while (result.hasNext()) {
794             Statement statement = result.next();
795             sb.append(String.format(Locale.ROOT, "%s %s %s %s\n", statement.getSubject(), statement.getPredicate(),
796                     statement.getObject(), statement.getContext()));
797 
798         }
799         return sb.toString();
800     }
801 
802     /**
803      * Checks that a statement is contained in the extracted model. If the statement declares bnodes, they are replaced
804      * with <code>_</code> patterns.
805      *
806      * @param statement
807      *            an RDF {@link org.eclipse.rdf4j.model.Statement} implementation
808      * 
809      * @throws RepositoryException
810      *             if an error is encountered whilst loading content from a storage connection
811      * 
812      */
813     // TODO: bnode check is too weak, introduce graph omomorphism check.
814     protected void assertContains(Statement statement) throws RepositoryException {
815         Assert.assertTrue("Cannot find statement " + statement + " in model.",
816                 conn.hasStatement(statement.getSubject() instanceof BNode ? null : statement.getSubject(),
817                         statement.getPredicate(), statement.getObject() instanceof BNode ? null : statement.getObject(),
818                         false));
819     }
820 
821     /**
822      * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a literal.
823      *
824      * @param s
825      *            subject.
826      * @param p
827      *            predicate.
828      * @param l
829      *            literal content.
830      * 
831      * @throws RepositoryException
832      *             if an error is encountered whilst loading content from a storage connection
833      * 
834      */
835     protected void assertContains(Resource s, IRI p, String l) throws RepositoryException {
836         assertContains(s, p, RDFUtils.literal(l));
837     }
838 
839     /**
840      * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a language literal.
841      *
842      * @param s
843      *            subject.
844      * @param p
845      *            predicate.
846      * @param l
847      *            literal content.
848      * @param lang
849      *            literal language.
850      * 
851      * @throws RepositoryException
852      *             if an error is encountered whilst loading content from a storage connection
853      * 
854      */
855     protected void assertContains(Resource s, IRI p, String l, String lang) throws RepositoryException {
856         assertContains(s, p, RDFUtils.literal(l, lang));
857     }
858 
859     /**
860      * Returns all statements matching the pattern <code>(s p o)</code>.
861      *
862      * @param s
863      *            subject.
864      * @param p
865      *            predicate.
866      * @param o
867      *            object.
868      * 
869      * @return list of statements.
870      * 
871      * @throws RepositoryException
872      *             if an error is encountered whilst loading content from a storage connection
873      * 
874      */
875     protected RepositoryResult<Statement> getStatements(Resource s, IRI p, Value o) throws RepositoryException {
876         return conn.getStatements(s, p, o, false);
877     }
878 
879     /**
880      * Counts all statements matching the pattern <code>(s p o)</code>.
881      *
882      * @param s
883      *            subject.
884      * @param p
885      *            predicate.
886      * @param o
887      *            object.
888      * 
889      * @return number of matches.
890      * 
891      * @throws RepositoryException
892      *             if an error is encountered whilst loading content from a storage connection
893      * 
894      */
895     protected int getStatementsSize(Resource s, IRI p, Value o) throws RepositoryException {
896         RepositoryResult<Statement> result = getStatements(s, p, o);
897         int count = 0;
898         try {
899             while (result.hasNext()) {
900                 result.next();
901                 count++;
902             }
903         } finally {
904             result.close();
905         }
906         return count;
907     }
908 
909     private String getFailedExtractionMessage() throws RepositoryException {
910         return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
911     }
912 
913 }