1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.csv;
19
20 import org.apache.any23.extractor.ExtractorFactory;
21 import org.apache.any23.extractor.html.AbstractExtractorTestCase;
22 import org.apache.any23.vocab.CSV;
23 import org.junit.Test;
24 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
25 import org.eclipse.rdf4j.model.vocabulary.RDF;
26 import org.eclipse.rdf4j.model.vocabulary.XSD;
27 import org.slf4j.Logger;
28 import org.slf4j.LoggerFactory;
29
30
31
32
33
34
35 public class CSVExtractorTest extends AbstractExtractorTestCase {
36
37 private static final Logger logger = LoggerFactory.getLogger(CSVExtractorTest.class);
38
39 @Override
40 protected ExtractorFactory<?> getExtractorFactory() {
41 return new CSVExtractorFactory();
42 }
43
44 @Test
45 public void testExtractionCommaSeparated() throws Exception {
46 CSV csv = CSV.getInstance();
47 assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
48 logger.debug(dumpModelToRDFXML());
49
50 assertModelNotEmpty();
51 assertStatementsSize(null, null, null, 28);
52 assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
53 assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
54 assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
55 }
56
57 @Test
58 public void testExtractionSemicolonSeparated() throws Exception {
59 CSV csv = CSV.getInstance();
60 assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
61 logger.debug(dumpModelToRDFXML());
62
63 assertModelNotEmpty();
64 assertStatementsSize(null, null, null, 28);
65 assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
66 assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
67 assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
68 }
69
70 @Test
71 public void testExtractionTabSeparated() throws Exception {
72 CSV csv = CSV.getInstance();
73 assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
74 logger.debug(dumpModelToRDFXML());
75
76 assertModelNotEmpty();
77 assertStatementsSize(null, null, null, 28);
78 assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
79 assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
80 assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
81 }
82
83 @Test
84 public void testTypeManagement() throws Exception {
85 CSV csv = CSV.getInstance();
86 assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
87 logger.debug(dumpModelToRDFXML());
88
89 assertModelNotEmpty();
90 assertStatementsSize(null, null, null, 21);
91 assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
92 assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2", XSD.INTEGER));
93 assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
94 assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XSD.FLOAT));
95 assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XSD.FLOAT));
96 assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XSD.INTEGER));
97 }
98
99 @Test
100 public void testExtractionEmptyValue() throws Exception {
101 CSV csv = CSV.getInstance();
102 assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
103 logger.debug(dumpModelToRDFXML());
104
105 assertModelNotEmpty();
106 assertStatementsSize(null, null, null, 25);
107 assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
108 assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
109 assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
110 assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XSD.STRING));
111 assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Giovanni", XSD.STRING));
112 }
113
114 }