View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *  http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.any23.extractor.csv;
19  
20  import org.apache.any23.extractor.ExtractorFactory;
21  import org.apache.any23.extractor.html.AbstractExtractorTestCase;
22  import org.apache.any23.vocab.CSV;
23  import org.junit.Test;
24  import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
25  import org.eclipse.rdf4j.model.vocabulary.RDF;
26  import org.eclipse.rdf4j.model.vocabulary.XSD;
27  import org.slf4j.Logger;
28  import org.slf4j.LoggerFactory;
29  
30  /**
31   * Reference test case for {@link CSVExtractor}.
32   *
33   * @author Davide Palmisano ( dpalmisano@gmail.com )
34   */
35  public class CSVExtractorTest extends AbstractExtractorTestCase {
36  
37      private static final Logger logger = LoggerFactory.getLogger(CSVExtractorTest.class);
38  
39      @Override
40      protected ExtractorFactory<?> getExtractorFactory() {
41          return new CSVExtractorFactory();
42      }
43  
44      @Test
45      public void testExtractionCommaSeparated() throws Exception {
46          CSV csv = CSV.getInstance();
47          assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
48          logger.debug(dumpModelToRDFXML());
49  
50          assertModelNotEmpty();
51          assertStatementsSize(null, null, null, 28);
52          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
53          assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
54          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
55      }
56  
57      @Test
58      public void testExtractionSemicolonSeparated() throws Exception {
59          CSV csv = CSV.getInstance();
60          assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
61          logger.debug(dumpModelToRDFXML());
62  
63          assertModelNotEmpty();
64          assertStatementsSize(null, null, null, 28);
65          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
66          assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
67          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
68      }
69  
70      @Test
71      public void testExtractionTabSeparated() throws Exception {
72          CSV csv = CSV.getInstance();
73          assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
74          logger.debug(dumpModelToRDFXML());
75  
76          assertModelNotEmpty();
77          assertStatementsSize(null, null, null, 28);
78          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
79          assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
80          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
81      }
82  
83      @Test
84      public void testTypeManagement() throws Exception {
85          CSV csv = CSV.getInstance();
86          assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
87          logger.debug(dumpModelToRDFXML());
88  
89          assertModelNotEmpty();
90          assertStatementsSize(null, null, null, 21);
91          assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
92          assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2", XSD.INTEGER));
93          assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
94          assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XSD.FLOAT));
95          assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XSD.FLOAT));
96          assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XSD.INTEGER));
97      }
98  
99      @Test
100     public void testExtractionEmptyValue() throws Exception {
101         CSV csv = CSV.getInstance();
102         assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
103         logger.debug(dumpModelToRDFXML());
104 
105         assertModelNotEmpty();
106         assertStatementsSize(null, null, null, 25);
107         assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
108         assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", XSD.INTEGER));
109         assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", XSD.INTEGER));
110         assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XSD.STRING));
111         assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Giovanni", XSD.STRING));
112     }
113 
114 }