1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractionContext;
21 import org.apache.any23.extractor.ExtractionException;
22 import org.apache.any23.extractor.ExtractionParameters;
23 import org.apache.any23.extractor.ExtractionResult;
24 import org.apache.any23.extractor.ExtractorDescription;
25 import org.apache.any23.rdf.Any23ValueFactoryWrapper;
26 import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
27 import org.eclipse.rdf4j.model.BNode;
28 import org.eclipse.rdf4j.model.IRI;
29 import org.eclipse.rdf4j.model.ValueFactory;
30 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
31 import org.w3c.dom.Document;
32
33 import java.io.IOException;
34
35
36
37
38
39
40
41 public class ICBMExtractor implements TagSoupDOMExtractor {
42
43 @Override
44 public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document in,
45 ExtractionResult out) throws IOException, ExtractionException {
46
47
48 String props = DomUtils.find(in, "//META[@name=\"ICBM\" or @name=\"geo.position\"]/@content");
49 if ("".equals(props))
50 return;
51
52 String[] coords = props.split("[;,]");
53 float lat, lon;
54 try {
55 lat = Float.parseFloat(coords[0]);
56 lon = Float.parseFloat(coords[1]);
57 } catch (NumberFormatException nfe) {
58 return;
59 }
60
61 final ValueFactory factory = new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance(), out);
62 BNode point = factory.createBNode();
63 out.writeTriple(extractionContext.getDocumentIRI(), expand("dcterms:related"), point);
64 out.writeTriple(point, expand("rdf:type"), expand("geo:Point"));
65 out.writeTriple(point, expand("geo:lat"), factory.createLiteral(Float.toString(lat)));
66 out.writeTriple(point, expand("geo:long"), factory.createLiteral(Float.toString(lon)));
67 }
68
69 private IRI expand(String curie) {
70 return getDescription().getPrefixes().expand(curie);
71 }
72
73 @Override
74 public ExtractorDescription getDescription() {
75 return ICBMExtractorFactory.getDescriptionInstance();
76 }
77
78 }