1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractionResult;
21 import org.apache.any23.extractor.ExtractorDescription;
22 import org.apache.any23.extractor.TagSoupExtractionResult;
23 import org.apache.any23.vocab.VCard;
24 import org.eclipse.rdf4j.model.BNode;
25 import org.eclipse.rdf4j.model.vocabulary.RDF;
26 import org.w3c.dom.Node;
27
28
29
30
31
32
33 public class GeoExtractor extends EntityBasedMicroformatExtractor {
34
35 private static final VCard vVCARD = VCard.getInstance();
36
37 @Override
38 public ExtractorDescription getDescription() {
39 return GeoExtractorFactory.getDescriptionInstance();
40 }
41
42 protected String getBaseClassName() {
43 return "geo";
44 }
45
46 @Override
47 protected void resetExtractor() {
48
49 }
50
51 protected boolean extractEntity(Node node, ExtractionResult out) {
52 if (null == node)
53 return false;
54
55 final HTMLDocument document = new HTMLDocument(node);
56 HTMLDocument.TextField latNode = document.getSingularTextField("latitude");
57 HTMLDocument.TextField lonNode = document.getSingularTextField("longitude");
58 String lat = latNode.value();
59 String lon = lonNode.value();
60 if ("".equals(lat) || "".equals(lon)) {
61 String[] both = document.getSingularUrlField("geo").value().split(";");
62 if (both.length != 2)
63 return false;
64 lat = both[0];
65 lon = both[1];
66 }
67 BNode geo = getBlankNodeFor(node);
68 out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
69 conditionallyAddStringProperty(latNode.source(), geo, vVCARD.latitude, lat);
70 conditionallyAddStringProperty(lonNode.source(), geo, vVCARD.longitude, lon);
71
72 final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
73 tser.addResourceRoot(document.getPathToLocalRoot(), geo, this.getClass());
74
75 return true;
76 }
77
78 }