1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html.microformats2;
19
20 import org.apache.any23.extractor.ExtractionResult;
21 import org.apache.any23.extractor.ExtractorDescription;
22 import org.apache.any23.extractor.TagSoupExtractionResult;
23 import org.apache.any23.vocab.VCard;
24 import org.eclipse.rdf4j.model.BNode;
25 import org.eclipse.rdf4j.model.vocabulary.RDF;
26 import org.w3c.dom.Node;
27 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
28 import org.apache.any23.extractor.html.HTMLDocument;
29
30 import java.util.ArrayList;
31
32
33
34
35
36
37 public class HGeoExtractor extends EntityBasedMicroformatExtractor {
38
39 private static final VCard vVCARD = VCard.getInstance();
40
41 private static final String[] geoFields = { "latitude", "longitude", "altitude" };
42
43 @Override
44 public ExtractorDescription getDescription() {
45 return HGeoExtractorFactory.getDescriptionInstance();
46 }
47
48 protected String getBaseClassName() {
49 return Microformats2Prefixes.CLASS_PREFIX + "geo";
50 }
51
52 @Override
53 protected void resetExtractor() {
54
55 }
56
57 protected boolean extractEntity(Node node, ExtractionResult out) {
58 if (null == node)
59 return false;
60 final HTMLDocumentl/HTMLDocument.html#HTMLDocument">HTMLDocument document = new HTMLDocument(node);
61 BNode geo = getBlankNodeFor(node);
62 out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
63 final String extractorName = getDescription().getExtractorName();
64 ArrayList<HTMLDocument.TextField> geoNodes = new ArrayList<HTMLDocument.TextField>();
65 for (String field : geoFields) {
66 geoNodes.add(document.getSingularTextField(Microformats2Prefixes.PROPERTY_PREFIX + field));
67 }
68 if (geoNodes.get(0).source() == null) {
69 String[] composed = document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX + "geo").value()
70 .split(";");
71 for (int counter = 0; counter < composed.length; counter++) {
72 conditionallyAddStringProperty(
73 document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX + "geo").source(), geo,
74 vVCARD.getProperty(geoFields[counter]), composed[counter]);
75 }
76 } else {
77 for (int counter = 0; counter < geoNodes.size(); counter++) {
78 conditionallyAddStringProperty(geoNodes.get(counter).source(), geo,
79 vVCARD.getProperty(geoFields[counter]), geoNodes.get(counter).value());
80 }
81 }
82 final TagSoupExtractionResult../org/apache/any23/extractor/TagSoupExtractionResult.html#TagSoupExtractionResult">TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
83 tser.addResourceRoot(document.getPathToLocalRoot(), geo, this.getClass());
84 return true;
85 }
86
87 }