1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import java.util.List;
21
22 import org.apache.any23.extractor.ExtractionException;
23 import org.apache.any23.extractor.ExtractionResult;
24 import org.apache.any23.extractor.ExtractorDescription;
25 import org.apache.any23.extractor.TagSoupExtractionResult;
26 import org.apache.any23.extractor.html.HTMLDocument.TextField;
27 import org.apache.any23.vocab.Review;
28 import org.apache.any23.vocab.ReviewAggregate;
29 import org.apache.any23.vocab.VCard;
30 import org.eclipse.rdf4j.model.BNode;
31 import org.eclipse.rdf4j.model.Resource;
32 import org.eclipse.rdf4j.model.vocabulary.RDF;
33 import org.w3c.dom.Node;
34
35
36
37
38
39
40 public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor {
41 private static final Review vREVIEW = Review.getInstance();
42 private static final ReviewAggregate vREVIEWAGG = ReviewAggregate.getInstance();
43 private static final VCard vVCARD = VCard.getInstance();
44
45 @Override
46 public ExtractorDescription getDescription() {
47 return HReviewAggregateExtractorFactory.getDescriptionInstance();
48 }
49
50 @Override
51 protected String getBaseClassName() {
52 return "hreview-aggregate";
53 }
54
55 @Override
56 protected void resetExtractor() {
57
58 }
59
60 @Override
61 protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
62 BNode rev = getBlankNodeFor(node);
63 out.writeTriple(rev, RDF.TYPE, vREVIEWAGG.ReviewAggregate);
64 final HTMLDocument fragment = new HTMLDocument(node);
65 addRating(fragment, rev);
66 addWorst(fragment, rev);
67 addBest(fragment, rev);
68 addAverage(fragment, rev);
69 addSummary(fragment, rev);
70 addType(fragment, rev);
71 addItem(fragment, rev);
72 addCount(fragment, rev);
73 addVotes(fragment, rev);
74
75 final TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
76 tser.addResourceRoot(DomUtils.getXPathListForNode(node), rev, this.getClass());
77
78 return true;
79 }
80
81 private void addType(HTMLDocument doc, Resource rev) {
82 TextField value = doc.getSingularTextField("type");
83 conditionallyAddStringProperty(value.source(), rev, vREVIEW.type, value.value());
84 }
85
86 private void addItem(HTMLDocument root, BNode rev) throws ExtractionException {
87 List<Node> nodes = root.findAllByClassName("item");
88 for (Node node : nodes) {
89 Resource item = findDummy(new HTMLDocument(node));
90 addBNodeProperty(node, item, vREVIEW.hasReview, rev);
91 }
92 }
93
94 private Resource findDummy(HTMLDocument item) throws ExtractionException {
95 Resource blank = getBlankNodeFor(item.getDocument());
96 TextField val = item.getSingularTextField("fn");
97 conditionallyAddStringProperty(val.source(), blank, vVCARD.fn, val.value());
98 final TextField url = item.getSingularUrlField("url");
99 conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument().resolveIRI(url.value()));
100 TextField pics[] = item.getPluralUrlField("photo");
101 for (TextField pic : pics) {
102 addIRIProperty(blank, vVCARD.photo, getHTMLDocument().resolveIRI(pic.value()));
103 }
104 return blank;
105 }
106
107 private void addRating(HTMLDocument doc, Resource rev) {
108 HTMLDocument.TextField value = doc.getSingularTextField("rating");
109 conditionallyAddStringProperty(value.source(), rev, vREVIEW.rating, value.value());
110 }
111
112 private void addWorst(HTMLDocument doc, Resource rev) {
113 HTMLDocument.TextField value = doc.getSingularTextField("worst");
114 conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.worst, value.value());
115 }
116
117 private void addBest(HTMLDocument doc, Resource rev) {
118 HTMLDocument.TextField value = doc.getSingularTextField("best");
119 conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.best, value.value());
120 }
121
122 private void addAverage(HTMLDocument doc, Resource rev) {
123 HTMLDocument.TextField value = doc.getSingularTextField("average");
124 conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.average, value.value());
125 }
126
127 private void addCount(HTMLDocument doc, Resource rev) {
128 HTMLDocument.TextField value = doc.getSingularTextField("count");
129 conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.count, value.value());
130 }
131
132 private void addVotes(HTMLDocument doc, Resource rev) {
133 HTMLDocument.TextField value = doc.getSingularTextField("votes");
134 conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.votes, value.value());
135 }
136
137 private void addSummary(HTMLDocument doc, Resource rev) {
138 TextField value = doc.getSingularTextField("summary");
139 conditionallyAddStringProperty(value.source(), rev, vREVIEW.title, value.value());
140 }
141 }