1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractionException;
21 import org.apache.any23.extractor.ExtractionResult;
22 import org.apache.any23.extractor.ExtractorDescription;
23 import org.apache.any23.vocab.HRecipe;
24 import org.eclipse.rdf4j.model.BNode;
25 import org.eclipse.rdf4j.model.IRI;
26 import org.eclipse.rdf4j.model.vocabulary.RDF;
27 import org.w3c.dom.Node;
28
29
30
31
32
33
34 public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
35
36 private static final HRecipe vHRECIPE = HRecipe.getInstance();
37
38 @Override
39 public ExtractorDescription getDescription() {
40 return HRecipeExtractorFactory.getDescriptionInstance();
41 }
42
43 @Override
44 protected String getBaseClassName() {
45 return "hrecipe";
46 }
47
48 @Override
49 protected void resetExtractor() {
50
51 }
52
53 @Override
54 protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
55 final BNode recipe = getBlankNodeFor(node);
56 conditionallyAddResourceProperty(recipe, RDF.TYPE, vHRECIPE.Recipe);
57 final HTMLDocumentTMLDocument.html#HTMLDocument">HTMLDocument fragment = new HTMLDocument(node);
58 addFN(fragment, recipe);
59 addIngredients(fragment, recipe);
60 addYield(fragment, recipe);
61 addInstructions(fragment, recipe);
62 addDurations(fragment, recipe);
63 addPhoto(fragment, recipe);
64 addSummary(fragment, recipe);
65 addAuthors(fragment, recipe);
66 addPublished(fragment, recipe);
67 addNutritions(fragment, recipe);
68 addTags(fragment, recipe);
69 return true;
70 }
71
72
73
74
75
76
77
78
79
80 private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, IRI property) {
81 HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
82 conditionallyAddStringProperty(title.source(), recipe, property, title.value());
83 }
84
85
86
87
88
89
90
91 private void addFN(HTMLDocument fragment, BNode recipe) {
92 mapFieldWithProperty(fragment, recipe, "fn", vHRECIPE.fn);
93 }
94
95
96
97
98
99
100
101
102
103 private BNode addIngredient(HTMLDocument fragment, HTMLDocument.TextField ingredient) {
104 final BNode ingredientBnode = getBlankNodeFor(ingredient.source());
105 addIRIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient);
106 conditionallyAddStringProperty(ingredient.source(), ingredientBnode, vHRECIPE.ingredientName,
107 HTMLDocument.readNodeContent(ingredient.source(), true));
108 mapFieldWithProperty(fragment, ingredientBnode, "value", vHRECIPE.ingredientQuantity);
109 mapFieldWithProperty(fragment, ingredientBnode, "type", vHRECIPE.ingredientQuantityType);
110 return ingredientBnode;
111 }
112
113
114
115
116
117
118
119
120
121 private void addIngredients(HTMLDocument fragment, BNode recipe) {
122 final HTMLDocument.TextField[] ingredients = fragment.getPluralTextField("ingredient");
123 for (HTMLDocument.TextField ingredient : ingredients) {
124 addBNodeProperty(recipe, vHRECIPE.ingredient, addIngredient(fragment, ingredient));
125 }
126 }
127
128
129
130
131
132
133
134 private void addInstructions(HTMLDocument fragment, BNode recipe) {
135 mapFieldWithProperty(fragment, recipe, "instructions", vHRECIPE.instructions);
136
137 }
138
139
140
141
142
143
144
145 private void addYield(HTMLDocument fragment, BNode recipe) {
146 mapFieldWithProperty(fragment, recipe, "yield", vHRECIPE.yield);
147 }
148
149
150
151
152
153
154
155
156
157
158 private BNode addDuration(HTMLDocument fragment, HTMLDocument.TextField duration) {
159 final BNode durationBnode = getBlankNodeFor(duration.source());
160 addIRIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration);
161 conditionallyAddStringProperty(duration.source(), durationBnode, vHRECIPE.durationTime, duration.value());
162 mapFieldWithProperty(fragment, durationBnode, "value-title", vHRECIPE.durationTitle);
163 return durationBnode;
164 }
165
166
167
168
169
170
171
172 private void addDurations(HTMLDocument fragment, BNode recipe) {
173 final HTMLDocument.TextField[] durations = fragment.getPluralTextField("duration");
174 for (HTMLDocument.TextField duration : durations) {
175 addBNodeProperty(recipe, vHRECIPE.duration, addDuration(fragment, duration));
176 }
177 }
178
179
180
181
182
183
184
185
186
187 private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
188 final HTMLDocument.TextField[] photos = fragment.getPluralUrlField("photo");
189 for (HTMLDocument.TextField photo : photos) {
190 addIRIProperty(recipe, vHRECIPE.photo, fragment.resolveIRI(photo.value()));
191 }
192 }
193
194
195
196
197
198
199
200 private void addSummary(HTMLDocument fragment, BNode recipe) {
201 mapFieldWithProperty(fragment, recipe, "summary", vHRECIPE.summary);
202 }
203
204
205
206
207
208
209
210 private void addAuthors(HTMLDocument fragment, BNode recipe) {
211 final HTMLDocument.TextField[] authors = fragment.getPluralTextField("author");
212 for (HTMLDocument.TextField author : authors) {
213 conditionallyAddStringProperty(author.source(), recipe, vHRECIPE.author, author.value());
214 }
215 }
216
217
218
219
220
221
222
223
224 private void addPublished(HTMLDocument fragment, BNode recipe) {
225 mapFieldWithProperty(fragment, recipe, "published", vHRECIPE.published);
226 }
227
228
229
230
231
232
233
234
235
236 private BNode addNutrition(HTMLDocument fragment, HTMLDocument.TextField nutrition) {
237 final BNode nutritionBnode = getBlankNodeFor(nutrition.source());
238 addIRIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition);
239 conditionallyAddStringProperty(nutrition.source(), nutritionBnode, vHRECIPE.nutritionValue, nutrition.value());
240 mapFieldWithProperty(fragment, nutritionBnode, "value", vHRECIPE.nutritionValue);
241 mapFieldWithProperty(fragment, nutritionBnode, "type", vHRECIPE.nutritionValueType);
242 return nutritionBnode;
243 }
244
245
246
247
248
249
250
251 private void addNutritions(HTMLDocument fragment, BNode recipe) {
252 HTMLDocument.TextField[] nutritions = fragment.getPluralTextField("nutrition");
253 for (HTMLDocument.TextField nutrition : nutritions) {
254 addBNodeProperty(recipe, vHRECIPE.nutrition, addNutrition(fragment, nutrition));
255 }
256 }
257
258
259
260
261
262
263
264 private void addTags(HTMLDocument fragment, BNode recipe) {
265 HTMLDocument.TextField[] tags = fragment.extractRelTagNodes();
266 for (HTMLDocument.TextField tag : tags) {
267 conditionallyAddStringProperty(tag.source(), recipe, vHRECIPE.tag, tag.value());
268 }
269 }
270
271 }