1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html.microformats2;
19
20 import org.apache.any23.extractor.ExtractionException;
21 import org.apache.any23.extractor.ExtractionResult;
22 import org.apache.any23.extractor.ExtractorDescription;
23 import org.apache.any23.vocab.HRecipe;
24 import org.eclipse.rdf4j.model.BNode;
25 import org.eclipse.rdf4j.model.IRI;
26 import org.eclipse.rdf4j.model.vocabulary.RDF;
27 import org.w3c.dom.Node;
28 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
29 import org.apache.any23.extractor.html.HTMLDocument;
30
31
32
33
34
35
36 public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
37
38 private static final HRecipe vHRECIPE = HRecipe.getInstance();
39
40 private static final String[] recipeFields = { "name", "ingredient", "yield", "instructions", "duration", "photo",
41 "summary", "author", "published", "nutrition" };
42
43 @Override
44 public ExtractorDescription getDescription() {
45 return HRecipeExtractorFactory.getDescriptionInstance();
46 }
47
48 @Override
49 protected String getBaseClassName() {
50 return Microformats2Prefixes.CLASS_PREFIX + "recipe";
51 }
52
53 @Override
54 protected void resetExtractor() {
55
56 }
57
58 @Override
59 protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
60 final BNode recipe = getBlankNodeFor(node);
61 conditionallyAddResourceProperty(recipe, RDF.TYPE, vHRECIPE.Recipe);
62 final HTMLDocument fragment = new HTMLDocument(node);
63 addName(fragment, recipe);
64 addIngredients(fragment, recipe);
65 addYield(fragment, recipe);
66 addInstructions(fragment, recipe);
67 addDurations(fragment, recipe);
68 addPhoto(fragment, recipe);
69 addSummary(fragment, recipe);
70 addAuthors(fragment, recipe);
71 addPublished(fragment, recipe);
72 addNutritions(fragment, recipe);
73 return true;
74 }
75
76 private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, IRI property) {
77 HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
78 conditionallyAddStringProperty(title.source(), recipe, property, title.value());
79 }
80
81 private void addName(HTMLDocument fragment, BNode recipe) {
82 mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[0], vHRECIPE.fn);
83 }
84
85 private void addIngredients(HTMLDocument fragment, BNode recipe) {
86 final HTMLDocument.TextField[] ingredients = fragment
87 .getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[1]);
88 for (HTMLDocument.TextField ingredient : ingredients) {
89 conditionallyAddStringProperty(ingredient.source(), recipe, vHRECIPE.ingredient, ingredient.value());
90 }
91 }
92
93 private void addInstructions(HTMLDocument fragment, BNode recipe) {
94 mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX + recipeFields[2],
95 vHRECIPE.instructions);
96 }
97
98 private void addYield(HTMLDocument fragment, BNode recipe) {
99 mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[3], vHRECIPE.yield);
100 }
101
102 private void addDurations(HTMLDocument fragment, BNode recipe) {
103 final HTMLDocument.TextField[] durations = fragment
104 .getPluralTextField(Microformats2Prefixes.TIME_PROPERTY_PREFIX + recipeFields[4]);
105 for (HTMLDocument.TextField duration : durations) {
106 Node attribute = duration.source().getAttributes().getNamedItem("datetime");
107 if (attribute == null) {
108 conditionallyAddStringProperty(duration.source(), recipe, vHRECIPE.duration, duration.value());
109 } else {
110 conditionallyAddStringProperty(duration.source(), recipe, vHRECIPE.duration, attribute.getNodeValue());
111
112 }
113
114 }
115 }
116
117 private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
118 final HTMLDocument.TextField[] photos = fragment
119 .getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX + recipeFields[5]);
120 for (HTMLDocument.TextField photo : photos) {
121 addIRIProperty(recipe, vHRECIPE.photo, fragment.resolveIRI(photo.value()));
122 }
123 }
124
125 private void addSummary(HTMLDocument fragment, BNode recipe) {
126 mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[6],
127 vHRECIPE.summary);
128 }
129
130 private void addAuthors(HTMLDocument fragment, BNode recipe) {
131 final HTMLDocument.TextField[] authors = fragment
132 .getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[7]);
133 for (HTMLDocument.TextField author : authors) {
134 conditionallyAddStringProperty(author.source(), recipe, vHRECIPE.author, author.value());
135 }
136 }
137
138 private void addPublished(HTMLDocument fragment, BNode recipe) {
139 final HTMLDocument.TextField[] durations = fragment
140 .getPluralTextField(Microformats2Prefixes.TIME_PROPERTY_PREFIX + recipeFields[8]);
141 for (HTMLDocument.TextField duration : durations) {
142 Node attribute = duration.source().getAttributes().getNamedItem("datetime");
143 if (attribute == null) {
144 conditionallyAddStringProperty(duration.source(), recipe, vHRECIPE.published, duration.value());
145 } else {
146 conditionallyAddStringProperty(duration.source(), recipe, vHRECIPE.published, attribute.getNodeValue());
147 }
148 }
149 }
150
151 private void addNutritions(HTMLDocument fragment, BNode recipe) {
152 final HTMLDocument.TextField[] nutritions = fragment
153 .getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[9]);
154 for (HTMLDocument.TextField nutrition : nutritions) {
155 conditionallyAddStringProperty(nutrition.source(), recipe, vHRECIPE.nutrition, nutrition.value());
156 }
157 }
158 }