1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.extractor.html.MicroformatExtractor;
21 import org.apache.any23.rdf.Prefixes;
22 import org.apache.any23.writer.TripleHandler;
23 import org.apache.any23.writer.TripleHandlerException;
24 import org.eclipse.rdf4j.model.BNode;
25 import org.eclipse.rdf4j.model.Resource;
26 import org.eclipse.rdf4j.model.IRI;
27 import org.eclipse.rdf4j.model.Value;
28
29 import java.io.PrintStream;
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Locale;
36 import java.util.Set;
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 public class ExtractionResultImpl implements TagSoupExtractionResult {
58
59 private final ExtractionContext context;
60
61 private final Extractor<?> extractor;
62
63 private final TripleHandler tripleHandler;
64
65 private final Collection<ExtractionResult> subResults = new ArrayList<>();
66
67 private final Set<Object> knownContextIDs = new HashSet<>();
68
69 private boolean isClosed = false;
70
71 private boolean isInitialized = false;
72
73 private List<Issue> issues;
74
75 private List<ResourceRoot> resourceRoots;
76
77 private List<PropertyPath> propertyPaths;
78
79 public ExtractionResultImpl(ExtractionContext context, Extractor<?> extractor, TripleHandler tripleHandler) {
80 this(context, extractor, tripleHandler, new ArrayList<>());
81 }
82
83 private ExtractionResultImpl(ExtractionContext context, Extractor<?> extractor, TripleHandler tripleHandler,
84 List<Issue> issues) {
85 if (context == null) {
86 throw new NullPointerException("context cannot be null.");
87 }
88 if (extractor == null) {
89 throw new NullPointerException("extractor cannot be null.");
90 }
91 if (tripleHandler == null) {
92 throw new NullPointerException("triple handler cannot be null.");
93 }
94
95 this.extractor = extractor;
96 this.tripleHandler = tripleHandler;
97 this.context = context;
98 this.issues = issues;
99
100 knownContextIDs.add(context.getUniqueID());
101
102 try {
103
104
105
106 tripleHandler.openContext(context);
107 } catch (TripleHandlerException e) {
108 throw new RuntimeException("Error while opening context", e);
109 }
110 }
111
112 public boolean hasIssues() {
113 return !issues.isEmpty();
114 }
115
116 public int getIssuesCount() {
117 return issues.size();
118 }
119
120 @Override
121 public void printReport(PrintStream ps) {
122 ps.print(String.format(Locale.ROOT, "Context: %s [errors: %d] {\n", context, getIssuesCount()));
123 for (Issue issue : issues) {
124 ps.print(issue.toString());
125 ps.print("\n");
126 }
127
128 for (ExtractionResult er : subResults) {
129 er.printReport(ps);
130 }
131 ps.print("}\n");
132 }
133
134 @Override
135 public Collection<Issue> getIssues() {
136 return issues.isEmpty() ? Collections.<Issue> emptyList() : Collections.unmodifiableList(issues);
137 }
138
139 @Override
140 public ExtractionResult openSubResult(ExtractionContext context) {
141 final String contextID = context.getUniqueID();
142 if (knownContextIDs.contains(contextID)) {
143 throw new IllegalArgumentException("Duplicate contextID: " + contextID);
144 }
145 knownContextIDs.add(contextID);
146
147 checkOpen();
148 ExtractionResult result = new ExtractionResultImpl(context, extractor, tripleHandler, this.issues);
149 subResults.add(result);
150 return result;
151 }
152
153 public ExtractionContext getExtractionContext() {
154 return context;
155 }
156
157 @Override
158 public void writeTriple(Resource s, IRI p, Value o, IRI g) {
159 if (s == null || p == null || o == null)
160 return;
161
162 if (s.stringValue() == null || p.stringValue() == null || o.stringValue() == null) {
163 throw new IllegalArgumentException("The statement arguments must be not null.");
164 }
165 checkOpen();
166 try {
167 tripleHandler.receiveTriple(s, p, o, g, context);
168 } catch (TripleHandlerException e) {
169 throw new RuntimeException(String.format(Locale.ROOT, "Error while receiving triple %s %s %s", s, p, o), e);
170 }
171 }
172
173 boolean wasTouched() {
174 return isInitialized;
175 }
176
177 @Override
178 public void writeTriple(Resource s, IRI p, Value o) {
179 writeTriple(s, p, o, null);
180 }
181
182 @Override
183 public void writeNamespace(String prefix, String uri) {
184 checkOpen();
185 try {
186 tripleHandler.receiveNamespace(prefix, uri, context);
187 } catch (TripleHandlerException e) {
188 throw new RuntimeException(String.format(Locale.ROOT, "Error while writing namespace %s:%s", prefix, uri),
189 e);
190 }
191 }
192
193 @Override
194 public void notifyIssue(IssueLevel level, String msg, long row, long col) {
195 issues.add(new Issue(level, msg, row, col));
196 }
197
198 @Override
199 public void close() {
200 if (isClosed)
201 return;
202 isClosed = true;
203 for (ExtractionResult subResult : subResults) {
204 subResult.close();
205 }
206 try {
207 tripleHandler.closeContext(context);
208 } catch (TripleHandlerException e) {
209 throw new RuntimeException("Error while opening context", e);
210 }
211 }
212
213 private void checkOpen() {
214 if (!isInitialized) {
215 isInitialized = true;
216 Prefixes prefixes = extractor.getDescription().getPrefixes();
217 for (String prefix : prefixes.allPrefixes()) {
218 try {
219 tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceIRIFor(prefix), context);
220 } catch (TripleHandlerException e) {
221 throw new RuntimeException(String.format(Locale.ROOT, "Error while writing namespace %s", prefix),
222 e);
223 }
224 }
225 }
226 if (isClosed) {
227 throw new IllegalStateException("Not open: " + context);
228 }
229 }
230
231 @Override
232 public void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
233 if (resourceRoots == null) {
234 resourceRoots = new ArrayList<>();
235 }
236 resourceRoots.add(new ResourceRoot(path, root, extractor));
237 }
238
239 @Override
240 public List<ResourceRoot> getResourceRoots() {
241 List<ResourceRoot> allRoots = new ArrayList<>();
242 if (resourceRoots != null) {
243 allRoots.addAll(resourceRoots);
244 }
245 for (ExtractionResult er : subResults) {
246 ExtractionResultImpl eri = (ExtractionResultImpl) er;
247 if (eri.resourceRoots != null) {
248 allRoots.addAll(eri.resourceRoots);
249 }
250 }
251 return allRoots;
252 }
253
254 @Override
255 public void addPropertyPath(Class<? extends MicroformatExtractor> extractor, Resource propertySubject,
256 Resource property, BNode object, String[] path) {
257 if (propertyPaths == null) {
258 propertyPaths = new ArrayList<>();
259 }
260 propertyPaths.add(new PropertyPath(path, propertySubject, property, object, extractor));
261 }
262
263 @Override
264 public List<PropertyPath> getPropertyPaths() {
265 List<PropertyPath> allPaths = new ArrayList<>();
266 if (propertyPaths != null) {
267 allPaths.addAll(propertyPaths);
268 }
269 for (ExtractionResult er : subResults) {
270 ExtractionResultImpl eri = (ExtractionResultImpl) er;
271 if (eri.propertyPaths != null) {
272 allPaths.addAll(eri.propertyPaths);
273 }
274 }
275 return allPaths;
276 }
277
278 @Override
279 public String toString() {
280 final StringBuilder sb = new StringBuilder();
281 sb.append(context.toString());
282 sb.append('\n');
283 if (issues != null) {
284 sb.append("Errors {\n");
285 for (Issue issue : issues) {
286 sb.append('\t');
287 sb.append(issue.toString());
288 sb.append('\n');
289 }
290 }
291 sb.append("}\n");
292 return sb.toString();
293 }
294
295 }