1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.extractor.html.MicroformatExtractor;
21 import org.eclipse.rdf4j.model.BNode;
22 import org.eclipse.rdf4j.model.Resource;
23
24 import java.util.Arrays;
25 import java.util.List;
26 import java.util.Locale;
27
28
29
30
31
32
33
34 public interface TagSoupExtractionResult extends ExtractionResult {
35
36
37
38
39
40
41
42
43
44
45
46
/**
 * Adds a resource root to this extraction result.
 *
 * <p>The three arguments are the same values a {@link ResourceRoot} is built from.
 * NOTE(review): whether implementations wrap them in a {@link ResourceRoot} and
 * expose them through {@link #getResourceRoots()} is implementation-defined - confirm
 * against the implementing class.
 *
 * @param path
 *            the path locating the root within the source document.
 * @param root
 *            the resource acting as root.
 * @param extractor
 *            the class of the microformat extractor the root is associated with.
 */
void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor);
48
49
50
51
52
53
/**
 * Returns the resource roots held by this extraction result.
 *
 * @return a list of {@link ResourceRoot} entries.
 */
List<ResourceRoot> getResourceRoots();
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/**
 * Adds a property path to this extraction result.
 *
 * <p>Note that the argument order differs from the {@link PropertyPath} constructor,
 * which takes the path first and the extractor last.
 *
 * @param extractor
 *            the class of the microformat extractor the property path is associated with.
 * @param propertySubject
 *            the subject of the property.
 * @param property
 *            the property itself.
 * @param object
 *            the blank node acting as object of the property.
 * @param path
 *            the path locating the property within the source document.
 */
void addPropertyPath(Class<? extends MicroformatExtractor> extractor, Resource propertySubject, Resource property,
        BNode object, String[] path);
72
73
74
75
76
77
/**
 * Returns the property paths held by this extraction result.
 *
 * @return a list of {@link PropertyPath} entries.
 */
List<PropertyPath> getPropertyPaths();
79
80
81
82
83 class ResourceRoot {
84 private String[] path;
85 private Resource root;
86 private Class<? extends MicroformatExtractor> extractor;
87
88 public ResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
89 if (path == null || path.length == 0) {
90 throw new IllegalArgumentException(
91 String.format(Locale.ROOT, "Invalid xpath: '%s'.", Arrays.toString(path)));
92 }
93 if (root == null) {
94 throw new IllegalArgumentException("Invalid root, cannot be null.");
95 }
96 if (extractor == null) {
97 throw new IllegalArgumentException("Invalid extractor, cannot ne null");
98 }
99 this.path = path;
100 this.root = root;
101 this.extractor = extractor;
102 }
103
104 public String[] getPath() {
105 return path;
106 }
107
108 public Resource getRoot() {
109 return root;
110 }
111
112 public Class<? extends MicroformatExtractor> getExtractor() {
113 return extractor;
114 }
115
116 @Override
117 public String toString() {
118 return String.format(Locale.ROOT, "%s-%s-%s %s", this.getClass().getCanonicalName(), Arrays.toString(path),
119 root, extractor);
120 }
121 }
122
123
124
125
126 class PropertyPath {
127
128 private Class<? extends MicroformatExtractor> extractor;
129 private String[] path;
130 private Resource subject;
131 private Resource property;
132 private BNode object;
133
134 public PropertyPath(String[] path, Resource subject, Resource property, BNode object,
135 Class<? extends MicroformatExtractor> extractor) {
136 if (path == null) {
137 throw new NullPointerException("path cannot be null.");
138 }
139 if (subject == null) {
140 throw new NullPointerException("subject cannot be null.");
141 }
142 if (property == null) {
143 throw new NullPointerException("property cannot be null.");
144 }
145 if (extractor == null) {
146 throw new NullPointerException("extractor cannot be null.");
147 }
148 this.path = path;
149 this.subject = subject;
150 this.property = property;
151 this.object = object;
152 this.extractor = extractor;
153 }
154
155 public String[] getPath() {
156 return path;
157 }
158
159 public Resource getSubject() {
160 return subject;
161 }
162
163 public Resource getProperty() {
164 return property;
165 }
166
167 public BNode getObject() {
168 return object;
169 }
170
171 public Class<? extends MicroformatExtractor> getExtractor() {
172 return extractor;
173 }
174
175 @Override
176 public String toString() {
177 return String.format(Locale.ROOT, "%s %s - %s - %s -- %s -->", this.getClass().getCanonicalName(),
178 Arrays.toString(path), extractor, subject, property);
179 }
180 }
181
182 }