1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.w3c.dom.Node;
21
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collection;
25 import java.util.Collections;
26 import java.util.HashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 import static org.apache.any23.extractor.html.HTMLDocument.TextField;
31
32
33
34
35
36
37
38 public class HCardName {
39
40 public static final String GIVEN_NAME = "given-name";
41 public static final String FAMILY_NAME = "family-name";
42 public static final String ADDITIONAL_NAME = "additional-name";
43 public static final String NICKNAME = "nickname";
44 public static final String HONORIFIC_PREFIX = "honorific-prefix";
45 public static final String HONORIFIC_SUFFIX = "honorific-suffix";
46
47 public static final String[] FIELDS = { GIVEN_NAME, FAMILY_NAME, ADDITIONAL_NAME, NICKNAME, HONORIFIC_PREFIX,
48 HONORIFIC_SUFFIX };
49
50 private static final String[] NAME_COMPONENTS = { HONORIFIC_PREFIX, GIVEN_NAME, ADDITIONAL_NAME, FAMILY_NAME,
51 HONORIFIC_SUFFIX };
52
53 private Map<String, FieldValue> fields = new HashMap<String, FieldValue>();
54 private TextField[] fullName = null;
55 private TextField organization = null;
56 private TextField unit = null;
57
58 private static TextField join(TextField[] sarray, String delimiter) {
59 StringBuilder builder = new StringBuilder();
60 final int sarrayLengthMin2 = sarray.length - 1;
61 for (int i = 0; i < sarray.length; i++) {
62 builder.append(sarray[i].value());
63 if (i < sarrayLengthMin2) {
64 builder.append(delimiter);
65 }
66 }
67 return new TextField(builder.toString(), sarray[0].source());
68 }
69
70
71
72
73 public void reset() {
74 fields.clear();
75 fullName = null;
76 organization = null;
77 unit = null;
78 }
79
80 public void setField(String fieldName, TextField nd) {
81 final String value = fixWhiteSpace(nd.value());
82 if (value == null)
83 return;
84 FieldValue fieldValue = fields.get(fieldName);
85 if (fieldValue == null) {
86 fieldValue = new FieldValue();
87 fields.put(fieldName, fieldValue);
88 }
89 fieldValue.addValue(new TextField(value, nd.source()));
90 }
91
92 public void setFullName(TextField nd) {
93 final String value = fixWhiteSpace(nd.value());
94 if (value == null)
95 return;
96 String[] split = value.split("\\s+");
97
98 final String split0 = split[0];
99 final int split0Length = split0.length();
100 if (split.length > 1 && split0.charAt(split0Length - 1) == ',') {
101 String swap = split[1];
102 split[1] = split0.substring(0, split0Length - 1);
103 split[0] = swap;
104 }
105 TextField[] splitFields = new TextField[split.length];
106 for (int i = 0; i < split.length; i++) {
107 splitFields[i] = new TextField(split[i], nd.source());
108 }
109 this.fullName = splitFields;
110 }
111
112 public void setOrganization(TextField nd) {
113 final String value = fixWhiteSpace(nd.value());
114 if (value == null)
115 return;
116 this.organization = new TextField(value, nd.source());
117 }
118
119 public boolean isMultiField(String fieldName) {
120 FieldValue fieldValue = fields.get(fieldName);
121 return fieldValue != null && fieldValue.isMultiField();
122 }
123
124 public boolean containsField(String fieldName) {
125 return GIVEN_NAME.equals(fieldName) || FAMILY_NAME.equals(fieldName) || fields.containsKey(fieldName);
126 }
127
128 public TextField getField(String fieldName) {
129 if (GIVEN_NAME.equals(fieldName)) {
130 return getFullNamePart(GIVEN_NAME, 0);
131 }
132 if (FAMILY_NAME.equals(fieldName)) {
133 return getFullNamePart(FAMILY_NAME, Integer.MAX_VALUE);
134 }
135 FieldValue v = fields.get(fieldName);
136 return v == null ? null : v.getValue();
137 }
138
139 public Collection<TextField> getFields(String fieldName) {
140 FieldValue v = fields.get(fieldName);
141 return v == null ? Collections.<TextField> emptyList() : v.getValues();
142 }
143
144 private TextField getFullNamePart(String fieldName, int index) {
145 if (fields.containsKey(fieldName)) {
146 return fields.get(fieldName).getValue();
147 }
148 if (fullName == null)
149 return null;
150
151 if (organization != null && fullName[0].value().equals(organization.value())) {
152 return null;
153 }
154 if (index != Integer.MAX_VALUE && fullName.length <= index)
155 return null;
156 return fullName[index == Integer.MAX_VALUE ? fullName.length - 1 : index];
157 }
158
159 public boolean hasField(String fieldName) {
160 return getField(fieldName) != null;
161 }
162
163 public boolean hasAnyField() {
164 for (String fieldName : FIELDS) {
165 if (hasField(fieldName))
166 return true;
167 }
168 return false;
169 }
170
171 public TextField getFullName() {
172 if (fullName != null)
173 return join(fullName, " ");
174 StringBuffer s = new StringBuffer();
175 boolean empty = true;
176 Node first = null;
177 TextField current;
178 for (String fieldName : NAME_COMPONENTS) {
179 if (!hasField(fieldName))
180 continue;
181 if (!empty) {
182 s.append(' ');
183 }
184 current = getField(fieldName);
185 if (first == null) {
186 first = current.source();
187 }
188 s.append(current.value());
189 empty = false;
190 }
191 if (empty)
192 return null;
193 return new TextField(s.toString(), first);
194 }
195
196 public TextField getOrganization() {
197 return organization;
198 }
199
200 public void setOrganizationUnit(TextField nd) {
201 final String value = fixWhiteSpace(nd.value());
202 if (value == null)
203 return;
204 this.unit = new TextField(value, nd.source());
205 }
206
207 public TextField getOrganizationUnit() {
208 return unit;
209 }
210
211 private String fixWhiteSpace(String s) {
212 if (s == null)
213 return null;
214 s = s.trim().replaceAll("\\s+", " ");
215 if ("".equals(s))
216 return null;
217 return s;
218 }
219
220
221
222
223 private static class FieldValue {
224
225 private TextField value;
226 private List<TextField> multiValue = new ArrayList<TextField>();
227
228 FieldValue() {
229 }
230
231 void addValue(TextField v) {
232 if (value == null && multiValue == null) {
233 value = v;
234 } else if (multiValue == null) {
235 multiValue = new ArrayList<TextField>();
236 multiValue.add(value);
237 value = null;
238 multiValue.add(v);
239 } else {
240 multiValue.add(v);
241 }
242 }
243
244 boolean isMultiField() {
245 return value == null;
246 }
247
248 TextField getValue() {
249 return value != null ? value : multiValue.get(0);
250 }
251
252 Collection<TextField> getValues() {
253 return value != null ? Arrays.asList(value) : multiValue;
254 }
255 }
256
257 }