1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.calendar;
19
20 import biweekly.ICalDataType;
21 import biweekly.ICalVersion;
22 import biweekly.ICalendar;
23 import biweekly.component.ICalComponent;
24 import biweekly.component.VTimezone;
25 import biweekly.io.ParseWarning;
26 import biweekly.io.SkipMeException;
27 import biweekly.io.StreamReader;
28 import biweekly.io.TimezoneAssignment;
29 import biweekly.io.TimezoneInfo;
30 import biweekly.io.WriteContext;
31 import biweekly.io.json.JCalValue;
32 import biweekly.io.json.JsonValue;
33 import biweekly.io.scribe.ScribeIndex;
34 import biweekly.io.scribe.property.ICalPropertyScribe;
35 import biweekly.parameter.Encoding;
36 import biweekly.parameter.ICalParameters;
37 import biweekly.property.Geo;
38 import biweekly.property.ICalProperty;
39 import biweekly.util.DateTimeComponents;
40 import biweekly.util.ICalDateFormat;
41 import org.apache.any23.extractor.ExtractionContext;
42 import org.apache.any23.extractor.ExtractionException;
43 import org.apache.any23.extractor.ExtractionParameters;
44 import org.apache.any23.extractor.ExtractionResult;
45 import org.apache.any23.extractor.Extractor;
46 import org.apache.any23.extractor.IssueReport;
47 import org.apache.any23.vocab.ICAL;
48 import org.apache.commons.lang3.StringUtils;
49 import org.eclipse.rdf4j.model.BNode;
50 import org.eclipse.rdf4j.model.IRI;
51 import org.eclipse.rdf4j.model.Value;
52 import org.eclipse.rdf4j.model.ValueFactory;
53 import org.eclipse.rdf4j.model.datatypes.XMLDatatypeUtil;
54 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
55 import org.eclipse.rdf4j.model.vocabulary.RDF;
56 import org.eclipse.rdf4j.model.vocabulary.XSD;
57
58 import java.io.IOException;
59 import java.io.InputStream;
60 import java.io.PrintWriter;
61 import java.io.StringWriter;
62 import java.math.BigDecimal;
63 import java.math.BigInteger;
64 import java.util.Collection;
65 import java.util.List;
66 import java.util.Locale;
67 import java.util.Map;
68 import java.util.Objects;
69 import java.util.Set;
70 import java.util.TimeZone;
71 import java.util.regex.Matcher;
72 import java.util.regex.Pattern;
73 import java.util.stream.Collectors;
74 import java.util.stream.Stream;
75
76
77
78
79 abstract class BaseCalendarExtractor implements Extractor.ContentExtractor {
80
/**
 * Accepts the stop-at-first-error flag without acting on it: the body is empty,
 * so the supplied value has no effect on this extractor.
 *
 * @param b the requested setting; ignored
 */
@Override
public void setStopAtFirstError(boolean b) {
    // no-op: the flag is neither stored nor consulted
}
85
// Shared factory used by this class to create blank nodes, IRIs and literals.
private static final ValueFactory f = SimpleValueFactory.getInstance();
// Vocabulary instance used to look up iCalendar classes, properties and datatype IRIs.
private static final ICAL vICAL = ICAL.getInstance();
88
/**
 * Creates the reader from which {@code run} pulls calendars one at a time.
 *
 * @param inputStream the content stream that was handed to {@code run}
 * @return a reader over {@code inputStream}; {@code run} closes it when it is done
 */
abstract StreamReader reader(InputStream inputStream);
90
91 @Override
92 public final void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext,
93 InputStream inputStream, ExtractionResult result) throws IOException, ExtractionException {
94 result.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
95 result.writeNamespace(ICAL.PREFIX, ICAL.NS);
96 result.writeNamespace(XSD.PREFIX, XSD.NAMESPACE);
97
98 ScribeIndex index = new ScribeIndex();
99 try (StreamReader reader = reader(inputStream)) {
100 ICalendar cal;
101 while ((cal = reader.readNext()) != null) {
102 for (ParseWarning warning : reader.getWarnings()) {
103 String message = warning.getMessage();
104 Integer lineNumber = warning.getLineNumber();
105 if (lineNumber == null) {
106 result.notifyIssue(IssueReport.IssueLevel.WARNING, message, -1, -1);
107 } else {
108 result.notifyIssue(IssueReport.IssueLevel.WARNING, message, lineNumber, -1);
109 }
110 }
111
112 BNode calNode = f.createBNode();
113 result.writeTriple(calNode, RDF.TYPE, vICAL.Vcalendar);
114 WriteContext ctx = new WriteContext(ICalVersion.V2_0, cal.getTimezoneInfo(), null);
115 extract(index, ctx, calNode, cal, result, true);
116 }
117 } catch (Exception e) {
118 result.notifyIssue(IssueReport.IssueLevel.FATAL, toString(e), -1, -1);
119 }
120 }
121
122 private static String toString(Throwable th) {
123 StringWriter writer = new StringWriter();
124 try (PrintWriter pw = new PrintWriter(writer)) {
125 th.printStackTrace(pw);
126 }
127 String string = writer.toString();
128 if (string.length() > 200) {
129 return string.substring(0, 197) + "...";
130 }
131 return string;
132 }
133
/**
 * Derives the local name used for a type by delegating to
 * {@code camelCase(typeName, false)}.
 *
 * @param typeName the hyphen-separated name to convert
 * @return the result of {@code camelCase} with {@code forProperty == false}
 */
private static String localNameOfType(String typeName) {
    return camelCase(typeName, false);
}
137
/**
 * Derives the local name used for a property by delegating to
 * {@code camelCase(propertyName, true)}.
 *
 * @param propertyName the hyphen-separated name to convert
 * @return the result of {@code camelCase} with {@code forProperty == true}
 */
private static String localNameOfProperty(String propertyName) {
    return camelCase(propertyName, true);
}
141
142 private static String camelCase(String name, boolean forProperty) {
143 String[] nameComponents = name.toLowerCase(Locale.ENGLISH).split("-");
144 StringBuilder sb = new StringBuilder(name.length());
145 int i = 0;
146 if (forProperty) {
147 sb.append(nameComponents[i++]);
148 }
149 for (int len = nameComponents.length; i < len; i++) {
150 String n = nameComponents[i];
151 if (!n.isEmpty()) {
152 int ind = Character.charCount(n.codePointAt(0));
153 sb.append(n.substring(0, ind).toUpperCase(Locale.ENGLISH)).append(n.substring(ind));
154 }
155 }
156 return sb.toString();
157 }
158
159 private static IRI type(String originalName) {
160 if (originalName.regionMatches(true, 0, "X-", 0, 2)) {
161
162 return f.createIRI(ICAL.NS, "X-" + localNameOfType(originalName.substring(2)));
163 }
164
165 String name = localNameOfType(originalName);
166
167 try {
168 return Objects.requireNonNull(vICAL.getClass(name));
169 } catch (RuntimeException e) {
170 return null;
171 }
172 }
173
174 private static IRI predicate(String originalName, ExtractionResult result) {
175 if (originalName.regionMatches(true, 0, "X-", 0, 2)) {
176
177 return f.createIRI(ICAL.NS, "x-" + localNameOfProperty(originalName.substring(2)));
178 }
179
180 String name = localNameOfProperty(originalName);
181
182 try {
183 return Objects.requireNonNull(vICAL.getProperty(name));
184 } catch (RuntimeException e) {
185 IRI iri = f.createIRI(ICAL.NS, name);
186 result.notifyIssue(IssueReport.IssueLevel.ERROR,
187 "property " + iri + " (" + originalName + ") not defined in " + ICAL.class.getName(), -1, -1);
188 return iri;
189 }
190 }
191
192 private static final String NaN = Double.toString(Double.NaN);
193
194 private static String str(Double d) {
195 return d == null ? NaN : d.toString();
196 }
197
198 private static BNode writeParams(BNode subject, IRI predicate, ICalParameters params, ExtractionResult result) {
199 BNode bNode = f.createBNode();
200 result.writeTriple(subject, predicate, bNode);
201 writeParams(bNode, params, result);
202 return bNode;
203 }
204
205 private static void writeParams(BNode subject, ICalParameters params, ExtractionResult result) {
206 for (Map.Entry<String, List<String>> entry : params.getMap().entrySet()) {
207 List<String> strings = entry.getValue();
208 if (strings != null && !strings.isEmpty()) {
209 IRI predicate = predicate(entry.getKey(), result);
210 for (String v : strings) {
211 result.writeTriple(subject, predicate, f.createLiteral(v));
212 }
213 }
214 }
215 }
216
217 private static IRI dataType(ICalDataType dataType, Boolean isFloating) {
218 if (dataType == null || ICalDataType.TEXT.equals(dataType)) {
219 return XSD.STRING;
220 } else if (ICalDataType.BOOLEAN.equals(dataType)) {
221 return XSD.BOOLEAN;
222 } else if (ICalDataType.INTEGER.equals(dataType)) {
223 return XSD.INTEGER;
224 } else if (ICalDataType.FLOAT.equals(dataType)) {
225 return XSD.FLOAT;
226 } else if (ICalDataType.BINARY.equals(dataType)) {
227 return XSD.BASE64BINARY;
228 } else if (ICalDataType.URI.equals(dataType) || ICalDataType.URL.equals(dataType)
229 || ICalDataType.CONTENT_ID.equals(dataType) || ICalDataType.CAL_ADDRESS.equals(dataType)) {
230 return XSD.ANYURI;
231 } else if (ICalDataType.DATE_TIME.equals(dataType)) {
232 if (isFloating == null) {
233 return null;
234 }
235 return isFloating ? vICAL.DATE_TIME : XSD.DATETIME;
236 } else if (ICalDataType.DATE.equals(dataType)) {
237 return XSD.DATE;
238 } else if (ICalDataType.TIME.equals(dataType)) {
239 return XSD.TIME;
240 } else if (ICalDataType.DURATION.equals(dataType)) {
241 return XSD.DURATION;
242 } else if (ICalDataType.PERIOD.equals(dataType)) {
243 return vICAL.Value_PERIOD;
244 } else if (ICalDataType.RECUR.equals(dataType)) {
245 return vICAL.Value_RECUR;
246 } else {
247 return XSD.STRING;
248 }
249 }
250
// Matches a duration consisting solely of weeks, e.g. "P2W" or "-P2W":
// group 1 is the (optionally negative) "P" prefix, group 2 the number of weeks.
private static final Pattern durationWeeksPattern = Pattern.compile("(-?P)(\\d+)W");

/**
 * Normalizes the lexical form of a value for the given datatype and reports an ERROR issue
 * if the value turns out to be invalid. The (possibly normalized) string is returned either
 * way; this method never throws {@link IllegalArgumentException} to its caller.
 *
 * @param s        the lexical value
 * @param dataType the target datatype; when {@code null}, {@code s} is returned untouched
 * @param zone     time zone used to render non-UTC date-times; may be {@code null}
 * @param result   receives the ERROR issue for invalid values
 * @return the normalized value, or the input (as far as it was rewritten) when invalid
 */
private static String normalizeAndReportIfInvalid(String s, IRI dataType, TimeZone zone, ExtractionResult result) {
    if (dataType == null) {
        return s;
    }
    try {
        if (XSD.DURATION.equals(dataType)) {
            Matcher m = durationWeeksPattern.matcher(s);
            if (m.matches()) {
                // Rewrite weeks as days (presumably because xsd:duration has no week
                // designator -- confirm). Returns immediately, skipping the validity check
                // below. A NumberFormatException from parseLong is an
                // IllegalArgumentException and is reported by the outer catch.
                long days = Long.parseLong(m.group(2)) * 7;
                return m.group(1) + days + "D";
            }
        } else if (vICAL.Value_PERIOD.equals(dataType)) {
            // The only check made here for a period is that it contains a '/' separator.
            if (s.indexOf('/') == -1) {
                throw new IllegalArgumentException();
            }
        } else if (zone != null && XSD.DATETIME.equals(dataType)) {
            try {
                // Non-UTC date-times are re-rendered in the given zone using the extended format.
                DateTimeComponents dt = DateTimeComponents.parse(s);
                if (!dt.isUtc()) {
                    s = ICalDateFormat.DATE_TIME_EXTENDED.format(dt.toDate(zone), zone);
                }
            } catch (IllegalArgumentException e) {
                // Ignored on purpose here: s is left as-is and the validity check below
                // decides whether an issue is reported.
            }
        } else {
            s = XMLDatatypeUtil.normalize(s, dataType);
        }

        if (!XMLDatatypeUtil.isValidValue(s, dataType)) {
            throw new IllegalArgumentException();
        }
    } catch (IllegalArgumentException e) {
        // Use the exception's own message when it has one; otherwise build a generic one
        // from the datatype and the current value of s.
        String m = e.getMessage();
        if (StringUtils.isBlank(m)) {
            m = "Not a valid " + dataType + " value: " + s;
        }
        result.notifyIssue(IssueReport.IssueLevel.ERROR, m, -1, -1);
    }
    return s;
}
293
294 private static boolean writeValue(BNode subject, IRI predicate, JsonValue jsonValue, String lang, IRI dataType,
295 TimeZone zone, ExtractionResult result) {
296 if (jsonValue == null || jsonValue.isNull()) {
297 return false;
298 }
299 Object val = jsonValue.getValue();
300 if (val != null) {
301 Value v;
302 if (val instanceof Byte) {
303 v = f.createLiteral((byte) val);
304 } else if (val instanceof Short) {
305 v = f.createLiteral((short) val);
306 } else if (val instanceof Integer) {
307 v = f.createLiteral((int) val);
308 } else if (val instanceof Long) {
309 v = f.createLiteral((long) val);
310 } else if (val instanceof Float) {
311 v = f.createLiteral((float) val);
312 } else if (val instanceof Double) {
313 v = f.createLiteral((double) val);
314 } else if (val instanceof Boolean) {
315 v = f.createLiteral((boolean) val);
316 } else if (val instanceof BigInteger) {
317 v = f.createLiteral((BigInteger) val);
318 } else if (val instanceof BigDecimal) {
319 v = f.createLiteral((BigDecimal) val);
320 } else {
321 String str = normalizeAndReportIfInvalid(val.toString(), dataType, zone, result);
322
323 if (XSD.STRING.equals(dataType)) {
324 if (lang == null) {
325 v = f.createLiteral(str);
326 } else {
327 v = f.createLiteral(str, lang);
328 }
329 } else if (XSD.ANYURI.equals(dataType)) {
330 try {
331 v = f.createIRI(str);
332 } catch (IllegalArgumentException e) {
333 v = f.createLiteral(str, dataType);
334 }
335 } else if (vICAL.Value_PERIOD.equals(dataType)) {
336 String[] strs = str.split("/");
337 if (strs.length == 2) {
338 String firstPart = normalizeAndReportIfInvalid(strs[0], XSD.DATETIME, zone, result);
339 String secondPart = strs[1];
340 if (secondPart.indexOf('P') != -1) {
341 secondPart = normalizeAndReportIfInvalid(secondPart, XSD.DURATION, zone, result);
342 } else {
343 secondPart = normalizeAndReportIfInvalid(secondPart, XSD.DATETIME, zone, result);
344 }
345 str = firstPart + "/" + secondPart;
346 }
347 v = f.createLiteral(str);
348 } else if (dataType != null) {
349 v = f.createLiteral(str, dataType);
350 } else {
351 v = f.createLiteral(str);
352 }
353
354 }
355 result.writeTriple(subject, predicate, v);
356 return true;
357 }
358
359 List<JsonValue> array = jsonValue.getArray();
360 if (array != null && !array.isEmpty()) {
361 if (array.size() == 1) {
362 return writeValue(subject, predicate, array.get(0), lang, dataType, zone, result);
363 } else {
364 BNode bNode = f.createBNode();
365 result.writeTriple(subject, predicate, bNode);
366 for (JsonValue value : array) {
367 writeValue(bNode, RDF.VALUE, value, lang, dataType, zone, result);
368 }
369 return true;
370 }
371 }
372
373 Map<String, JsonValue> object = jsonValue.getObject();
374 if (object != null) {
375 BNode bNode = f.createBNode();
376 result.writeTriple(subject, predicate, bNode);
377 for (Map.Entry<String, JsonValue> entry : object.entrySet()) {
378 writeValue(bNode, predicate(entry.getKey(), result), entry.getValue(), lang, XSD.STRING, zone, result);
379 }
380 return true;
381 }
382
383 return false;
384 }
385
386 private static TimeZone parseTimeZoneId(String tzId) {
387 for (;;) {
388 TimeZone zone = ICalDateFormat.parseTimeZoneId(tzId);
389 if (zone != null) {
390 return zone;
391 }
392 int ind = tzId.indexOf('/');
393 if (ind == -1) {
394 return null;
395 }
396 tzId = tzId.substring(ind + 1);
397 }
398 }
399
/**
 * Writes a single calendar property as triples on {@code subject}.
 *
 * <p>The property's parameters (minus LANGUAGE, VALUE, CHARSET and, in some cases,
 * ENCODING) are written onto an intermediate blank node when any remain or when the value
 * is a CAL_ADDRESS; otherwise the value is attached to {@code subject} directly.
 *
 * @param subject  node the property belongs to
 * @param scribe   scribe used to obtain the property's name, data type, parameters and
 *                 JSON value; the caller is expected to pass the scribe matching
 *                 {@code property} (the cast to {@code T} below is unchecked)
 * @param property the property to write
 * @param ctx      write context supplying the version and time zone information
 * @param result   sink for triples and issues
 */
@SuppressWarnings("unchecked")
private static <T extends ICalProperty> void writeProperty(BNode subject, ICalPropertyScribe<T> scribe,
        ICalProperty property, WriteContext ctx, ExtractionResult result) {
    try {
        T prop = (T) property;

        ICalVersion version = ctx.getVersion();

        ICalDataType dataType = scribe.dataType(prop, version);

        ICalParameters params = scribe.prepareParameters(prop, ctx);

        // LANGUAGE is not written as a parameter; it is passed to writeValue as the
        // language tag instead.
        String lang = params.getLanguage();
        params.removeAll(ICalParameters.LANGUAGE);

        Encoding encoding = params.getEncoding();

        // Data type precedence: the scribe's answer, then the VALUE parameter, then BINARY
        // if the encoding is BASE64.
        if (dataType == null) {
            dataType = params.getValue();
            if (dataType == null && Encoding.BASE64.equals(encoding)) {
                dataType = ICalDataType.BINARY;
            }
        }
        params.removeAll(ICalParameters.VALUE);

        if (ICalDataType.BINARY.equals(dataType)) {
            // A BINARY value with an explicit non-BASE64 encoding is reported and its data
            // type dropped (dataType(null, ...) then yields xsd:string). The ENCODING
            // parameter is removed for every BINARY value.
            if (encoding != null && !Encoding.BASE64.equals(encoding)) {
                result.notifyIssue(IssueReport.IssueLevel.ERROR,
                        "Invalid encoding " + encoding + " specified for BINARY value", -1, -1);
                dataType = null;
            }
            params.removeAll(ICalParameters.ENCODING);
        }

        if (Encoding._8BIT.equals(encoding)) {
            // An 8BIT ENCODING parameter is never written.
            // NOTE(review): presumably because 8BIT is the default text encoding -- confirm.
            params.removeAll(ICalParameters.ENCODING);
        }

        // CHARSET is never written.
        // NOTE(review): presumably irrelevant once the value is a Java string -- confirm.
        params.removeAll(ICalParameters.CHARSET);

        IRI predicate = predicate(scribe.getPropertyName(version), result);

        // From here on subject/predicate may be rebound to an intermediate blank node:
        // CAL_ADDRESS always gets one (value attached via calAddress), other properties
        // only when parameters remain (value attached via rdf:value).
        if (ICalDataType.CAL_ADDRESS.equals(dataType)) {
            subject = writeParams(subject, predicate, params, result);
            predicate = vICAL.calAddress;
        } else if (!params.isEmpty()) {
            subject = writeParams(subject, predicate, params, result);
            predicate = RDF.VALUE;
        }

        if (prop instanceof Geo) {
            // GEO is written as a single "geo:<lat>,<lon>" IRI; a missing coordinate is
            // rendered as "NaN" by str().
            Geo g = (Geo) prop;
            IRI value = f.createIRI("geo:" + str(g.getLatitude()) + "," + str(g.getLongitude()));
            result.writeTriple(subject, predicate, value);
        } else {
            // Work out the time zone (if any) and whether the value is floating; both feed
            // into the datatype IRI and into date-time normalization.
            String tzId = params.getTimezoneId();
            TimezoneInfo tzInfo = ctx.getTimezoneInfo();
            TimeZone timeZone = null;
            Boolean floating;
            if (tzId != null) {
                TimezoneAssignment assign = tzInfo.getTimezone(prop);
                if (assign != null) {
                    timeZone = assign.getTimeZone();
                } else {
                    // No zone assigned to this property: try to resolve the TZID ourselves
                    // and mark the property floating in the shared TimezoneInfo.
                    // NOTE(review): presumably so the scribe does not apply a zone of its
                    // own when producing the JSON value -- confirm.
                    timeZone = parseTimeZoneId(tzId);
                    tzInfo.setFloating(prop, true);
                }
                // null means "TZID given but unresolvable"; dataType() then returns null
                // for DATE_TIME, so the value is written without a datatype.
                floating = timeZone == null ? null : Boolean.FALSE;
            } else {
                floating = tzInfo.isFloating(prop);
            }

            IRI dataTypeIRI = dataType(dataType, floating);

            JCalValue jsonVal = scribe.writeJson(prop, ctx);
            List<JsonValue> jsonVals = jsonVal.getValues();

            // mod records whether any value was actually written.
            boolean mod = false;
            for (JsonValue value : jsonVals) {
                mod |= writeValue(subject, predicate, value, lang, dataTypeIRI, timeZone, result);
            }
            if (!mod) {
                // Nothing was written: fall back to the single-string form as a plain literal.
                result.writeTriple(subject, predicate, f.createLiteral(jsonVal.asSingle()));
            }
        }
    } catch (SkipMeException e) {
        // The property is silently skipped.
        // NOTE(review): presumably thrown by the scribe to signal that the property should
        // not be written -- confirm against biweekly.
    }
}
502
503 private static void extract(ScribeIndex index, WriteContext ctx, BNode node, ICalComponent component,
504 ExtractionResult result, boolean writeTimezones) {
505 for (ICalProperty property : component.getProperties().values()) {
506 ctx.setParent(component);
507 writeProperty(node, index.getPropertyScribe(property), property, ctx, result);
508 }
509
510 Stream<ICalComponent> components = component.getComponents().values().stream();
511
512 if (writeTimezones) {
513 Collection<VTimezone> tzs = ctx.getTimezoneInfo().getComponents();
514 Set<String> tzIds = tzs.stream().map(tz -> tz.getTimezoneId().getValue()).collect(Collectors.toSet());
515 components = Stream.concat(tzs.stream(), components.filter(
516 c -> !(c instanceof VTimezone && tzIds.contains(((VTimezone) c).getTimezoneId().getValue()))));
517 }
518
519 components.forEachOrdered(child -> {
520 BNode childNode = f.createBNode();
521 String componentName = index.getComponentScribe(child).getComponentName();
522 IRI childType = type(componentName);
523
524 if (childType == null) {
525 result.writeTriple(node, predicate(componentName, result), childNode);
526 } else {
527 result.writeTriple(node, vICAL.component, childNode);
528 result.writeTriple(childNode, RDF.TYPE, childType);
529 }
530 extract(index, ctx, childNode, child, result, false);
531 });
532 }
533
534 }