1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.writer;
19
20 import org.apache.any23.extractor.ExtractionContext;
21 import org.eclipse.rdf4j.model.Resource;
22 import org.eclipse.rdf4j.model.IRI;
23 import org.eclipse.rdf4j.model.Value;
24
25 import java.io.PrintWriter;
26 import java.util.HashMap;
27 import java.util.Locale;
28 import java.util.Map;
29 import java.util.concurrent.atomic.AtomicBoolean;
30 import java.util.stream.Collectors;
31 import org.apache.any23.util.StringUtils;
32
33
34
35
36 public class LoggingTripleHandler implements TripleHandler {
37
38
39
40
41 private final TripleHandler underlyingHandler;
42
43 private final Map<String, Integer> contextTripleMap = new HashMap<String, Integer>();
44 private long startTime = 0;
45 private long contentLength = 0;
46 private final PrintWriter destination;
47
48 public LoggingTripleHandler(TripleHandler tripleHandler, PrintWriter destination) {
49 if (tripleHandler == null) {
50 throw new NullPointerException("tripleHandler cannot be null.");
51 }
52 if (destination == null) {
53 throw new NullPointerException("destination cannot be null.");
54 }
55 underlyingHandler = tripleHandler;
56 this.destination = destination;
57
58 printHeader(destination);
59 }
60
61 public void startDocument(IRI documentIRI) throws TripleHandlerException {
62 underlyingHandler.startDocument(documentIRI);
63 startTime = System.currentTimeMillis();
64 }
65
66 public void close() throws TripleHandlerException {
67 underlyingHandler.close();
68 destination.flush();
69 destination.close();
70 }
71
72 public void closeContext(ExtractionContext context) throws TripleHandlerException {
73 underlyingHandler.closeContext(context);
74 }
75
76 public void openContext(ExtractionContext context) throws TripleHandlerException {
77 underlyingHandler.openContext(context);
78 }
79
80 public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
81 throws TripleHandlerException {
82 underlyingHandler.receiveTriple(s, p, o, g, context);
83 Integer i = contextTripleMap.get(context.getExtractorName());
84 if (i == null)
85 i = 0;
86 contextTripleMap.put(context.getExtractorName(), (i + 1));
87 }
88
89 public void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException {
90 underlyingHandler.receiveNamespace(prefix, uri, context);
91 }
92
93 @Override
94 public void endDocument(IRI documentIRI) throws TripleHandlerException {
95 underlyingHandler.endDocument(documentIRI);
96 long elapsedTime = System.currentTimeMillis() - startTime;
97 final AtomicBoolean success = new AtomicBoolean(true);
98
99 StringBuilder sb = new StringBuilder("[ ");
100 String[] parsers = contextTripleMap.entrySet().stream().map(e -> {
101 if (e.getValue() > 0) {
102 success.set(true);
103 }
104 return String.format(Locale.ROOT, "%s:%d", e.getKey(), e.getValue());
105 }).collect(Collectors.toList()).toArray(new String[] {});
106 sb.append(StringUtils.join(", ", parsers));
107 sb.append(" ]");
108 destination.println(
109 documentIRI + "\t" + contentLength + "\t" + elapsedTime + "\t" + success.get() + "\t" + sb.toString());
110 contextTripleMap.clear();
111 }
112
113 public void setContentLength(long contentLength) {
114 underlyingHandler.setContentLength(contentLength);
115 this.contentLength = contentLength;
116 }
117
118 private void printHeader(PrintWriter writer) {
119 writer.println("# Document-IRI\tContent-Length\tElapsed-Time\tSuccess\tExtractors:Triples");
120 }
121 }