1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.source;
19
20 import org.apache.any23.http.HTTPClient;
21 import org.slf4j.Logger;
22 import org.slf4j.LoggerFactory;
23
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.net.URI;
27 import java.net.URISyntaxException;
28
29
30
31
32 public class HTTPDocumentSource implements DocumentSource {
33
34 private static final Logger LOG = LoggerFactory.getLogger(HTTPDocumentSource.class);
35
36 private final HTTPClient client;
37
38 private String uri;
39
40 private InputStream unusedInputStream = null;
41
42 private boolean loaded = false;
43
44 public HTTPDocumentSource(HTTPClient client, String uri) throws URISyntaxException {
45 this.client = client;
46 this.uri = normalize(uri);
47 }
48
49 private String normalize(String uri) throws URISyntaxException {
50 try {
51 URI normalized = new URI(uri).normalize();
52 return normalized.toString();
53 } catch (URISyntaxException e) {
54 LOG.warn("Invalid uri: {}", uri);
55 LOG.error("Can not convert URL", e);
56 throw e;
57 }
58 }
59
60 private void ensureOpen() throws IOException {
61 if (loaded)
62 return;
63 loaded = true;
64 unusedInputStream = client.openInputStream(uri);
65 if (client.getActualDocumentIRI() != null) {
66 uri = client.getActualDocumentIRI();
67 }
68 }
69
70 public InputStream openInputStream() throws IOException {
71 ensureOpen();
72 if (unusedInputStream != null) {
73 InputStream temp = unusedInputStream;
74 unusedInputStream = null;
75 return temp;
76 }
77 return client.openInputStream(uri);
78 }
79
80 public long getContentLength() {
81 return client.getContentLength();
82 }
83
84 public String getDocumentIRI() {
85 return uri;
86 }
87
88 public String getContentType() {
89 return client.getContentType();
90 }
91
92 public boolean isLocal() {
93 return false;
94 }
95
96 }