1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.http;
19
20 import org.apache.commons.io.IOUtils;
21 import org.apache.http.Header;
22 import org.apache.http.HttpResponse;
23 import org.apache.http.client.HttpClient;
24 import org.apache.http.client.config.RequestConfig;
25 import org.apache.http.client.methods.HttpGet;
26 import org.apache.http.client.protocol.HttpClientContext;
27 import org.apache.http.config.SocketConfig;
28 import org.apache.http.impl.client.HttpClients;
29 import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
30 import org.apache.http.message.BasicHeader;
31
32 import java.io.ByteArrayInputStream;
33 import java.io.IOException;
34 import java.io.InputStream;
35 import java.net.URI;
36 import java.util.ArrayList;
37 import java.util.List;
38
39
40
41
42
43
44
45 public class DefaultHTTPClient implements HTTPClient {
46
47 private final PoolingHttpClientConnectionManager manager = new PoolingHttpClientConnectionManager();
48
49 private HTTPClientConfiguration configuration;
50
51 private HttpClient client = null;
52
53 private long _contentLength = -1;
54
55 private String actualDocumentIRI = null;
56
57 private String contentType = null;
58
59
60
61
62
63
64 public static DefaultHTTPClient createInitializedHTTPClient() {
65 final DefaultHTTPClient defaultHTTPClient = new DefaultHTTPClient();
66 defaultHTTPClient.init(DefaultHTTPClientConfiguration.singleton());
67 return defaultHTTPClient;
68 }
69
70 public void init(HTTPClientConfiguration configuration) {
71 if (configuration == null)
72 throw new NullPointerException("Illegal configuration, cannot be null.");
73 this.configuration = configuration;
74 }
75
76
77
78
79
80
81
82
83
84
85
86
87
88 public InputStream openInputStream(String uri) throws IOException {
89 HttpGet method = null;
90 try {
91 ensureClientInitialized();
92 HttpClientContext context = HttpClientContext.create();
93 method = new HttpGet(uri);
94 HttpResponse response = client.execute(method, context);
95 List<URI> locations = context.getRedirectLocations();
96
97 URI actualURI = locations == null || locations.isEmpty() ? method.getURI()
98 : locations.get(locations.size() - 1);
99 actualDocumentIRI = actualURI.toString();
100
101 final Header contentTypeHeader = response.getFirstHeader("Content-Type");
102 contentType = contentTypeHeader == null ? null : contentTypeHeader.getValue();
103 if (response.getStatusLine().getStatusCode() != 200) {
104 throw new IOException("Failed to fetch " + uri + ": " + response.getStatusLine().getStatusCode() + " "
105 + response.getStatusLine().getReasonPhrase());
106 }
107
108 byte[] bytes = IOUtils.toByteArray(response.getEntity().getContent());
109 _contentLength = bytes.length;
110 return new ByteArrayInputStream(bytes);
111 } finally {
112 if (method != null) {
113 method.reset();
114 }
115 }
116 }
117
118
119
120
121 public void close() {
122 manager.shutdown();
123 }
124
125 public long getContentLength() {
126 return _contentLength;
127 }
128
129 public String getActualDocumentIRI() {
130 return actualDocumentIRI;
131 }
132
133 public String getContentType() {
134 return contentType;
135 }
136
137 protected int getConnectionTimeout() {
138 return configuration.getDefaultTimeout();
139 }
140
141 protected int getSoTimeout() {
142 return configuration.getDefaultTimeout();
143 }
144
145 private void ensureClientInitialized() {
146 if (configuration == null)
147 throw new IllegalStateException("client must be initialized first.");
148 if (client != null)
149 return;
150
151 RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(getConnectionTimeout())
152 .setSocketTimeout(getSoTimeout()).setRedirectsEnabled(true).build();
153
154 SocketConfig socketConfig = SocketConfig.custom().setSoTimeout(getSoTimeout()).build();
155
156 List<Header> headers = new ArrayList<>();
157 headers.add(new BasicHeader("User-Agent", configuration.getUserAgent()));
158 if (configuration.getAcceptHeader() != null) {
159 headers.add(new BasicHeader("Accept", configuration.getAcceptHeader()));
160 }
161 headers.add(new BasicHeader("Accept-Language", "en-us,en-gb,en,*;q=0.3"));
162
163 headers.add(new BasicHeader("Accept-Charset", "utf-8,iso-8859-1;q=0.7,*;q=0.5"));
164
165 client = HttpClients.custom().setConnectionManager(manager).setDefaultRequestConfig(requestConfig)
166 .setDefaultSocketConfig(socketConfig).setMaxConnTotal(configuration.getMaxConnections())
167 .setDefaultHeaders(headers).build();
168 }
169
170 }