Refactoring HttpClientFeedFetcher

This commit is contained in:
Patrick Gotthard 2015-02-16 21:28:28 +01:00
parent 1b3886ee6a
commit 9378ff1a7e

View file

@ -30,6 +30,7 @@ import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpMethodRetryHandler; import org.apache.commons.httpclient.HttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.auth.AuthScope; import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpClientParams; import org.apache.commons.httpclient.params.HttpClientParams;
@ -51,29 +52,242 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
private FeedFetcherCache feedInfoCache; private FeedFetcherCache feedInfoCache;
private volatile HttpClientMethodCallbackIntf httpClientMethodCallback; private volatile HttpClientMethodCallbackIntf httpClientMethodCallback;
private volatile HttpClientParams httpClientParams; private volatile HttpClientParams httpClientParams;
private Map<String,String> customRequestHeaders; private Map<String, String> customRequestHeaders;
public HttpClientFeedFetcher() { public HttpClientFeedFetcher() {
setHttpClientParams(new HttpClientParams()); this(null, null);
} }
/**
* @param cache
*/
public HttpClientFeedFetcher(final FeedFetcherCache cache) { public HttpClientFeedFetcher(final FeedFetcherCache cache) {
this(); this(cache, null);
setFeedInfoCache(cache);
} }
public HttpClientFeedFetcher(final FeedFetcherCache cache, final CredentialSupplier credentialSupplier) { public HttpClientFeedFetcher(final FeedFetcherCache cache, final CredentialSupplier credentialSupplier) {
this(cache); setHttpClientParams(new HttpClientParams());
setFeedInfoCache(cache);
setCredentialSupplier(credentialSupplier); setCredentialSupplier(credentialSupplier);
} }
/** @Override
* @param handler Sets the retry handler public SyndFeed retrieveFeed(final URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException {
*/ return this.retrieveFeed(getUserAgent(), url);
public synchronized void setRetryHandler(HttpMethodRetryHandler handler) { }
/**
 * Fetches the feed at {@code feedUrl} over HTTP.
 * <p>
 * When a feed info cache is configured, the request is made conditional using the cached
 * ETag / Last-Modified values and the cache entry is replaced with one built from the
 * response. Without a cache the feed is downloaded and parsed on every call.
 *
 * @param userAgent value sent as {@code User-Agent} unless the custom request headers already
 *            contain that key; also written to the {@code httpclient.useragent} system property
 * @param feedUrl location of the feed, must not be {@code null}
 * @return the feed stored in the refreshed cache entry, or the freshly parsed feed when no
 *         cache is configured
 * @throws IllegalArgumentException if {@code feedUrl} is {@code null}
 * @throws IOException if executing the request or reading the response fails
 * @throws FeedException propagated from parsing the response
 * @throws FetcherException propagated from {@code handleErrorCodes} or feed retrieval
 */
@Override
public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {

    if (feedUrl == null) {
        throw new IllegalArgumentException("null is not a valid URL");
    }

    final HttpClient client = new HttpClient(httpClientParams);

    // Read the supplier exactly once (through the synchronized getter) so that a concurrent
    // setCredentialSupplier(null) cannot slip in between the null check and the use.
    final CredentialSupplier supplier = getCredentialSupplier();
    if (supplier != null) {
        final HttpClientParams params = client.getParams();
        params.setAuthenticationPreemptive(true);
        final String host = feedUrl.getHost();
        final Credentials credentials = supplier.getCredentials(null, host);
        if (credentials != null) {
            final AuthScope authScope = new AuthScope(host, -1);
            final HttpState state = client.getState();
            state.setCredentials(authScope, credentials);
        }
    }

    // NOTE: system properties are JVM-wide, so this is not a per-request setting.
    System.setProperty("httpclient.useragent", userAgent);

    final String urlStr = feedUrl.toString();
    final HttpMethod method = new GetMethod(urlStr);

    // Snapshot the header map so the null check and the iteration see the same reference.
    final Map<String, String> headers = customRequestHeaders;
    if (headers != null) {
        for (final Map.Entry<String, String> entry : headers.entrySet()) {
            method.addRequestHeader(entry.getKey(), entry.getValue());
        }
    }
    // Defaults are only added when the caller did not supply its own value.
    if (headers == null || !headers.containsKey("Accept-Encoding")) {
        method.addRequestHeader("Accept-Encoding", "gzip");
    }
    if (headers == null || !headers.containsKey("User-Agent")) {
        method.addRequestHeader("User-Agent", userAgent);
    }

    method.setFollowRedirects(true);

    // The field is volatile: copy it to a local so it cannot become null between the check
    // and the synchronized block.
    final HttpClientMethodCallbackIntf callback = httpClientMethodCallback;
    if (callback != null) {
        synchronized (callback) {
            callback.afterHttpClientMethodCreate(method);
        }
    }

    final FeedFetcherCache cache = getFeedInfoCache();

    try {

        // stays null when no cache is in use or the feed has not been cached yet
        SyndFeedInfo cachedInfo = null;

        if (cache != null) {

            if (isUsingDeltaEncoding()) {
                method.setRequestHeader("A-IM", "feed");
            }

            // try to get the feed info from the cache
            cachedInfo = cache.getFeedInfo(feedUrl);

            if (cachedInfo != null) {

                // only send the validator when one was actually stored for this feed
                final String eTag = cachedInfo.getETag();
                if (eTag != null) {
                    method.setRequestHeader("If-None-Match", eTag);
                }

                final Object lastModifiedHeader = cachedInfo.getLastModified();
                if (lastModifiedHeader instanceof String) {
                    method.setRequestHeader("If-Modified-Since", (String) lastModifiedHeader);
                }

            }

        }

        final int statusCode = client.executeMethod(method);
        fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
        handleErrorCodes(statusCode);

        final SyndFeed feed = getFeed(cachedInfo, urlStr, method, statusCode);
        if (cache == null) {
            // cache is not in use
            return feed;
        }

        final SyndFeedInfo refreshedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);
        cache.setFeedInfo(feedUrl, refreshedInfo);

        // the feed may have been modified to pick up cached values
        // (eg - for delta encoding)
        return refreshedInfo.getSyndFeed();

    } finally {
        method.releaseConnection();
    }

}
/**
 * Picks the feed to hand back for an executed request: the cached feed when the server
 * answered "not modified" and a cache entry is available, otherwise the feed parsed from the
 * response body.
 *
 * @param syndFeedInfo cache entry for the feed, may be {@code null}
 * @param urlStr feed URL, passed along with the fired events
 * @param method the executed HTTP method
 * @param statusCode HTTP status code of the response
 * @return the cached or freshly parsed feed
 */
private SyndFeed getFeed(final SyndFeedInfo syndFeedInfo, final String urlStr, final HttpMethod method, final int statusCode) throws IOException,
        HttpException, FetcherException, FeedException {

    final boolean unchanged = syndFeedInfo != null && statusCode == HttpURLConnection.HTTP_NOT_MODIFIED;

    if (!unchanged) {
        final SyndFeed parsed = retrieveFeed(urlStr, method);
        fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, parsed);
        return parsed;
    }

    fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
    return syndFeedInfo.getSyndFeed();

}
/**
 * Builds the cache entry for a response: URL, id, the Last-Modified / ETag validators and the
 * feed itself. For a delta encoded response the received items are combined with the feed
 * that is currently cached.
 *
 * @param feedUrl URL the feed was requested from; its string form becomes the entry id
 * @param urlStr string form of the URL stored in the entry
 * @param method the executed HTTP method whose response headers are read
 * @param feed the feed obtained for this response
 * @param statusCode HTTP status code of the response
 * @return the populated feed info
 * @throws MalformedURLException if {@code urlStr} cannot be parsed as a URL
 */
private SyndFeedInfo buildSyndFeedInfo(final URL feedUrl, final String urlStr, final HttpMethod method, SyndFeed feed, final int statusCode)
        throws MalformedURLException {

    final SyndFeedInfo info = new SyndFeedInfo();

    // NOTE(review): the original comment says this may differ from feedUrl because of 3XX
    // redirects - confirm that the caller really passes a post-redirect URL here
    info.setUrl(new URL(urlStr));
    info.setId(feedUrl.toString());

    SyndFeed feedToStore = feed;

    final Header imHeader = method.getResponseHeader("IM");
    final boolean feedDelta = imHeader != null && imHeader.getValue().contains("feed") && isUsingDeltaEncoding();
    if (feedDelta) {
        final FeedFetcherCache cache = getFeedInfoCache();
        // With status 226 the client is set up for HTTP delta encoding, the server supports
        // it and has returned a delta encoded response that only includes new items.
        final SyndFeedInfo previous = cache != null && statusCode == 226 ? cache.getFeedInfo(feedUrl) : null;
        if (previous != null) {
            // the stored feed becomes the original feed plus the new items
            feedToStore = combineFeeds(previous.getSyndFeed(), feed);
        }
    }

    final Header lastModified = method.getResponseHeader("Last-Modified");
    if (lastModified != null) {
        info.setLastModified(lastModified.getValue());
    }

    final Header eTag = method.getResponseHeader("ETag");
    if (eTag != null) {
        info.setETag(eTag.getValue());
    }

    info.setSyndFeed(feedToStore);
    return info;

}
/**
 * Parses the response body of an executed HTTP method into a feed, transparently unpacking
 * gzip encoded responses.
 *
 * @param urlStr URL the response was requested from, used in the error message
 * @param method the executed HTTP method
 * @return the feed built from the response body
 * @throws IOException if the response has no body or the body cannot be read
 * @throws FeedException propagated from building the feed
 */
private SyndFeed retrieveFeed(final String urlStr, final HttpMethod method) throws IOException, HttpException, FetcherException, FeedException {

    final InputStream body = method.getResponseBodyAsStream();
    if (body == null) {
        // a response without body would otherwise surface as a NullPointerException
        throw new IOException("No response body received from " + urlStr);
    }

    // Starts as the raw body so that it is closed even if wrapping it in a
    // GZIPInputStream fails.
    InputStream stream = body;

    try {

        final Header contentEncodingHeader = method.getResponseHeader("Content-Encoding");
        if (contentEncodingHeader != null && "gzip".equalsIgnoreCase(contentEncodingHeader.getValue())) {
            stream = new GZIPInputStream(body);
        }

        final Header contentTypeHeader = method.getResponseHeader("Content-Type");

        final XmlReader reader;
        if (contentTypeHeader != null) {
            reader = new XmlReader(stream, contentTypeHeader.getValue(), true);
        } else {
            reader = new XmlReader(stream, true);
        }

        final SyndFeedInput syndFeedInput = new SyndFeedInput();
        syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());

        return syndFeedInput.build(reader);

    } finally {
        // closing the outermost stream also closes the wrapped body
        stream.close();
    }

}
public synchronized void setRetryHandler(final HttpMethodRetryHandler handler) {
httpClientParams.setParameter(HttpMethodParams.RETRY_HANDLER, handler); httpClientParams.setParameter(HttpMethodParams.RETRY_HANDLER, handler);
} }
@ -97,30 +311,18 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
return (int) getHttpClientParams().getConnectionManagerTimeout(); return (int) getHttpClientParams().getConnectionManagerTimeout();
} }
/**
* @param credentialSupplier The credentialSupplier to set.
*/
public synchronized void setCredentialSupplier(final CredentialSupplier credentialSupplier) { public synchronized void setCredentialSupplier(final CredentialSupplier credentialSupplier) {
this.credentialSupplier = credentialSupplier; this.credentialSupplier = credentialSupplier;
} }
/**
* @return Returns the credentialSupplier.
*/
public synchronized CredentialSupplier getCredentialSupplier() { public synchronized CredentialSupplier getCredentialSupplier() {
return credentialSupplier; return credentialSupplier;
} }
/**
* @param feedInfoCache the feedInfoCache to set
*/
public synchronized void setFeedInfoCache(final FeedFetcherCache feedInfoCache) { public synchronized void setFeedInfoCache(final FeedFetcherCache feedInfoCache) {
this.feedInfoCache = feedInfoCache; this.feedInfoCache = feedInfoCache;
} }
/**
* @return the feedInfoCache.
*/
public synchronized FeedFetcherCache getFeedInfoCache() { public synchronized FeedFetcherCache getFeedInfoCache() {
return feedInfoCache; return feedInfoCache;
} }
@ -133,16 +335,10 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
return httpClientMethodCallback; return httpClientMethodCallback;
} }
/**
* @param httpClientParams The httpClientParams to set.
*/
public synchronized void setHttpClientParams(final HttpClientParams httpClientParams) { public synchronized void setHttpClientParams(final HttpClientParams httpClientParams) {
this.httpClientParams = httpClientParams; this.httpClientParams = httpClientParams;
} }
/**
* @return Returns the httpClientParams.
*/
public synchronized HttpClientParams getHttpClientParams() { public synchronized HttpClientParams getHttpClientParams() {
return httpClientParams; return httpClientParams;
} }
@ -165,234 +361,19 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
/** /**
* Apply any request headers to the HTTP method call. * Apply any request headers to the HTTP method call.
*
* @param customRequestHeaders * @param customRequestHeaders
*/ */
public synchronized void setCustomRequestHeaders(final Map<String,String> customRequestHeaders) { public synchronized void setCustomRequestHeaders(final Map<String, String> customRequestHeaders) {
this.customRequestHeaders = customRequestHeaders; this.customRequestHeaders = customRequestHeaders;
} }
/**
 * Retrieves the feed at the given URL using the user agent returned by
 * {@code getUserAgent()}.
 */
@Override
public SyndFeed retrieveFeed(final URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException {
    final String userAgent = getUserAgent();
    return retrieveFeed(userAgent, url);
}
/**
 * Fetches the feed at {@code feedUrl} over HTTP.
 * <p>
 * When a feed info cache is configured, the request is made conditional using the cached
 * ETag / Last-Modified values and the cache entry is replaced with one built from the
 * response. Without a cache the feed is downloaded and parsed on every call.
 *
 * @param userAgent value sent as {@code User-Agent} unless the custom request headers already
 *            contain that key; also written to the {@code httpclient.useragent} system property
 * @param feedUrl location of the feed, must not be {@code null}
 * @return the feed stored in the refreshed cache entry, or the freshly parsed feed when no
 *         cache is configured
 * @throws IllegalArgumentException if {@code feedUrl} is {@code null}
 * @throws IOException if executing the request or reading the response fails
 * @throws FeedException propagated from parsing the response
 * @throws FetcherException propagated from {@code handleErrorCodes} or feed retrieval
 * @see com.rometools.fetcher.FeedFetcher
 */
@Override
public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {

    if (feedUrl == null) {
        throw new IllegalArgumentException("null is not a valid URL");
    }

    final HttpClient client = new HttpClient(httpClientParams);

    // Call the getter once: two separate calls could observe different values if the
    // supplier is replaced concurrently.
    final CredentialSupplier supplier = getCredentialSupplier();
    if (supplier != null) {
        client.getParams().setAuthenticationPreemptive(true);
        final String host = feedUrl.getHost();
        final Credentials credentials = supplier.getCredentials(null, host);
        if (credentials != null) {
            final AuthScope authScope = new AuthScope(host, -1);
            client.getState().setCredentials(authScope, credentials);
        }
    }

    // NOTE: system properties are JVM-wide, so this is not a per-request setting.
    System.setProperty("httpclient.useragent", userAgent);

    final String urlStr = feedUrl.toString();
    final HttpMethod method = new GetMethod(urlStr);

    // Snapshot the header map so the null check and the iteration see the same reference.
    final Map<String, String> headers = customRequestHeaders;
    if (headers != null) {
        for (final Map.Entry<String, String> entry : headers.entrySet()) {
            method.addRequestHeader(entry.getKey(), entry.getValue());
        }
    }
    // Defaults are only added when the caller did not supply its own value.
    if (headers == null || !headers.containsKey("Accept-Encoding")) {
        method.addRequestHeader("Accept-Encoding", "gzip");
    }
    if (headers == null || !headers.containsKey("User-Agent")) {
        method.addRequestHeader("User-Agent", userAgent);
    }

    method.setFollowRedirects(true);

    // The field is volatile: copy it to a local so it cannot become null between the check
    // and the synchronized block.
    final HttpClientMethodCallbackIntf callback = httpClientMethodCallback;
    if (callback != null) {
        synchronized (callback) {
            callback.afterHttpClientMethodCreate(method);
        }
    }

    final FeedFetcherCache cache = getFeedInfoCache();

    try {

        // Note that cachedInfo stays null if the feed is not in the cache
        // (or no cache is in use at all)
        SyndFeedInfo cachedInfo = null;

        if (cache != null) {

            if (isUsingDeltaEncoding()) {
                method.setRequestHeader("A-IM", "feed");
            }

            // get the feed info from the cache
            cachedInfo = cache.getFeedInfo(feedUrl);

            if (cachedInfo != null) {

                // only send the validator when one was actually stored for this feed
                final String eTag = cachedInfo.getETag();
                if (eTag != null) {
                    method.setRequestHeader("If-None-Match", eTag);
                }

                final Object lastModified = cachedInfo.getLastModified();
                if (lastModified instanceof String) {
                    method.setRequestHeader("If-Modified-Since", (String) lastModified);
                }

            }

        }

        final int statusCode = client.executeMethod(method);
        fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
        handleErrorCodes(statusCode);

        final SyndFeed feed = getFeed(cachedInfo, urlStr, method, statusCode);
        if (cache == null) {
            // cache is not in use
            return feed;
        }

        final SyndFeedInfo refreshedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);
        cache.setFeedInfo(new URL(urlStr), refreshedInfo);

        // the feed may have been modified to pick up cached values
        // (eg - for delta encoding)
        return refreshedInfo.getSyndFeed();

    } finally {
        method.releaseConnection();
    }

}
/**
 * Returns the cached feed when the server reported "not modified" and a cache entry exists;
 * otherwise parses the feed from the response body. The matching fetcher event is fired in
 * either case.
 *
 * @param syndFeedInfo cache entry for the feed, may be {@code null}
 * @param urlStr feed URL, passed along with the fired events
 * @param method the executed HTTP method
 * @param statusCode HTTP status code of the response
 * @return the cached or freshly parsed feed
 */
private SyndFeed getFeed(final SyndFeedInfo syndFeedInfo, final String urlStr, final HttpMethod method, final int statusCode) throws IOException,
        HttpException, FetcherException, FeedException {

    if (syndFeedInfo == null || statusCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
        // nothing usable in the cache or the server sent new content
        final SyndFeed downloaded = retrieveFeed(urlStr, method);
        fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, downloaded);
        return downloaded;
    }

    fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
    return syndFeedInfo.getSyndFeed();

}
/**
 * Builds the cache entry for a response: URL, id, the Last-Modified / ETag validators and the
 * feed itself. For a delta encoded response the received items are combined with the feed
 * that is currently cached.
 *
 * @param feedUrl URL the feed was requested from; its string form becomes the entry id
 * @param urlStr string form of the URL stored in the entry
 * @param method the executed HTTP method whose response headers are read
 * @param feed the feed obtained for this response
 * @param statusCode HTTP status code of the response
 * @return the populated feed info
 * @throws MalformedURLException if {@code urlStr} cannot be parsed as a URL
 */
private SyndFeedInfo buildSyndFeedInfo(final URL feedUrl, final String urlStr, final HttpMethod method, SyndFeed feed, final int statusCode)
        throws MalformedURLException {

    final SyndFeedInfo result = new SyndFeedInfo();

    // NOTE(review): the original comment says this may differ from feedUrl because of 3XX
    // redirects - confirm that the caller really passes a post-redirect URL here
    result.setUrl(new URL(urlStr));
    result.setId(feedUrl.toString());

    SyndFeed effectiveFeed = feed;

    final Header imHeader = method.getResponseHeader("IM");
    if (imHeader != null && imHeader.getValue().indexOf("feed") != -1 && isUsingDeltaEncoding()) {
        final FeedFetcherCache cache = getFeedInfoCache();
        SyndFeedInfo cachedInfo = null;
        if (cache != null && statusCode == 226) {
            // client is set up to use http delta encoding, the server supports it and has
            // returned a delta encoded response which only includes new items
            cachedInfo = cache.getFeedInfo(feedUrl);
        }
        if (cachedInfo != null) {
            // the new feed is the original feed plus the new items
            effectiveFeed = combineFeeds(cachedInfo.getSyndFeed(), feed);
        }
    }

    final Header lastModifiedHeader = method.getResponseHeader("Last-Modified");
    if (lastModifiedHeader != null) {
        result.setLastModified(lastModifiedHeader.getValue());
    }

    final Header eTagHeader = method.getResponseHeader("ETag");
    if (eTagHeader != null) {
        result.setETag(eTagHeader.getValue());
    }

    result.setSyndFeed(effectiveFeed);
    return result;

}
/**
 * Parses the response body of an executed HTTP method into a feed, transparently unpacking
 * gzip encoded responses.
 *
 * @param urlStr URL the response was requested from, used in the error message
 * @param method the executed HTTP method
 * @return the feed built from the response body
 * @throws IOException if the response has no body or the body cannot be read
 * @throws HttpException declared for callers; not thrown directly by this method
 * @throws FetcherException declared for callers; not thrown directly by this method
 * @throws FeedException propagated from building the feed
 */
private SyndFeed retrieveFeed(final String urlStr, final HttpMethod method) throws IOException, HttpException, FetcherException, FeedException {

    final InputStream body = method.getResponseBodyAsStream();
    if (body == null) {
        // a response without body would otherwise surface as a NullPointerException
        throw new IOException("No response body received from " + urlStr);
    }

    // Starts as the raw body so that it is closed even if wrapping it in a
    // GZIPInputStream fails.
    InputStream stream = body;

    try {

        // look each header up once instead of once for the check and once for the value
        final Header contentEncoding = method.getResponseHeader("Content-Encoding");
        if (contentEncoding != null && "gzip".equalsIgnoreCase(contentEncoding.getValue())) {
            stream = new GZIPInputStream(body);
        }

        final Header contentType = method.getResponseHeader("Content-Type");

        final XmlReader reader;
        if (contentType != null) {
            reader = new XmlReader(stream, contentType.getValue(), true);
        } else {
            reader = new XmlReader(stream, true);
        }

        final SyndFeedInput syndFeedInput = new SyndFeedInput();
        syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());

        return syndFeedInput.build(reader);

    } finally {
        // closing the outermost stream also closes the wrapped body
        stream.close();
    }

}
public interface CredentialSupplier { public interface CredentialSupplier {
public Credentials getCredentials(String realm, String host); public Credentials getCredentials(String realm, String host);
} }
public interface HttpClientMethodCallbackIntf { public interface HttpClientMethodCallbackIntf {
/** /**
* Allows access to the underlying HttpClient HttpMethod object. Note that in most cases, * Allows access to the underlying HttpClient HttpMethod object. Note that in most cases,
* method.setRequestHeader(String, String) is what you want to do (rather than * method.setRequestHeader(String, String) is what you want to do (rather than