Changed so that you can pass a per-request user-agent string.
This commit is contained in:
parent
9b5acb1585
commit
9a8c0fd5ac
4 changed files with 352 additions and 317 deletions
4
pom.xml
4
pom.xml
|
@ -3,9 +3,9 @@
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<groupId>org.rometools</groupId>
|
<groupId>org.rometools</groupId>
|
||||||
<artifactId>rome-fetcher</artifactId>
|
<artifactId>fetcher</artifactId>
|
||||||
|
|
||||||
<name>Rome HTTP Fetcher</name>
|
<name>rome-fetcher</name>
|
||||||
<version>1.0.1-SNAPSHOT</version>
|
<version>1.0.1-SNAPSHOT</version>
|
||||||
<inceptionYear>2004</inceptionYear>
|
<inceptionYear>2004</inceptionYear>
|
||||||
<organization>
|
<organization>
|
||||||
|
|
|
@ -16,46 +16,53 @@
|
||||||
*/
|
*/
|
||||||
package com.sun.syndication.fetcher;
|
package com.sun.syndication.fetcher;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.URL;
|
|
||||||
|
|
||||||
import com.sun.syndication.feed.synd.SyndFeed;
|
import com.sun.syndication.feed.synd.SyndFeed;
|
||||||
import com.sun.syndication.io.FeedException;
|
import com.sun.syndication.io.FeedException;
|
||||||
|
|
||||||
public interface FeedFetcher {
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
|
|
||||||
|
public interface FeedFetcher {
|
||||||
/**
|
/**
|
||||||
* <p>The default user agent. It is not marked final so
|
* <p>The default user agent. It is not marked final so
|
||||||
* buggy java compiler will not write this string
|
* buggy java compiler will not write this string
|
||||||
* into all classes that reference it.</p>
|
* into all classes that reference it.</p>
|
||||||
*
|
*
|
||||||
* <p>http://tinyurl.com/64t5n points to https://rome.dev.java.net/
|
* <p>http://tinyurl.com/64t5n points to https://rome.dev.java.net
|
||||||
* Some servers ban user agents with "Java" in the name.</p>
|
* Some servers ban user agents with "Java" in the name.</p>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public static String DEFAULT_USER_AGENT = "Rome Client (http://tinyurl.com/64t5n)";
|
public static String DEFAULT_USER_AGENT = "Rome Client (http://tinyurl.com/64t5n)";
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the User-Agent currently being sent to servers
|
|
||||||
*/
|
|
||||||
public abstract String getUserAgent();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param string The User-Agent to send to servers
|
* @param string The User-Agent to send to servers
|
||||||
*/
|
*/
|
||||||
public abstract void setUserAgent(String string);
|
public abstract void setUserAgent(String string);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve a feed over HTTP
|
* @return the User-Agent currently being sent to servers
|
||||||
*
|
|
||||||
* @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
|
|
||||||
* @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
|
|
||||||
* @throws IllegalArgumentException if the URL is null;
|
|
||||||
* @throws IOException if a TCP error occurs
|
|
||||||
* @throws FeedException if the feed is not valid
|
|
||||||
* @throws FetcherException if a HTTP error occurred
|
|
||||||
*/
|
*/
|
||||||
public abstract SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException;
|
public abstract String getUserAgent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Turn on or off rfc3229 delta encoding</p>
|
||||||
|
*
|
||||||
|
* <p>See http://www.ietf.org/rfc/rfc3229.txt and http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html</p>
|
||||||
|
*
|
||||||
|
* <p>NOTE: This is experimental and feedback is welcome!</p>
|
||||||
|
*
|
||||||
|
* @param useDeltaEncoding
|
||||||
|
*/
|
||||||
|
public abstract void setUsingDeltaEncoding(boolean useDeltaEncoding);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Is this fetcher using rfc3229 delta encoding?</p>
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public abstract boolean isUsingDeltaEncoding();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Add a FetcherListener.</p>
|
* <p>Add a FetcherListener.</p>
|
||||||
|
@ -75,22 +82,20 @@ public interface FeedFetcher {
|
||||||
public abstract void removeFetcherEventListener(FetcherListener listener);
|
public abstract void removeFetcherEventListener(FetcherListener listener);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Is this fetcher using rfc3229 delta encoding?</p>
|
* Retrieve a feed over HTTP
|
||||||
*
|
*
|
||||||
* @return
|
* @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
|
||||||
|
* @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
|
||||||
|
* @throws IllegalArgumentException if the URL is null;
|
||||||
|
* @throws IOException if a TCP error occurs
|
||||||
|
* @throws FeedException if the feed is not valid
|
||||||
|
* @throws FetcherException if a HTTP error occurred
|
||||||
*/
|
*/
|
||||||
public abstract boolean isUsingDeltaEncoding();
|
public abstract SyndFeed retrieveFeed(URL feedUrl)
|
||||||
|
throws IllegalArgumentException, IOException, FeedException, FetcherException;
|
||||||
|
|
||||||
/**
|
public SyndFeed retrieveFeed(String userAgent, URL url)
|
||||||
* <p>Turn on or off rfc3229 delta encoding</p>
|
throws IllegalArgumentException, IOException, FeedException, FetcherException;
|
||||||
*
|
|
||||||
* <p>See http://www.ietf.org/rfc/rfc3229.txt and http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html</p>
|
|
||||||
*
|
|
||||||
* <p>NOTE: This is experimental and feedback is welcome!</p>
|
|
||||||
*
|
|
||||||
* @param useDeltaEncoding
|
|
||||||
*/
|
|
||||||
public abstract void setUsingDeltaEncoding(boolean useDeltaEncoding);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If set to true, the WireFeed will be made accessible from the SyndFeed object returned from the Fetcher
|
* If set to true, the WireFeed will be made accessible from the SyndFeed object returned from the Fetcher
|
||||||
|
|
|
@ -14,15 +14,14 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.sun.syndication.fetcher.impl;
|
package com.sun.syndication.fetcher.impl;
|
||||||
|
|
||||||
import java.io.IOException;
|
import com.sun.syndication.feed.synd.SyndFeed;
|
||||||
import java.io.InputStream;
|
import com.sun.syndication.fetcher.FetcherEvent;
|
||||||
import java.net.HttpURLConnection;
|
import com.sun.syndication.fetcher.FetcherException;
|
||||||
import java.net.MalformedURLException;
|
import com.sun.syndication.io.FeedException;
|
||||||
import java.net.URL;
|
import com.sun.syndication.io.SyndFeedInput;
|
||||||
import java.util.zip.GZIPInputStream;
|
import com.sun.syndication.io.XmlReader;
|
||||||
|
|
||||||
import org.apache.commons.httpclient.Credentials;
|
import org.apache.commons.httpclient.Credentials;
|
||||||
import org.apache.commons.httpclient.Header;
|
import org.apache.commons.httpclient.Header;
|
||||||
|
@ -32,49 +31,98 @@ import org.apache.commons.httpclient.HttpMethod;
|
||||||
import org.apache.commons.httpclient.methods.GetMethod;
|
import org.apache.commons.httpclient.methods.GetMethod;
|
||||||
import org.apache.commons.httpclient.params.HttpClientParams;
|
import org.apache.commons.httpclient.params.HttpClientParams;
|
||||||
|
|
||||||
import com.sun.syndication.feed.synd.SyndFeed;
|
import java.io.IOException;
|
||||||
import com.sun.syndication.fetcher.FetcherEvent;
|
import java.io.InputStream;
|
||||||
import com.sun.syndication.fetcher.FetcherException;
|
|
||||||
import com.sun.syndication.io.FeedException;
|
import java.net.HttpURLConnection;
|
||||||
import com.sun.syndication.io.SyndFeedInput;
|
import java.net.MalformedURLException;
|
||||||
import com.sun.syndication.io.XmlReader;
|
import java.net.URL;
|
||||||
|
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Nick Lothian
|
* @author Nick Lothian
|
||||||
*/
|
*/
|
||||||
public class HttpClientFeedFetcher extends AbstractFeedFetcher {
|
public class HttpClientFeedFetcher extends AbstractFeedFetcher {
|
||||||
|
|
||||||
private FeedFetcherCache feedInfoCache;
|
|
||||||
private CredentialSupplier credentialSupplier;
|
private CredentialSupplier credentialSupplier;
|
||||||
|
private FeedFetcherCache feedInfoCache;
|
||||||
private volatile HttpClientMethodCallbackIntf httpClientMethodCallback;
|
private volatile HttpClientMethodCallbackIntf httpClientMethodCallback;
|
||||||
private volatile HttpClientParams httpClientParams;
|
private volatile HttpClientParams httpClientParams;
|
||||||
|
|
||||||
public HttpClientFeedFetcher() {
|
public HttpClientFeedFetcher() {
|
||||||
super();
|
super();
|
||||||
setHttpClientParams(new HttpClientParams());
|
setHttpClientParams(new HttpClientParams());
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param cache
|
|
||||||
*/
|
|
||||||
public HttpClientFeedFetcher(FeedFetcherCache cache) {
|
|
||||||
this();
|
|
||||||
setFeedInfoCache(cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public HttpClientFeedFetcher(FeedFetcherCache cache, CredentialSupplier credentialSupplier) {
|
|
||||||
this(cache);
|
|
||||||
setCredentialSupplier(credentialSupplier);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return Returns the httpClientParams.
|
|
||||||
*/
|
|
||||||
public synchronized HttpClientParams getHttpClientParams() {
|
|
||||||
return this.httpClientParams;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param cache
|
||||||
|
*/
|
||||||
|
public HttpClientFeedFetcher(FeedFetcherCache cache) {
|
||||||
|
this();
|
||||||
|
setFeedInfoCache(cache);
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpClientFeedFetcher(FeedFetcherCache cache, CredentialSupplier credentialSupplier) {
|
||||||
|
this(cache);
|
||||||
|
setCredentialSupplier(credentialSupplier);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param timeout Sets the connect timeout for the HttpClient but using the URLConnection method name.
|
||||||
|
* Uses the HttpClientParams method setConnectionManagerTimeout instead of setConnectTimeout
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public synchronized void setConnectTimeout(int timeout) {
|
||||||
|
httpClientParams.setConnectionManagerTimeout(timeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The currently used connect timeout for the HttpClient but using the URLConnection method name.
|
||||||
|
* Uses the HttpClientParams method getConnectionManagerTimeout instead of getConnectTimeout
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public int getConnectTimeout() {
|
||||||
|
return (int) this.getHttpClientParams()
|
||||||
|
.getConnectionManagerTimeout();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param credentialSupplier The credentialSupplier to set.
|
||||||
|
*/
|
||||||
|
public synchronized void setCredentialSupplier(CredentialSupplier credentialSupplier) {
|
||||||
|
this.credentialSupplier = credentialSupplier;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns the credentialSupplier.
|
||||||
|
*/
|
||||||
|
public synchronized CredentialSupplier getCredentialSupplier() {
|
||||||
|
return credentialSupplier;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param feedInfoCache the feedInfoCache to set
|
||||||
|
*/
|
||||||
|
public synchronized void setFeedInfoCache(FeedFetcherCache feedInfoCache) {
|
||||||
|
this.feedInfoCache = feedInfoCache;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the feedInfoCache.
|
||||||
|
*/
|
||||||
|
public synchronized FeedFetcherCache getFeedInfoCache() {
|
||||||
|
return feedInfoCache;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void setHttpClientMethodCallback(HttpClientMethodCallbackIntf httpClientMethodCallback) {
|
||||||
|
this.httpClientMethodCallback = httpClientMethodCallback;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpClientMethodCallbackIntf getHttpClientMethodCallback() {
|
||||||
|
return httpClientMethodCallback;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param httpClientParams The httpClientParams to set.
|
* @param httpClientParams The httpClientParams to set.
|
||||||
*/
|
*/
|
||||||
|
@ -83,172 +131,152 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param timeout Sets the connect timeout for the HttpClient but using the URLConnection method name.
|
* @return Returns the httpClientParams.
|
||||||
* Uses the HttpClientParams method setConnectionManagerTimeout instead of setConnectTimeout
|
*/
|
||||||
*
|
public synchronized HttpClientParams getHttpClientParams() {
|
||||||
*/
|
return this.httpClientParams;
|
||||||
public synchronized void setConnectTimeout(int timeout) {
|
|
||||||
httpClientParams.setConnectionManagerTimeout(timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return The currently used connect timeout for the HttpClient but using the URLConnection method name.
|
|
||||||
* Uses the HttpClientParams method getConnectionManagerTimeout instead of getConnectTimeout
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public int getConnectTimeout() {
|
|
||||||
return (int) this.getHttpClientParams().getConnectionManagerTimeout();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return The currently used read timeout for the URLConnection, 0 is unlimited, i.e. no timeout
|
|
||||||
*/
|
|
||||||
public synchronized void setReadTimeout(int timeout) {
|
|
||||||
httpClientParams.setSoTimeout(timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param timeout Sets the read timeout for the URLConnection to a specified timeout, in milliseconds.
|
|
||||||
*/
|
|
||||||
public int getReadTimeout() {
|
|
||||||
return (int) this.getHttpClientParams().getSoTimeout();
|
|
||||||
}
|
|
||||||
|
|
||||||
public HttpClientMethodCallbackIntf getHttpClientMethodCallback() {
|
|
||||||
return httpClientMethodCallback;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized void setHttpClientMethodCallback(HttpClientMethodCallbackIntf httpClientMethodCallback) {
|
|
||||||
this.httpClientMethodCallback = httpClientMethodCallback;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the feedInfoCache.
|
|
||||||
*/
|
|
||||||
public synchronized FeedFetcherCache getFeedInfoCache() {
|
|
||||||
return feedInfoCache;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param feedInfoCache the feedInfoCache to set
|
|
||||||
*/
|
|
||||||
public synchronized void setFeedInfoCache(FeedFetcherCache feedInfoCache) {
|
|
||||||
this.feedInfoCache = feedInfoCache;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return Returns the credentialSupplier.
|
|
||||||
*/
|
|
||||||
public synchronized CredentialSupplier getCredentialSupplier() {
|
|
||||||
return credentialSupplier;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* @param credentialSupplier The credentialSupplier to set.
|
|
||||||
*/
|
|
||||||
public synchronized void setCredentialSupplier(CredentialSupplier credentialSupplier) {
|
|
||||||
this.credentialSupplier = credentialSupplier;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see com.sun.syndication.fetcher.FeedFetcher#retrieveFeed(java.net.URL)
|
* @param timeout Sets the read timeout for the URLConnection to a specified timeout, in milliseconds.
|
||||||
*/
|
|
||||||
public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
|
|
||||||
if (feedUrl == null) {
|
|
||||||
throw new IllegalArgumentException("null is not a valid URL");
|
|
||||||
}
|
|
||||||
// TODO Fix this
|
|
||||||
//System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog");
|
|
||||||
HttpClient client = new HttpClient(httpClientParams);
|
|
||||||
|
|
||||||
if (getCredentialSupplier() != null) {
|
|
||||||
client.getState().setAuthenticationPreemptive(true);
|
|
||||||
// TODO what should realm be here?
|
|
||||||
Credentials credentials = getCredentialSupplier().getCredentials(null, feedUrl.getHost());
|
|
||||||
if (credentials != null) {
|
|
||||||
client.getState().setCredentials(null, feedUrl.getHost(), credentials);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
System.setProperty("httpclient.useragent", getUserAgent());
|
|
||||||
String urlStr = feedUrl.toString();
|
|
||||||
|
|
||||||
HttpMethod method = new GetMethod(urlStr);
|
|
||||||
method.addRequestHeader("Accept-Encoding", "gzip");
|
|
||||||
method.addRequestHeader("User-Agent", getUserAgent());
|
|
||||||
method.setFollowRedirects(true);
|
|
||||||
|
|
||||||
if (httpClientMethodCallback != null) {
|
|
||||||
synchronized (httpClientMethodCallback) {
|
|
||||||
httpClientMethodCallback.afterHttpClientMethodCreate(method);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
FeedFetcherCache cache = getFeedInfoCache();
|
|
||||||
if (cache != null) {
|
|
||||||
// retrieve feed
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (isUsingDeltaEncoding()) {
|
|
||||||
method.setRequestHeader("A-IM", "feed");
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the feed info from the cache
|
|
||||||
// Note that syndFeedInfo will be null if it is not in the cache
|
|
||||||
SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
|
|
||||||
if (syndFeedInfo != null) {
|
|
||||||
method.setRequestHeader("If-None-Match", syndFeedInfo.getETag());
|
|
||||||
|
|
||||||
if (syndFeedInfo.getLastModified() instanceof String) {
|
|
||||||
method.setRequestHeader("If-Modified-Since", (String)syndFeedInfo.getLastModified());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int statusCode = client.executeMethod(method);
|
|
||||||
fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
|
|
||||||
handleErrorCodes(statusCode);
|
|
||||||
|
|
||||||
SyndFeed feed = getFeed(syndFeedInfo, urlStr, method, statusCode);
|
|
||||||
|
|
||||||
syndFeedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);
|
|
||||||
|
|
||||||
cache.setFeedInfo(new URL(urlStr), syndFeedInfo);
|
|
||||||
|
|
||||||
// the feed may have been modified to pick up cached values
|
|
||||||
// (eg - for delta encoding)
|
|
||||||
feed = syndFeedInfo.getSyndFeed();
|
|
||||||
|
|
||||||
return feed;
|
|
||||||
} finally {
|
|
||||||
method.releaseConnection();
|
|
||||||
method.recycle();
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// cache is not in use
|
|
||||||
try {
|
|
||||||
int statusCode = client.executeMethod(method);
|
|
||||||
fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
|
|
||||||
handleErrorCodes(statusCode);
|
|
||||||
|
|
||||||
return getFeed(null, urlStr, method, statusCode);
|
|
||||||
} finally {
|
|
||||||
method.releaseConnection();
|
|
||||||
method.recycle();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param feedUrl
|
|
||||||
* @param urlStr
|
|
||||||
* @param method
|
|
||||||
* @param feed
|
|
||||||
* @return
|
|
||||||
* @throws MalformedURLException
|
|
||||||
*/
|
*/
|
||||||
private SyndFeedInfo buildSyndFeedInfo(URL feedUrl, String urlStr, HttpMethod method, SyndFeed feed, int statusCode) throws MalformedURLException {
|
public synchronized void setReadTimeout(int timeout) {
|
||||||
|
httpClientParams.setSoTimeout(timeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The currently used read timeout for the URLConnection, 0 is unlimited, i.e. no timeout
|
||||||
|
*/
|
||||||
|
public int getReadTimeout() {
|
||||||
|
return (int) this.getHttpClientParams()
|
||||||
|
.getSoTimeout();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SyndFeed retrieveFeed(URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException {
|
||||||
|
return this.retrieveFeed(this.getUserAgent(), url);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see com.sun.syndication.fetcher.FeedFetcher#retrieveFeed(java.lang.String, java.net.URL)
|
||||||
|
*/
|
||||||
|
public SyndFeed retrieveFeed(String userAgent, URL feedUrl)
|
||||||
|
throws IllegalArgumentException, IOException, FeedException, FetcherException {
|
||||||
|
if (feedUrl == null) {
|
||||||
|
throw new IllegalArgumentException("null is not a valid URL");
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO Fix this
|
||||||
|
//System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog");
|
||||||
|
HttpClient client = new HttpClient(httpClientParams);
|
||||||
|
|
||||||
|
if (getCredentialSupplier() != null) {
|
||||||
|
client.getState()
|
||||||
|
.setAuthenticationPreemptive(true);
|
||||||
|
|
||||||
|
// TODO what should realm be here?
|
||||||
|
Credentials credentials = getCredentialSupplier()
|
||||||
|
.getCredentials(null, feedUrl.getHost());
|
||||||
|
|
||||||
|
if (credentials != null) {
|
||||||
|
client.getState()
|
||||||
|
.setCredentials(null, feedUrl.getHost(), credentials);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.setProperty("httpclient.useragent", getUserAgent());
|
||||||
|
|
||||||
|
String urlStr = feedUrl.toString();
|
||||||
|
|
||||||
|
HttpMethod method = new GetMethod(urlStr);
|
||||||
|
method.addRequestHeader("Accept-Encoding", "gzip");
|
||||||
|
method.addRequestHeader("User-Agent", userAgent);
|
||||||
|
method.setFollowRedirects(true);
|
||||||
|
|
||||||
|
if (httpClientMethodCallback != null) {
|
||||||
|
synchronized (httpClientMethodCallback) {
|
||||||
|
httpClientMethodCallback.afterHttpClientMethodCreate(method);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FeedFetcherCache cache = getFeedInfoCache();
|
||||||
|
|
||||||
|
if (cache != null) {
|
||||||
|
// retrieve feed
|
||||||
|
try {
|
||||||
|
if (isUsingDeltaEncoding()) {
|
||||||
|
method.setRequestHeader("A-IM", "feed");
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the feed info from the cache
|
||||||
|
// Note that syndFeedInfo will be null if it is not in the cache
|
||||||
|
SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
|
||||||
|
|
||||||
|
if (syndFeedInfo != null) {
|
||||||
|
method.setRequestHeader("If-None-Match", syndFeedInfo.getETag());
|
||||||
|
|
||||||
|
if (syndFeedInfo.getLastModified() instanceof String) {
|
||||||
|
method.setRequestHeader("If-Modified-Since", (String) syndFeedInfo.getLastModified());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int statusCode = client.executeMethod(method);
|
||||||
|
fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
|
||||||
|
handleErrorCodes(statusCode);
|
||||||
|
|
||||||
|
SyndFeed feed = getFeed(syndFeedInfo, urlStr, method, statusCode);
|
||||||
|
|
||||||
|
syndFeedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);
|
||||||
|
|
||||||
|
cache.setFeedInfo(new URL(urlStr), syndFeedInfo);
|
||||||
|
|
||||||
|
// the feed may have been modified to pick up cached values
|
||||||
|
// (eg - for delta encoding)
|
||||||
|
feed = syndFeedInfo.getSyndFeed();
|
||||||
|
|
||||||
|
return feed;
|
||||||
|
} finally {
|
||||||
|
method.releaseConnection();
|
||||||
|
method.recycle();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// cache is not in use
|
||||||
|
try {
|
||||||
|
int statusCode = client.executeMethod(method);
|
||||||
|
fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
|
||||||
|
handleErrorCodes(statusCode);
|
||||||
|
|
||||||
|
return getFeed(null, urlStr, method, statusCode);
|
||||||
|
} finally {
|
||||||
|
method.releaseConnection();
|
||||||
|
method.recycle();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private SyndFeed getFeed(SyndFeedInfo syndFeedInfo, String urlStr, HttpMethod method, int statusCode)
|
||||||
|
throws IOException, HttpException, FetcherException, FeedException {
|
||||||
|
if ((statusCode == HttpURLConnection.HTTP_NOT_MODIFIED) && (syndFeedInfo != null)) {
|
||||||
|
fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
|
||||||
|
|
||||||
|
return syndFeedInfo.getSyndFeed();
|
||||||
|
}
|
||||||
|
|
||||||
|
SyndFeed feed = retrieveFeed(urlStr, method);
|
||||||
|
fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, feed);
|
||||||
|
|
||||||
|
return feed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param feedUrl
|
||||||
|
* @param urlStr
|
||||||
|
* @param method
|
||||||
|
* @param feed
|
||||||
|
* @return
|
||||||
|
* @throws MalformedURLException
|
||||||
|
*/
|
||||||
|
private SyndFeedInfo buildSyndFeedInfo(URL feedUrl, String urlStr, HttpMethod method, SyndFeed feed, int statusCode)
|
||||||
|
throws MalformedURLException {
|
||||||
SyndFeedInfo syndFeedInfo;
|
SyndFeedInfo syndFeedInfo;
|
||||||
syndFeedInfo = new SyndFeedInfo();
|
syndFeedInfo = new SyndFeedInfo();
|
||||||
|
|
||||||
|
@ -257,27 +285,33 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
|
||||||
syndFeedInfo.setId(feedUrl.toString());
|
syndFeedInfo.setId(feedUrl.toString());
|
||||||
|
|
||||||
Header imHeader = method.getResponseHeader("IM");
|
Header imHeader = method.getResponseHeader("IM");
|
||||||
if (imHeader != null && imHeader.getValue().indexOf("feed") >= 0 && isUsingDeltaEncoding()) {
|
|
||||||
FeedFetcherCache cache = getFeedInfoCache();
|
|
||||||
if (cache != null && statusCode == 226) {
|
|
||||||
// client is setup to use http delta encoding and the server supports it and has returned a delta encoded response
|
|
||||||
// This response only includes new items
|
|
||||||
SyndFeedInfo cachedInfo = cache.getFeedInfo(feedUrl);
|
|
||||||
if (cachedInfo != null) {
|
|
||||||
SyndFeed cachedFeed = cachedInfo.getSyndFeed();
|
|
||||||
|
|
||||||
// set the new feed to be the original feed plus the new items
|
if ((imHeader != null) && (imHeader.getValue()
|
||||||
feed = combineFeeds(cachedFeed, feed);
|
.indexOf("feed") >= 0) && isUsingDeltaEncoding()) {
|
||||||
}
|
FeedFetcherCache cache = getFeedInfoCache();
|
||||||
}
|
|
||||||
}
|
if ((cache != null) && (statusCode == 226)) {
|
||||||
|
// client is setup to use http delta encoding and the server supports it and has returned a delta encoded response
|
||||||
|
// This response only includes new items
|
||||||
|
SyndFeedInfo cachedInfo = cache.getFeedInfo(feedUrl);
|
||||||
|
|
||||||
|
if (cachedInfo != null) {
|
||||||
|
SyndFeed cachedFeed = cachedInfo.getSyndFeed();
|
||||||
|
|
||||||
|
// set the new feed to be the original feed plus the new items
|
||||||
|
feed = combineFeeds(cachedFeed, feed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Header lastModifiedHeader = method.getResponseHeader("Last-Modified");
|
Header lastModifiedHeader = method.getResponseHeader("Last-Modified");
|
||||||
|
|
||||||
if (lastModifiedHeader != null) {
|
if (lastModifiedHeader != null) {
|
||||||
syndFeedInfo.setLastModified(lastModifiedHeader.getValue());
|
syndFeedInfo.setLastModified(lastModifiedHeader.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
Header eTagHeader = method.getResponseHeader("ETag");
|
Header eTagHeader = method.getResponseHeader("ETag");
|
||||||
|
|
||||||
if (eTagHeader != null) {
|
if (eTagHeader != null) {
|
||||||
syndFeedInfo.setETag(eTagHeader.getValue());
|
syndFeedInfo.setETag(eTagHeader.getValue());
|
||||||
}
|
}
|
||||||
|
@ -288,66 +322,58 @@ public class HttpClientFeedFetcher extends AbstractFeedFetcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param client
|
* @param client
|
||||||
* @param urlStr
|
* @param urlStr
|
||||||
* @param method
|
* @param method
|
||||||
* @return
|
* @return
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws HttpException
|
* @throws HttpException
|
||||||
* @throws FetcherException
|
* @throws FetcherException
|
||||||
* @throws FeedException
|
* @throws FeedException
|
||||||
*/
|
*/
|
||||||
private SyndFeed retrieveFeed(String urlStr, HttpMethod method) throws IOException, HttpException, FetcherException, FeedException {
|
private SyndFeed retrieveFeed(String urlStr, HttpMethod method)
|
||||||
|
throws IOException, HttpException, FetcherException, FeedException {
|
||||||
|
InputStream stream = null;
|
||||||
|
|
||||||
InputStream stream = null;
|
if ((method.getResponseHeader("Content-Encoding") != null) &&
|
||||||
if ((method.getResponseHeader("Content-Encoding") != null) && ("gzip".equalsIgnoreCase(method.getResponseHeader("Content-Encoding").getValue()))) {
|
("gzip".equalsIgnoreCase(method.getResponseHeader("Content-Encoding").getValue()))) {
|
||||||
stream = new GZIPInputStream(method.getResponseBodyAsStream());
|
stream = new GZIPInputStream(method.getResponseBodyAsStream());
|
||||||
} else {
|
} else {
|
||||||
stream = method.getResponseBodyAsStream();
|
stream = method.getResponseBodyAsStream();
|
||||||
}
|
}
|
||||||
try {
|
|
||||||
XmlReader reader = null;
|
|
||||||
if (method.getResponseHeader("Content-Type") != null) {
|
|
||||||
reader = new XmlReader(stream, method.getResponseHeader("Content-Type").getValue(), true);
|
|
||||||
} else {
|
|
||||||
reader = new XmlReader(stream, true);
|
|
||||||
}
|
|
||||||
SyndFeedInput syndFeedInput = new SyndFeedInput();
|
|
||||||
syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());
|
|
||||||
|
|
||||||
return syndFeedInput.build(reader);
|
try {
|
||||||
} finally {
|
XmlReader reader = null;
|
||||||
if (stream != null) {
|
|
||||||
stream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private SyndFeed getFeed(SyndFeedInfo syndFeedInfo, String urlStr, HttpMethod method, int statusCode) throws IOException, HttpException, FetcherException, FeedException {
|
if (method.getResponseHeader("Content-Type") != null) {
|
||||||
|
reader = new XmlReader(stream, method.getResponseHeader("Content-Type").getValue(), true);
|
||||||
|
} else {
|
||||||
|
reader = new XmlReader(stream, true);
|
||||||
|
}
|
||||||
|
|
||||||
if (statusCode == HttpURLConnection.HTTP_NOT_MODIFIED && syndFeedInfo != null) {
|
SyndFeedInput syndFeedInput = new SyndFeedInput();
|
||||||
fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
|
syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());
|
||||||
return syndFeedInfo.getSyndFeed();
|
|
||||||
}
|
|
||||||
|
|
||||||
SyndFeed feed = retrieveFeed(urlStr, method);
|
return syndFeedInput.build(reader);
|
||||||
fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, feed);
|
} finally {
|
||||||
return feed;
|
if (stream != null) {
|
||||||
}
|
stream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public interface CredentialSupplier {
|
public interface CredentialSupplier {
|
||||||
public Credentials getCredentials(String realm, String host);
|
public Credentials getCredentials(String realm, String host);
|
||||||
}
|
}
|
||||||
|
|
||||||
public interface HttpClientMethodCallbackIntf {
|
public interface HttpClientMethodCallbackIntf {
|
||||||
/**
|
/**
|
||||||
* Allows access to the underlying HttpClient HttpMethod object.
|
* Allows access to the underlying HttpClient HttpMethod object.
|
||||||
* Note that in most cases, method.setRequestHeader(String, String)
|
* Note that in most cases, method.setRequestHeader(String, String)
|
||||||
* is what you want to do (rather than method.addRequestHeader(String, String))
|
* is what you want to do (rather than method.addRequestHeader(String, String))
|
||||||
*
|
*
|
||||||
* @param method
|
* @param method
|
||||||
*/
|
*/
|
||||||
public void afterHttpClientMethodCreate(HttpMethod method);
|
public void afterHttpClientMethodCreate(HttpMethod method);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,6 +83,10 @@ public class HttpURLFeedFetcher extends AbstractFeedFetcher {
|
||||||
setFeedInfoCache(feedInfoCache);
|
setFeedInfoCache(feedInfoCache);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
|
||||||
|
return this.retrieveFeed(this.getUserAgent(), feedUrl);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve a feed over HTTP
|
* Retrieve a feed over HTTP
|
||||||
*
|
*
|
||||||
|
@ -93,7 +97,7 @@ public class HttpURLFeedFetcher extends AbstractFeedFetcher {
|
||||||
* @throws FeedException if the feed is not valid
|
* @throws FeedException if the feed is not valid
|
||||||
* @throws FetcherException if a HTTP error occurred
|
* @throws FetcherException if a HTTP error occurred
|
||||||
*/
|
*/
|
||||||
public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
|
public SyndFeed retrieveFeed(String userAgent, URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
|
||||||
if (feedUrl == null) {
|
if (feedUrl == null) {
|
||||||
throw new IllegalArgumentException("null is not a valid URL");
|
throw new IllegalArgumentException("null is not a valid URL");
|
||||||
}
|
}
|
||||||
|
@ -140,6 +144,9 @@ public class HttpURLFeedFetcher extends AbstractFeedFetcher {
|
||||||
fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
|
fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
|
||||||
InputStream inputStream = null;
|
InputStream inputStream = null;
|
||||||
setRequestHeaders(connection, null);
|
setRequestHeaders(connection, null);
|
||||||
|
|
||||||
|
connection.addRequestProperty("User-Agent", userAgent);
|
||||||
|
|
||||||
httpConnection.connect();
|
httpConnection.connect();
|
||||||
try {
|
try {
|
||||||
inputStream = httpConnection.getInputStream();
|
inputStream = httpConnection.getInputStream();
|
||||||
|
@ -238,9 +245,6 @@ public class HttpURLFeedFetcher extends AbstractFeedFetcher {
|
||||||
// header to retrieve feed gzipped
|
// header to retrieve feed gzipped
|
||||||
connection.setRequestProperty("Accept-Encoding", "gzip");
|
connection.setRequestProperty("Accept-Encoding", "gzip");
|
||||||
|
|
||||||
// set the user agent
|
|
||||||
connection.addRequestProperty("User-Agent", getUserAgent());
|
|
||||||
|
|
||||||
if (isUsingDeltaEncoding()) {
|
if (isUsingDeltaEncoding()) {
|
||||||
connection.addRequestProperty("A-IM", "feed");
|
connection.addRequestProperty("A-IM", "feed");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue