From 54b4e421a391e2421579467928c1eaabb06678c0 Mon Sep 17 00:00:00 2001 From: kebernet Date: Mon, 14 Mar 2011 23:52:57 +0000 Subject: [PATCH] Initial refactoring. --- pom.xml | 18 +- .../org/rometools/fetcher/FeedFetcher.java | 105 +++++ .../org/rometools/fetcher/FetcherEvent.java | 85 ++++ .../rometools/fetcher/FetcherException.java | 51 +++ .../rometools/fetcher/FetcherListener.java | 15 + .../fetcher/impl/AbstractFeedFetcher.java | 219 +++++++++ .../impl/AbstractFeedFetcherBeanInfo.java | 29 ++ .../fetcher/impl/DiskFeedInfoCache.java | 134 ++++++ .../fetcher/impl/FeedFetcherCache.java | 56 +++ .../fetcher/impl/HashMapFeedInfoCache.java | 125 ++++++ .../fetcher/impl/HttpClientFeedFetcher.java | 379 ++++++++++++++++ .../fetcher/impl/HttpURLFeedFetcher.java | 299 +++++++++++++ .../impl/LinkedHashMapFeedInfoCache.java | 70 +++ .../fetcher/impl/ResponseHandler.java | 57 +++ .../rometools/fetcher/impl/SyndFeedInfo.java | 149 +++++++ .../fetcher/samples/FeedAggregator.java | 92 ++++ .../rometools/fetcher/samples/FeedReader.java | 96 ++++ .../org/rometools/test/AbstractJettyTest.java | 421 ++++++++++++++++++ .../rometools/test/DiskFeedInfoCacheTest.java | 42 ++ .../rometools/test/FetcherTestServlet.java | 244 ++++++++++ .../test/HashMapFeedInfoCacheTest.java | 41 ++ .../test/HttpClientFeedFetcherTest.java | 60 +++ .../test/HttpURLFeedFetcherTest.java | 55 +++ .../rometools/test/ResponseHandlerTest.java | 42 ++ .../test/TestBasicAuthenticator.java | 37 ++ 25 files changed, 2913 insertions(+), 8 deletions(-) create mode 100644 src/java/org/rometools/fetcher/FeedFetcher.java create mode 100644 src/java/org/rometools/fetcher/FetcherEvent.java create mode 100644 src/java/org/rometools/fetcher/FetcherException.java create mode 100644 src/java/org/rometools/fetcher/FetcherListener.java create mode 100644 src/java/org/rometools/fetcher/impl/AbstractFeedFetcher.java create mode 100644 src/java/org/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java create mode 100644 
src/java/org/rometools/fetcher/impl/DiskFeedInfoCache.java create mode 100644 src/java/org/rometools/fetcher/impl/FeedFetcherCache.java create mode 100644 src/java/org/rometools/fetcher/impl/HashMapFeedInfoCache.java create mode 100644 src/java/org/rometools/fetcher/impl/HttpClientFeedFetcher.java create mode 100644 src/java/org/rometools/fetcher/impl/HttpURLFeedFetcher.java create mode 100644 src/java/org/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java create mode 100644 src/java/org/rometools/fetcher/impl/ResponseHandler.java create mode 100644 src/java/org/rometools/fetcher/impl/SyndFeedInfo.java create mode 100644 src/java/org/rometools/fetcher/samples/FeedAggregator.java create mode 100644 src/java/org/rometools/fetcher/samples/FeedReader.java create mode 100644 src/test/org/rometools/test/AbstractJettyTest.java create mode 100644 src/test/org/rometools/test/DiskFeedInfoCacheTest.java create mode 100644 src/test/org/rometools/test/FetcherTestServlet.java create mode 100644 src/test/org/rometools/test/HashMapFeedInfoCacheTest.java create mode 100644 src/test/org/rometools/test/HttpClientFeedFetcherTest.java create mode 100644 src/test/org/rometools/test/HttpURLFeedFetcherTest.java create mode 100644 src/test/org/rometools/test/ResponseHandlerTest.java create mode 100644 src/test/org/rometools/test/TestBasicAuthenticator.java diff --git a/pom.xml b/pom.xml index 3e0bc27..9adc1a4 100644 --- a/pom.xml +++ b/pom.xml @@ -1,21 +1,23 @@ 4.0.0 - org.rometools - fetcher - + rome-fetcher rome-fetcher - 1.0.1-SNAPSHOT + 1.5-SNAPSHOT 2004 ROME Project http://rometools.jira.com - + + https://rometools.jira.com/secure/IssueNavigator.jspa + + + scm:svn:https://rometools.jira.com/svn/FETCHER/trunk + scm:svn:https://rometools.jira.com/svn/FETCHER/trunk + https://rometools.jira.com/source/browse/FETCHER + Apache 2 diff --git a/src/java/org/rometools/fetcher/FeedFetcher.java b/src/java/org/rometools/fetcher/FeedFetcher.java new file mode 100644 index 0000000..63db65d --- 
/dev/null +++ b/src/java/org/rometools/fetcher/FeedFetcher.java @@ -0,0 +1,105 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.fetcher; + +import com.sun.syndication.feed.synd.SyndFeed; +import com.sun.syndication.io.FeedException; + +import java.io.IOException; + +import java.net.URL; + + +public interface FeedFetcher { + /** + *

The default user agent. It is not marked final so that + * buggy Java compilers will not write this string + * into all classes that reference it.

+ * + *

http://tinyurl.com/64t5n points to https://rome.dev.java.net + * Some servers ban user agents with "Java" in the name.

+ * + */ + public static String DEFAULT_USER_AGENT = "Rome Client (http://tinyurl.com/64t5n)"; + + /** + * @param string The User-Agent to sent to servers + */ + public abstract void setUserAgent(String string); + + /** + * @return the User-Agent currently being sent to servers + */ + public abstract String getUserAgent(); + + /** + *

Turn on or off rfc3229 delta encoding

+ * + *

See http://www.ietf.org/rfc/rfc3229.txt and http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html

+ * + *

NOTE: This is experimental and feedback is welcome!

+ * + * @param useDeltaEncoding + */ + public abstract void setUsingDeltaEncoding(boolean useDeltaEncoding); + + /** + *

Is this fetcher using rfc3229 delta encoding?

+ * + * @return + */ + public abstract boolean isUsingDeltaEncoding(); + + /** + *

Add a FetcherListener.

+ * + *

The FetcherListener will receive a FetcherEvent when + * a Fetcher event (feed polled, retrieved, etc.) occurs.

+ * + * @param listener The FetcherListener to recieve the event + */ + public abstract void addFetcherEventListener(FetcherListener listener); + + /** + *

Remove a FetcherListener

+ * + * @param listener The FetcherListener to remove + */ + public abstract void removeFetcherEventListener(FetcherListener listener); + + /** + * Retrieve a feed over HTTP + * + * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve + * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object + * @throws IllegalArgumentException if the URL is null; + * @throws IOException if a TCP error occurs + * @throws FeedException if the feed is not valid + * @throws FetcherException if a HTTP error occurred + */ + public abstract SyndFeed retrieveFeed(URL feedUrl) + throws IllegalArgumentException, IOException, FeedException, FetcherException; + + public SyndFeed retrieveFeed(String userAgent, URL url) + throws IllegalArgumentException, IOException, FeedException, FetcherException; + + /** + * If set to true, the WireFeed will be made accessible from the SyndFeed object returned from the Fetcher + * via the originalWireFeed() method. Each Entry in the feed will have the corresponding wireEntry property set. + */ + void setPreserveWireFeed(boolean preserveWireFeed); +} diff --git a/src/java/org/rometools/fetcher/FetcherEvent.java b/src/java/org/rometools/fetcher/FetcherEvent.java new file mode 100644 index 0000000..232af89 --- /dev/null +++ b/src/java/org/rometools/fetcher/FetcherEvent.java @@ -0,0 +1,85 @@ +package org.rometools.fetcher; + +import java.util.EventObject; + +import com.sun.syndication.feed.synd.SyndFeed; + +/** + * Implementation note: FetcherEvent is not thread safe. Make sure that + * they are only ever accessed by one thread. If necessary, make all getters + * and setters synchronized, or alternatively make all fields final. 
+ * + * @author nl + */ +public class FetcherEvent extends EventObject { + + private static final long serialVersionUID = 3985600601904140103L; + + public static final String EVENT_TYPE_FEED_POLLED = "FEED_POLLED"; + public static final String EVENT_TYPE_FEED_RETRIEVED = "FEED_RETRIEVED"; + public static final String EVENT_TYPE_FEED_UNCHANGED = "FEED_UNCHANGED"; + + private String eventType; + private String urlString; + private SyndFeed feed; + + public FetcherEvent(Object source) { + super(source); + } + + + public FetcherEvent(Object source, String urlStr, String eventType) { + this(source); + setUrlString(urlStr); + setEventType(eventType); + } + + public FetcherEvent(Object source, String urlStr, String eventType, SyndFeed feed) { + this(source, urlStr, eventType); + setFeed(feed); + } + + + /** + * @return Returns the feed. + * + *

The feed will only be set if the eventType is EVENT_TYPE_FEED_RETRIEVED

+ */ + public SyndFeed getFeed() { + return feed; + } + + /** + * @param feed The feed to set. + * + *

The feed will only be set if the eventType is EVENT_TYPE_FEED_RETRIEVED

+ */ + public void setFeed(SyndFeed feed) { + this.feed = feed; + } + + /** + * @return Returns the eventType. + */ + public String getEventType() { + return eventType; + } + /** + * @param eventType The eventType to set. + */ + public void setEventType(String eventType) { + this.eventType = eventType; + } + /** + * @return Returns the urlString. + */ + public String getUrlString() { + return urlString; + } + /** + * @param urlString The urlString to set. + */ + public void setUrlString(String urlString) { + this.urlString = urlString; + } +} diff --git a/src/java/org/rometools/fetcher/FetcherException.java b/src/java/org/rometools/fetcher/FetcherException.java new file mode 100644 index 0000000..48575ec --- /dev/null +++ b/src/java/org/rometools/fetcher/FetcherException.java @@ -0,0 +1,51 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.rometools.fetcher; + +/** + * @author Nick Lothian + * + */ +public class FetcherException extends Exception { + private static final long serialVersionUID = -7479645796948092380L; + + int responseCode; + + public FetcherException(Throwable cause) { + super(); + initCause(cause); + } + + public FetcherException(String message, Throwable cause) { + super(message); + initCause(cause); + } + + public FetcherException(String message) { + super(message); + } + + public FetcherException(int responseCode, String message) { + this(message); + this.responseCode = responseCode; + } + + public int getResponseCode() { + return responseCode; + } + +} diff --git a/src/java/org/rometools/fetcher/FetcherListener.java b/src/java/org/rometools/fetcher/FetcherListener.java new file mode 100644 index 0000000..3424456 --- /dev/null +++ b/src/java/org/rometools/fetcher/FetcherListener.java @@ -0,0 +1,15 @@ +package org.rometools.fetcher; + +import java.util.EventListener; + + +public interface FetcherListener extends EventListener { + + /** + *

Called when a fetcher event occurs

+ * + * @param event the event that fired + */ + public void fetcherEvent(FetcherEvent event); + +} diff --git a/src/java/org/rometools/fetcher/impl/AbstractFeedFetcher.java b/src/java/org/rometools/fetcher/impl/AbstractFeedFetcher.java new file mode 100644 index 0000000..4b1823f --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/AbstractFeedFetcher.java @@ -0,0 +1,219 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.rometools.fetcher.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLConnection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Properties; +import java.util.Set; + +import com.sun.syndication.feed.synd.SyndFeed; +import org.rometools.fetcher.FeedFetcher; +import org.rometools.fetcher.FetcherEvent; +import org.rometools.fetcher.FetcherException; +import org.rometools.fetcher.FetcherListener; + + +public abstract class AbstractFeedFetcher implements FeedFetcher { + private final Set fetcherEventListeners; + private String userAgent; + private boolean usingDeltaEncoding; + private boolean preserveWireFeed; + + + + public AbstractFeedFetcher() { + fetcherEventListeners = Collections.synchronizedSet(new HashSet()); + + Properties props = new Properties(System.getProperties()); + String resourceName = "fetcher.properties"; + + try { + InputStream inputStream = 
this.getClass().getClassLoader().getResourceAsStream(resourceName); + if (inputStream == null) { + inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resourceName); + } + if (inputStream != null) { + props.load(inputStream); + System.getProperties().putAll(props); + inputStream.close(); + } else { + System.err.println("Could not find " + resourceName + " on classpath"); + } + } catch (IOException e) { + // do nothing - we don't want to fail just because we could not find the version + System.err.println("Error reading " + resourceName + " from classpath: " + e.getMessage()); + } + + + setUserAgent(DEFAULT_USER_AGENT + " Ver: " + System.getProperty("rome.fetcher.version", "UNKNOWN")); + } + + /** + * @return the User-Agent currently being sent to servers + */ + public synchronized String getUserAgent() { + return userAgent; + } + + /** + * @param string The User-Agent to sent to servers + */ + public synchronized void setUserAgent(String string) { + userAgent = string; + } + + /** + * @param eventType The event type to fire + * @param connection the current connection + */ + protected void fireEvent(String eventType, URLConnection connection) { + fireEvent(eventType, connection.getURL().toExternalForm(), null); + } + + + /** + * @param eventType The event type to fire + * @param connection the current connection + * @param feed The feed to pass to the event + */ + protected void fireEvent(String eventType, URLConnection connection, SyndFeed feed) { + fireEvent(eventType, connection.getURL().toExternalForm(), feed); + } + + /** + * @param eventType The event type to fire + * @param urlStr the current url as a string + */ + protected void fireEvent(String eventType, String urlStr) { + fireEvent(eventType, urlStr, null); + } + + /** + * @param eventType The event type to fire + * @param urlStr the current url as a string + * @param feed The feed to pass to the event + */ + protected void fireEvent(String eventType, String urlStr, SyndFeed 
feed) { + FetcherEvent fetcherEvent = new FetcherEvent(this, urlStr, eventType, feed); + synchronized(fetcherEventListeners) { + Iterator iter = fetcherEventListeners.iterator(); + while ( iter.hasNext()) { + FetcherListener fetcherEventListener = (FetcherListener) iter.next(); + fetcherEventListener.fetcherEvent(fetcherEvent); + } + } + } + + /** + * @see com.sun.syndication.fetcher.FeedFetcher#addFetcherEventListener(com.sun.syndication.fetcher.FetcherListener) + */ + public void addFetcherEventListener(FetcherListener listener) { + if (listener != null) { + fetcherEventListeners.add(listener); + } + + } + + /** + * @see com.sun.syndication.fetcher.FeedFetcher#removeFetcherEventListener(com.sun.syndication.fetcher.FetcherListener) + */ + public void removeFetcherEventListener(FetcherListener listener) { + if (listener != null) { + fetcherEventListeners.remove(listener); + } + } + + /** + * @return Returns the useDeltaEncoding. + */ + public synchronized boolean isUsingDeltaEncoding() { + return usingDeltaEncoding; + } + /** + * @param useDeltaEncoding The useDeltaEncoding to set. + */ + public synchronized void setUsingDeltaEncoding(boolean useDeltaEncoding) { + this.usingDeltaEncoding = useDeltaEncoding; + } + + /** + *

Handles HTTP error codes.

+ * + * @param responseCode the HTTP response code + * @throws FetcherException if response code is in the range 400 to 599 inclusive + */ + protected void handleErrorCodes(int responseCode) throws FetcherException { + // Handle 2xx codes as OK, so ignore them here + // 3xx codes are handled by the HttpURLConnection class + if (responseCode == 403) { + // Authentication is required + throwAuthenticationError(responseCode); + } else if (responseCode >= 400 && responseCode < 500) { + throw4XXError(responseCode); + } else if (responseCode >= 500 && responseCode < 600) { + throw new FetcherException(responseCode, "The server encounted an error. HTTP Response code was:" + responseCode); + } + } + + protected void throw4XXError(int responseCode) throws FetcherException { + throw new FetcherException(responseCode, "The requested resource could not be found. HTTP Response code was:" + responseCode); + } + + protected void throwAuthenticationError(int responseCode) throws FetcherException { + throw new FetcherException(responseCode, "Authentication required for that resource. HTTP Response code was:" + responseCode); + } + + /** + *

Combine the entries in two feeds into a single feed.

+ * + *

The returned feed will have the same data as the newFeed parameter, with + * the entries from originalFeed appended to the end of its entries.

+ * + * @param originalFeed + * @param newFeed + * @return + */ + public static SyndFeed combineFeeds(SyndFeed originalFeed, SyndFeed newFeed) { + SyndFeed result; + try { + result = (SyndFeed) newFeed.clone(); + + result.getEntries().addAll(result.getEntries().size(), originalFeed.getEntries()); + + return result; + } catch (CloneNotSupportedException e) { + IllegalArgumentException iae = new IllegalArgumentException("Cannot clone feed"); + iae.initCause(e); + throw iae; + } + } + + public boolean isPreserveWireFeed() { + return preserveWireFeed; + } + + public void setPreserveWireFeed(boolean preserveWireFeed) { + this.preserveWireFeed = preserveWireFeed; + } + +} diff --git a/src/java/org/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java b/src/java/org/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java new file mode 100644 index 0000000..7342901 --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java @@ -0,0 +1,29 @@ +package org.rometools.fetcher.impl; + +import java.beans.EventSetDescriptor; +import java.beans.SimpleBeanInfo; +import java.lang.reflect.Method; + +import org.rometools.fetcher.FetcherEvent; +import org.rometools.fetcher.FetcherListener; + +public class AbstractFeedFetcherBeanInfo extends SimpleBeanInfo { + + public EventSetDescriptor[] getEventSetDescriptors() { + try { + Class clz = AbstractFeedFetcher.class; // get the class object which we'll describe + Method addMethod = clz.getMethod("addFetcherEventListener", new Class[] { FetcherListener.class }); + Method removeMethod = clz.getMethod("removeFetcherEventListener", new Class[] { FetcherListener.class }); + Method listenerMethod = FetcherListener.class.getMethod("fetcherEvent", new Class[] { FetcherEvent.class }); + + EventSetDescriptor est = new EventSetDescriptor("fetcherEvent", clz, new Method[] { listenerMethod }, addMethod, removeMethod); + EventSetDescriptor[] results = new EventSetDescriptor[] { est }; + + return results; + } catch 
(Exception e) { + // IntrospectionException, SecurityException and/or NoSuchMethodException can be thrown here + // the best we can do is to convert them to runtime exceptions + throw new RuntimeException(e); + } + } +} diff --git a/src/java/org/rometools/fetcher/impl/DiskFeedInfoCache.java b/src/java/org/rometools/fetcher/impl/DiskFeedInfoCache.java new file mode 100644 index 0000000..df788bc --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/DiskFeedInfoCache.java @@ -0,0 +1,134 @@ +/* + * Copyright 2005 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.rometools.fetcher.impl; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.net.URL; +import javax.swing.text.Utilities; + + +/** + * Disk based cache. 
+ */ +public class DiskFeedInfoCache implements FeedFetcherCache { + + protected String cachePath = null; + public DiskFeedInfoCache(String cachePath) { + this.cachePath = cachePath; + } + public SyndFeedInfo getFeedInfo(URL url) { + SyndFeedInfo info = null; + String fileName = cachePath + File.separator + "feed_" + + replaceNonAlphanumeric(url.toString(),'_').trim(); + FileInputStream fis; + try { + fis = new FileInputStream(fileName); + ObjectInputStream ois = new ObjectInputStream(fis); + info = (SyndFeedInfo)ois.readObject(); + fis.close(); + } catch (FileNotFoundException fnfe) { + // That's OK, we'l return null + } catch (ClassNotFoundException cnfe) { + // Error writing to cache is fatal + throw new RuntimeException("Attempting to read from cache", cnfe); + } catch (IOException fnfe) { + // Error writing to cache is fatal + throw new RuntimeException("Attempting to read from cache", fnfe); + } + return info; + } + + public void setFeedInfo(URL url, SyndFeedInfo feedInfo) { + String fileName = cachePath + File.separator + "feed_" + + replaceNonAlphanumeric(url.toString(),'_').trim(); + FileOutputStream fos; + try { + fos = new FileOutputStream(fileName); + ObjectOutputStream oos = new ObjectOutputStream(fos); + oos.writeObject(feedInfo); + fos.flush(); + fos.close(); + } catch (Exception e) { + // Error writing to cache is fatal + throw new RuntimeException("Attempting to write to cache", e); + } + } + + public static String replaceNonAlphanumeric(String str, char subst) { + StringBuffer ret = new StringBuffer(str.length()); + char[] testChars = str.toCharArray(); + for (int i = 0; i < testChars.length; i++) { + if (Character.isLetterOrDigit(testChars[i])) { + ret.append(testChars[i]); + } else { + ret.append( subst ); + } + } + return ret.toString(); + } + + /** + * Clear the cache. 
+ */ + public synchronized void clear() { + final File file = new File(this.cachePath); + //only do the delete if the directory exists + if( file.exists() && file.canWrite() ) { + //make the directory empty + final String[] files = file.list(); + final int len = files.length; + for( int i=0; iAn interface to allow caching of feed details. Implementing this allows the + * {@link com.sun.syndication.fetcher.io.HttpURLFeedFetcher} class to + * enable conditional gets

+ * + * @author Nick Lothian + * + */ +public interface FeedFetcherCache { + /** + * Get a SyndFeedInfo object from the cache. + * + * @param feedUrl The url of the feed + * @return A SyndFeedInfo or null if it is not in the cache + */ + public SyndFeedInfo getFeedInfo(URL feedUrl); + + /** + * Add a SyndFeedInfo object to the cache + * + * @param feedUrl The url of the feed + * @param syndFeedInfo A SyndFeedInfo for the feed + */ + public void setFeedInfo(URL feedUrl, SyndFeedInfo syndFeedInfo); + + /** + * Removes all items from the cache. + */ + public void clear(); + + /** + * Removes the SyndFeedInfo identified by the url from the cache. + * @return The removed SyndFeedInfo + */ + public SyndFeedInfo remove( URL feedUrl ); +} diff --git a/src/java/org/rometools/fetcher/impl/HashMapFeedInfoCache.java b/src/java/org/rometools/fetcher/impl/HashMapFeedInfoCache.java new file mode 100644 index 0000000..1d8f93f --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/HashMapFeedInfoCache.java @@ -0,0 +1,125 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.fetcher.impl; + +import java.io.Serializable; +import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + + +/** + *

A very simple implementation of the {@link org.rometools.fetcher.impl.FeedFetcherCache} interface.

+ * + *

This implementation uses a HashMap to cache retrieved feeds. This implementation is + * most suitable for short-term (client aggregator?) use, as the memory usage will increase + * over time as the number of feeds in the cache increases.

+ * + * @author Nick Lothian + * + */ +public class HashMapFeedInfoCache implements FeedFetcherCache, Serializable { + private static final long serialVersionUID = -1594665619950916222L; + + static HashMapFeedInfoCache _instance; + + private Map infoCache; + + /** + *

Constructor for HashMapFeedInfoCache

+ * + *

Only use this if you want multiple instances of the cache. + * Usually getInstance() is more appropriate.

+ * + */ + public HashMapFeedInfoCache() { + setInfoCache(createInfoCache()); + } + + /** + * Get the global instance of the cache + * @return an implementation of FeedFetcherCache + */ + public static synchronized FeedFetcherCache getInstance() { + if (_instance == null) { + _instance = new HashMapFeedInfoCache(); + } + return _instance; + } + + protected Map createInfoCache() { + return (Collections.synchronizedMap(new HashMap())); + } + + + protected Object get(Object key) { + return getInfoCache().get(key); + } + + /** + * @see extensions.io.FeedFetcherCache#getFeedInfo(java.net.URL) + */ + public SyndFeedInfo getFeedInfo(URL feedUrl) { + return (SyndFeedInfo) get(feedUrl.toString()); + } + + protected void put(Object key, Object value) { + getInfoCache().put(key, value); + } + + /** + * @see extensions.io.FeedFetcherCache#setFeedInfo(java.net.URL, extensions.io.SyndFeedInfo) + */ + public void setFeedInfo(URL feedUrl, SyndFeedInfo syndFeedInfo) { + put(feedUrl.toString(), syndFeedInfo); + } + + protected synchronized final Map getInfoCache() { + return infoCache; + } + + /** + * The API of this class indicates that map must thread safe. In other + * words, be sure to wrap it in a synchronized map unless you know + * what you are doing. + * + * @param map the map to use as the info cache. 
+ */ + protected synchronized final void setInfoCache(Map map) { + infoCache = map; + } + + /** + * @see com.sun.syndication.fetcher.impl.FeedFetcherCache#clear() + */ + public void clear() { + synchronized( infoCache ) { + infoCache.clear(); + } + } + + /** + * @see com.sun.syndication.fetcher.impl.FeedFetcherCache#remove(java.net.URL) + */ + public SyndFeedInfo remove( final URL url ) { + if( url == null ) return null; + + return (SyndFeedInfo) infoCache.remove( url.toString() ); + } + +} diff --git a/src/java/org/rometools/fetcher/impl/HttpClientFeedFetcher.java b/src/java/org/rometools/fetcher/impl/HttpClientFeedFetcher.java new file mode 100644 index 0000000..e47003c --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/HttpClientFeedFetcher.java @@ -0,0 +1,379 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.rometools.fetcher.impl; + +import com.sun.syndication.feed.synd.SyndFeed; +import org.rometools.fetcher.FetcherEvent; +import org.rometools.fetcher.FetcherException; +import com.sun.syndication.io.FeedException; +import com.sun.syndication.io.SyndFeedInput; +import com.sun.syndication.io.XmlReader; + +import org.apache.commons.httpclient.Credentials; +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.params.HttpClientParams; + +import java.io.IOException; +import java.io.InputStream; + +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; + +import java.util.zip.GZIPInputStream; + + +/** + * @author Nick Lothian + */ +public class HttpClientFeedFetcher extends AbstractFeedFetcher { + private CredentialSupplier credentialSupplier; + private FeedFetcherCache feedInfoCache; + private volatile HttpClientMethodCallbackIntf httpClientMethodCallback; + private volatile HttpClientParams httpClientParams; + + public HttpClientFeedFetcher() { + super(); + setHttpClientParams(new HttpClientParams()); + } + + /** + * @param cache + */ + public HttpClientFeedFetcher(FeedFetcherCache cache) { + this(); + setFeedInfoCache(cache); + } + + public HttpClientFeedFetcher(FeedFetcherCache cache, CredentialSupplier credentialSupplier) { + this(cache); + setCredentialSupplier(credentialSupplier); + } + + /** + * @param timeout Sets the connect timeout for the HttpClient but using the URLConnection method name. 
+ * Uses the HttpClientParams method setConnectionManagerTimeout instead of setConnectTimeout + * + */ + public synchronized void setConnectTimeout(int timeout) { + httpClientParams.setConnectionManagerTimeout(timeout); + } + + /** + * @return The currently used connect timeout for the HttpClient but using the URLConnection method name. + * Uses the HttpClientParams method getConnectionManagerTimeout instead of getConnectTimeout + * + */ + public int getConnectTimeout() { + return (int) this.getHttpClientParams() + .getConnectionManagerTimeout(); + } + + /** + * @param credentialSupplier The credentialSupplier to set. + */ + public synchronized void setCredentialSupplier(CredentialSupplier credentialSupplier) { + this.credentialSupplier = credentialSupplier; + } + + /** + * @return Returns the credentialSupplier. + */ + public synchronized CredentialSupplier getCredentialSupplier() { + return credentialSupplier; + } + + /** + * @param feedInfoCache the feedInfoCache to set + */ + public synchronized void setFeedInfoCache(FeedFetcherCache feedInfoCache) { + this.feedInfoCache = feedInfoCache; + } + + /** + * @return the feedInfoCache. + */ + public synchronized FeedFetcherCache getFeedInfoCache() { + return feedInfoCache; + } + + public synchronized void setHttpClientMethodCallback(HttpClientMethodCallbackIntf httpClientMethodCallback) { + this.httpClientMethodCallback = httpClientMethodCallback; + } + + public HttpClientMethodCallbackIntf getHttpClientMethodCallback() { + return httpClientMethodCallback; + } + + /** + * @param httpClientParams The httpClientParams to set. + */ + public synchronized void setHttpClientParams(HttpClientParams httpClientParams) { + this.httpClientParams = httpClientParams; + } + + /** + * @return Returns the httpClientParams. + */ + public synchronized HttpClientParams getHttpClientParams() { + return this.httpClientParams; + } + + /** + * @param timeout Sets the read timeout for the HttpClient (using the URLConnection method name), in milliseconds; 0 is unlimited, i.e.
no timeout + */ + public synchronized void setReadTimeout(int timeout) { + httpClientParams.setSoTimeout(timeout); + } + + /** + * @return The currently used read timeout for the HttpClient (using the URLConnection method name), in milliseconds. + */ + public int getReadTimeout() { + return (int) this.getHttpClientParams() + .getSoTimeout(); + } + + public SyndFeed retrieveFeed(URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException { + return this.retrieveFeed(this.getUserAgent(), url); + } + + /** + * @see org.rometools.fetcher.FeedFetcher#retrieveFeed(java.net.URL) + */ + public SyndFeed retrieveFeed(String userAgent, URL feedUrl) + throws IllegalArgumentException, IOException, FeedException, FetcherException { + if (feedUrl == null) { + throw new IllegalArgumentException("null is not a valid URL"); + } + + // TODO Fix this + //System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog"); + HttpClient client = new HttpClient(httpClientParams); + + if (getCredentialSupplier() != null) { + client.getState() + .setAuthenticationPreemptive(true); + + // TODO what should realm be here?
+ Credentials credentials = getCredentialSupplier() + .getCredentials(null, feedUrl.getHost()); + + if (credentials != null) { + client.getState() + .setCredentials(null, feedUrl.getHost(), credentials); + } + } + + System.setProperty("httpclient.useragent", userAgent); + + String urlStr = feedUrl.toString(); + + HttpMethod method = new GetMethod(urlStr); + method.addRequestHeader("Accept-Encoding", "gzip"); + method.addRequestHeader("User-Agent", userAgent); + method.setFollowRedirects(true); + + if (httpClientMethodCallback != null) { + synchronized (httpClientMethodCallback) { + httpClientMethodCallback.afterHttpClientMethodCreate(method); + } + } + + FeedFetcherCache cache = getFeedInfoCache(); + + if (cache != null) { + // retrieve feed + try { + if (isUsingDeltaEncoding()) { + method.setRequestHeader("A-IM", "feed"); + } + + // get the feed info from the cache + // Note that syndFeedInfo will be null if it is not in the cache + SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl); + + if (syndFeedInfo != null) { + method.setRequestHeader("If-None-Match", syndFeedInfo.getETag()); + + if (syndFeedInfo.getLastModified() instanceof String) { + method.setRequestHeader("If-Modified-Since", (String) syndFeedInfo.getLastModified()); + } + } + + int statusCode = client.executeMethod(method); + fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr); + handleErrorCodes(statusCode); + + SyndFeed feed = getFeed(syndFeedInfo, urlStr, method, statusCode); + + syndFeedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode); + + cache.setFeedInfo(new URL(urlStr), syndFeedInfo); + + // the feed may have been modified to pick up cached values + // (eg - for delta encoding) + feed = syndFeedInfo.getSyndFeed(); + + return feed; + } finally { + method.releaseConnection(); + method.recycle(); + } + } else { + // cache is not in use + try { + int statusCode = client.executeMethod(method); + fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr); + 
handleErrorCodes(statusCode); + + return getFeed(null, urlStr, method, statusCode); + } finally { + method.releaseConnection(); + method.recycle(); + } + } + } + + private SyndFeed getFeed(SyndFeedInfo syndFeedInfo, String urlStr, HttpMethod method, int statusCode) + throws IOException, HttpException, FetcherException, FeedException { + if ((statusCode == HttpURLConnection.HTTP_NOT_MODIFIED) && (syndFeedInfo != null)) { + fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr); + + return syndFeedInfo.getSyndFeed(); + } + + SyndFeed feed = retrieveFeed(urlStr, method); + fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, feed); + + return feed; + } + + /** + * @param feedUrl + * @param urlStr + * @param method + * @param feed + * @return + * @throws MalformedURLException + */ + private SyndFeedInfo buildSyndFeedInfo(URL feedUrl, String urlStr, HttpMethod method, SyndFeed feed, int statusCode) + throws MalformedURLException { + SyndFeedInfo syndFeedInfo; + syndFeedInfo = new SyndFeedInfo(); + + // this may be different to feedURL because of 3XX redirects + syndFeedInfo.setUrl(new URL(urlStr)); + syndFeedInfo.setId(feedUrl.toString()); + + Header imHeader = method.getResponseHeader("IM"); + + if ((imHeader != null) && (imHeader.getValue() + .indexOf("feed") >= 0) && isUsingDeltaEncoding()) { + FeedFetcherCache cache = getFeedInfoCache(); + + if ((cache != null) && (statusCode == 226)) { + // client is setup to use http delta encoding and the server supports it and has returned a delta encoded response + // This response only includes new items + SyndFeedInfo cachedInfo = cache.getFeedInfo(feedUrl); + + if (cachedInfo != null) { + SyndFeed cachedFeed = cachedInfo.getSyndFeed(); + + // set the new feed to be the orginal feed plus the new items + feed = combineFeeds(cachedFeed, feed); + } + } + } + + Header lastModifiedHeader = method.getResponseHeader("Last-Modified"); + + if (lastModifiedHeader != null) { + 
syndFeedInfo.setLastModified(lastModifiedHeader.getValue()); + } + + Header eTagHeader = method.getResponseHeader("ETag"); + + if (eTagHeader != null) { + syndFeedInfo.setETag(eTagHeader.getValue()); + } + + syndFeedInfo.setSyndFeed(feed); + + return syndFeedInfo; + } + + /** + * @param client + * @param urlStr + * @param method + * @return + * @throws IOException + * @throws HttpException + * @throws FetcherException + * @throws FeedException + */ + private SyndFeed retrieveFeed(String urlStr, HttpMethod method) + throws IOException, HttpException, FetcherException, FeedException { + InputStream stream = null; + + if ((method.getResponseHeader("Content-Encoding") != null) && + ("gzip".equalsIgnoreCase(method.getResponseHeader("Content-Encoding").getValue()))) { + stream = new GZIPInputStream(method.getResponseBodyAsStream()); + } else { + stream = method.getResponseBodyAsStream(); + } + + try { + XmlReader reader = null; + + if (method.getResponseHeader("Content-Type") != null) { + reader = new XmlReader(stream, method.getResponseHeader("Content-Type").getValue(), true); + } else { + reader = new XmlReader(stream, true); + } + + SyndFeedInput syndFeedInput = new SyndFeedInput(); + syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); + + return syndFeedInput.build(reader); + } finally { + if (stream != null) { + stream.close(); + } + } + } + + public interface CredentialSupplier { + public Credentials getCredentials(String realm, String host); + } + + public interface HttpClientMethodCallbackIntf { + /** + * Allows access to the underlying HttpClient HttpMethod object. 
+ * Note that in most cases, method.setRequestHeader(String, String) + * is what you want to do (rather than method.addRequestHeader(String, String)) + * + * @param method + */ + public void afterHttpClientMethodCreate(HttpMethod method); + } +} diff --git a/src/java/org/rometools/fetcher/impl/HttpURLFeedFetcher.java b/src/java/org/rometools/fetcher/impl/HttpURLFeedFetcher.java new file mode 100644 index 0000000..67981b1 --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/HttpURLFeedFetcher.java @@ -0,0 +1,299 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.fetcher.impl; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; +import java.util.zip.GZIPInputStream; + +import com.sun.syndication.feed.synd.SyndFeed; +import org.rometools.fetcher.FetcherEvent; +import org.rometools.fetcher.FetcherException; +import com.sun.syndication.io.FeedException; +import com.sun.syndication.io.SyndFeedInput; +import com.sun.syndication.io.XmlReader; + +/** + *

Class to retrieve syndication files via HTTP.

+ * + *

If passed a {@link org.rometools.fetcher.impl.FeedFetcherCache} in the + * constructor it will use conditional gets to only retrieve modified content.

+ * + *

The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where + * supported by the server.

+ * + *

Simple usage: + *

+ * 	// create the cache
+ *	FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance();
+ *	// retrieve the feed the first time
+ *	// any subsequent request will use conditional gets and only
+ *	// retrieve the resource if it has changed
+ *	SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
+ *
+ * + *

+ * + * @see http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers + * @see http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level + * @see http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html + * @author Nick Lothian + */ +public class HttpURLFeedFetcher extends AbstractFeedFetcher { + static final int POLL_EVENT = 1; + static final int RETRIEVE_EVENT = 2; + static final int UNCHANGED_EVENT = 3; + + private FeedFetcherCache feedInfoCache; + + + /** + * Constructor to use HttpURLFeedFetcher without caching of feeds + * + */ + public HttpURLFeedFetcher() { + super(); + } + + /** + * Constructor to enable HttpURLFeedFetcher to cache feeds + * + * @param feedCache - an instance of the FeedFetcherCache interface + */ + public HttpURLFeedFetcher(FeedFetcherCache feedInfoCache) { + this(); + setFeedInfoCache(feedInfoCache); + } + + public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException { + return this.retrieveFeed(this.getUserAgent(), feedUrl); + } + + /** + * Retrieve a feed over HTTP + * + * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve + * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object + * @throws IllegalArgumentException if the URL is null; + * @throws IOException if a TCP error occurs + * @throws FeedException if the feed is not valid + * @throws FetcherException if a HTTP error occurred + */ + public SyndFeed retrieveFeed(String userAgent, URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException { + if (feedUrl == null) { + throw new IllegalArgumentException("null is not a valid URL"); + } + + URLConnection connection = feedUrl.openConnection(); + if (!(connection instanceof HttpURLConnection)) { + throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url"); + } + HttpURLConnection httpConnection = (HttpURLConnection)connection; + // 
httpConnection.setInstanceFollowRedirects(true); // this is true by default, but can be changed on a class-wide basis + + FeedFetcherCache cache = getFeedInfoCache(); + if (cache != null) { + SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl); + setRequestHeaders(connection, syndFeedInfo); + httpConnection.connect(); + try { + fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection); + + if (syndFeedInfo == null) { + // this is a feed that hasn't been retrieved + syndFeedInfo = new SyndFeedInfo(); + retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection); + } else { + // check the response code + int responseCode = httpConnection.getResponseCode(); + if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) { + // the response code is not 304 NOT MODIFIED + // This is either because the feed server + // does not support conditional gets + // or because the feed has changed + retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection); + } else { + // the feed does not need retrieving + fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection); + } + } + + return syndFeedInfo.getSyndFeed(); + } finally { + httpConnection.disconnect(); + } + } else { + fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection); + InputStream inputStream = null; + setRequestHeaders(connection, null); + + connection.addRequestProperty("User-Agent", userAgent); + + httpConnection.connect(); + try { + inputStream = httpConnection.getInputStream(); + return getSyndFeedFromStream(inputStream, connection); + } catch (java.io.IOException e) { + handleErrorCodes(((HttpURLConnection)connection).getResponseCode()); + } finally { + if (inputStream != null) { + inputStream.close(); + } + httpConnection.disconnect(); + } + // we will never actually get to this line + return null; + } + } + + protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException { +
handleErrorCodes(connection.getResponseCode()); + + resetFeedInfo(feedUrl, syndFeedInfo, connection); + FeedFetcherCache cache = getFeedInfoCache(); + // resetting feed info in the cache + // could be needed for some implementations + // of FeedFetcherCache (eg, distributed HashTables) + if (cache != null) { + cache.setFeedInfo(feedUrl, syndFeedInfo); + } + } + + protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, IOException, FeedException { + // need to always set the URL because this may have changed due to 3xx redirects + syndFeedInfo.setUrl(connection.getURL()); + + // the ID is a persistant value that should stay the same even if the URL for the + // feed changes (eg, by 3xx redirects) + syndFeedInfo.setId(orignalUrl.toString()); + + // This will be 0 if the server doesn't support or isn't setting the last modified header + syndFeedInfo.setLastModified(new Long(connection.getLastModified())); + + // This will be null if the server doesn't support or isn't setting the ETag header + syndFeedInfo.setETag(connection.getHeaderField("ETag")); + + // get the contents + InputStream inputStream = null; + try { + inputStream = connection.getInputStream(); + SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection); + + String imHeader = connection.getHeaderField("IM"); + if (isUsingDeltaEncoding() && (imHeader!= null && imHeader.indexOf("feed") >= 0)) { + FeedFetcherCache cache = getFeedInfoCache(); + if (cache != null && connection.getResponseCode() == 226) { + // client is setup to use http delta encoding and the server supports it and has returned a delta encoded response + // This response only includes new items + SyndFeedInfo cachedInfo = cache.getFeedInfo(orignalUrl); + if (cachedInfo != null) { + SyndFeed cachedFeed = cachedInfo.getSyndFeed(); + + // set the new feed to be the orginal feed plus the new items + syndFeed = combineFeeds(cachedFeed, syndFeed); + } + } + } + 
+ syndFeedInfo.setSyndFeed(syndFeed); + } finally { + if (inputStream != null) { + inputStream.close(); + } + } + } + + /** + *

Set appropriate HTTP headers, including conditional get and gzip encoding headers

+ * + * @param connection A URLConnection + * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null + */ + protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) { + if (syndFeedInfo != null) { + // set the headers to get feed only if modified + // we support the use of both last modified and eTag headers + if (syndFeedInfo.getLastModified() != null) { + Object lastModified = syndFeedInfo.getLastModified(); + if (lastModified instanceof Long) { + connection.setIfModifiedSince(((Long)syndFeedInfo.getLastModified()).longValue()); + } + } + if (syndFeedInfo.getETag() != null) { + connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag()); + } + + } + // header to retrieve feed gzipped + connection.setRequestProperty("Accept-Encoding", "gzip"); + + if (isUsingDeltaEncoding()) { + connection.addRequestProperty("A-IM", "feed"); + } + } + + private SyndFeed readSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException { + BufferedInputStream is; + if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) { + // handle gzip encoded content + is = new BufferedInputStream(new GZIPInputStream(inputStream)); + } else { + is = new BufferedInputStream(inputStream); + } + + //InputStreamReader reader = new InputStreamReader(is, ResponseHandler.getCharacterEncoding(connection)); + + //SyndFeedInput input = new SyndFeedInput(); + + XmlReader reader = null; + if (connection.getHeaderField("Content-Type") != null) { + reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true); + } else { + reader = new XmlReader(is, true); + } + + SyndFeedInput syndFeedInput = new SyndFeedInput(); + syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); + + return syndFeedInput.build(reader); + + } + + private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, 
FeedException { + SyndFeed feed = readSyndFeedFromStream(inputStream, connection); + fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed); + return feed; + } + + /** + * @return The FeedFetcherCache used by this fetcher (Could be null) + */ + public synchronized FeedFetcherCache getFeedInfoCache() { + return feedInfoCache; + } + + /** + * @param cache The cache to be used by this fetcher (pass null to stop using a cache) + */ + public synchronized void setFeedInfoCache(FeedFetcherCache cache) { + feedInfoCache = cache; + } +} diff --git a/src/java/org/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java b/src/java/org/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java new file mode 100644 index 0000000..c1eab76 --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java @@ -0,0 +1,70 @@ +package org.rometools.fetcher.impl; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + *

An implementation of the {@link org.rometools.fetcher.impl.FeedFetcherCache} interface.

+ * + *

Unlike the HashMapFeedInfoCache this implementation will not grow unbound

+ * + * @author Javier Kohen + * @author Nick Lothian + * + */ +public class LinkedHashMapFeedInfoCache extends HashMapFeedInfoCache { + private final class CacheImpl extends LinkedHashMap { + private static final long serialVersionUID = -6977191330127794920L; + + public CacheImpl() { + super(16, 0.75F, true); + } + + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > getMaxEntries(); + } + } + + private static final int DEFAULT_MAX_ENTRIES = 20; + + private static final long serialVersionUID = 1694228973357997417L; + + private int maxEntries = DEFAULT_MAX_ENTRIES; + + private final static LinkedHashMapFeedInfoCache _instance = new LinkedHashMapFeedInfoCache(); + + + /** + * Get the global instance of the cache + * @return an implementation of FeedFetcherCache + */ + public static final FeedFetcherCache getInstance() { + return _instance; + } + + /** + *

Constructor for LinkedHashMapFeedInfoCache

+ * + *

Only use this if you want multiple instances of the cache. + * Usually {@link #getInstance()} is more appropriate.

+ * + * @see #getInstance() + */ + public LinkedHashMapFeedInfoCache() { + super(); + } + + protected Map createInfoCache() { + return Collections.synchronizedMap(new CacheImpl()); + } + + public synchronized final int getMaxEntries() { + return maxEntries; + } + + public synchronized final void setMaxEntries(int maxEntries) { + this.maxEntries = maxEntries; + } + +} diff --git a/src/java/org/rometools/fetcher/impl/ResponseHandler.java b/src/java/org/rometools/fetcher/impl/ResponseHandler.java new file mode 100644 index 0000000..952edf3 --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/ResponseHandler.java @@ -0,0 +1,57 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.fetcher.impl; + +import java.net.URLConnection; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Utility class to help deal with HTTP responses + * + */ +public class ResponseHandler { + public static final String defaultCharacterEncoding = "ISO-8859-1"; + + private final static Pattern characterEncodingPattern = Pattern.compile("charset=([.[^; ]]*)"); + + public static String getCharacterEncoding(URLConnection connection) { + return getCharacterEncoding(connection.getContentType()); + } + + /** + * + *

Gets the character encoding of a response. (Note that this is different to + * the content-encoding)

+ * + * @param contentTypeHeader the value of the content-type HTTP header eg: text/html; charset=ISO-8859-4 + * @return the character encoding, eg: ISO-8859-4 + */ + public static String getCharacterEncoding(String contentTypeHeader) { + if (contentTypeHeader == null) { + return defaultCharacterEncoding; + } + + Matcher m = characterEncodingPattern.matcher(contentTypeHeader); + //if (!m.matches()) { + if (!m.find()) { + return defaultCharacterEncoding; + } else { + return m.group(1); + } + } +} diff --git a/src/java/org/rometools/fetcher/impl/SyndFeedInfo.java b/src/java/org/rometools/fetcher/impl/SyndFeedInfo.java new file mode 100644 index 0000000..587f309 --- /dev/null +++ b/src/java/org/rometools/fetcher/impl/SyndFeedInfo.java @@ -0,0 +1,149 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.fetcher.impl; + +import java.io.Serializable; +import java.net.URL; + +import com.sun.syndication.feed.impl.ObjectBean; +import com.sun.syndication.feed.synd.SyndFeed; + +/** + *

A class to represent a {@link com.sun.syndication.feed.synd.SyndFeed} + * and some useful information about it.

+ * + *

This class is thread safe, as expected by the different feed fetcher + * implementations.

+ * + * @author Nick Lothian + */ +public class SyndFeedInfo implements Cloneable, Serializable { + private static final long serialVersionUID = -1874786860901426015L; + + private final ObjectBean _objBean; + private String id; + private URL url; + private Object lastModified; + private String eTag; + private SyndFeed syndFeed; + + public SyndFeedInfo() { + _objBean = new ObjectBean(this.getClass(),this); + } + + /** + * Creates a deep 'bean' clone of the object. + *

+ * @return a clone of the object. + * @throws CloneNotSupportedException thrown if an element of the object cannot be cloned. + * + */ + public Object clone() throws CloneNotSupportedException { + return _objBean.clone(); + } + + /** + * Indicates whether some other object is "equal to" this one as defined by the Object equals() method. + *

+ * @param other the reference object with which to compare. + * @return true if 'this' object is equal to the 'other' object. + * + */ + public boolean equals(Object other) { + return _objBean.equals(other); + } + + /** + * Returns a hashcode value for the object. + *

+ * It follows the contract defined by the Object hashCode() method. + *

+ * @return the hashcode of the bean object. + * + */ + public int hashCode() { + return _objBean.hashCode(); + } + + /** + * Returns the String representation for the object. + *

+ * @return String representation for the object. + * + */ + public String toString() { + return _objBean.toString(); + } + + + /** + * @return the ETag the feed was last retrieved with + */ + public synchronized String getETag() { + return eTag; + } + + /** + * @return the last modified date for the feed + */ + public synchronized Object getLastModified() { + return lastModified; + } + + /** + * @return the URL the feed was served from + */ + public synchronized URL getUrl() { + return url; + } + + public synchronized void setETag(String string) { + eTag = string; + } + + public synchronized void setLastModified(Object o) { + lastModified = o; + } + + public synchronized void setUrl(URL url) { + this.url = url; + } + + public synchronized SyndFeed getSyndFeed() { + return syndFeed; + } + + public synchronized void setSyndFeed(SyndFeed feed) { + syndFeed = feed; + } + + /** + * @return A unique ID to identify the feed + */ + public synchronized String getId() { + return id; + } + + /** + * @param string A unique ID to identify the feed. Note that if the URL of the feed + * changes this will remain the same + */ + public synchronized void setId(String string) { + id = string; + } + +} diff --git a/src/java/org/rometools/fetcher/samples/FeedAggregator.java b/src/java/org/rometools/fetcher/samples/FeedAggregator.java new file mode 100644 index 0000000..090a0c0 --- /dev/null +++ b/src/java/org/rometools/fetcher/samples/FeedAggregator.java @@ -0,0 +1,92 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.fetcher.samples; + +import java.io.PrintWriter; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; + +import com.sun.syndication.feed.synd.SyndFeedImpl; +import com.sun.syndication.feed.synd.SyndFeed; +import org.rometools.fetcher.FeedFetcher; +import org.rometools.fetcher.impl.FeedFetcherCache; +import org.rometools.fetcher.impl.HashMapFeedInfoCache; +import org.rometools.fetcher.impl.HttpURLFeedFetcher; +import com.sun.syndication.io.SyndFeedOutput; + +/** + *

It aggregates a list of RSS/Atom feeds (they can be of different types) + * into a single feed of the specified type.

+ * + *

Converted from the original FeedAggregator sample

+ * + * @author Alejandro Abdelnur + * @author Nick Lothian + * + */ +public class FeedAggregator { + + public static void main(String[] args) { + boolean ok = false; + if (args.length>=2) { + try { + String outputType = args[0]; + + SyndFeed feed = new SyndFeedImpl(); + feed.setFeedType(outputType); + + feed.setTitle("Aggregated Feed"); + feed.setDescription("Anonymous Aggregated Feed"); + feed.setAuthor("anonymous"); + feed.setLink("http://www.anonymous.com"); + + List entries = new ArrayList(); + feed.setEntries(entries); + + FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); + FeedFetcher feedFetcher = new HttpURLFeedFetcher(feedInfoCache); + + for (int i=1;i + * @author Alejandro Abdelnur + * @author Nick Lothian + * + */ +public class FeedReader { + public static void main(String[] args) { + boolean ok = false; + if (args.length==1) { + try { + URL feedUrl = new URL(args[0]); + FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); + FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache); + + FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); + + fetcher.addFetcherEventListener(listener); + + System.err.println("Retrieving feed " + feedUrl); + // Retrieve the feed. + // We will get a Feed Polled Event and then a + // Feed Retrieved event (assuming the feed is valid) + SyndFeed feed = fetcher.retrieveFeed(feedUrl); + + System.err.println(feedUrl + " retrieved"); + System.err.println(feedUrl + " has a title: " + feed.getTitle() + " and contains " + feed.getEntries().size() + " entries."); + // We will now retrieve the feed again. 
If the feed is unmodified + // and the server supports conditional gets, we will get a "Feed + // Unchanged" event after the Feed Polled event + System.err.println("Polling " + feedUrl + " again to test conditional get support."); + SyndFeed feed2 = fetcher.retrieveFeed(feedUrl); + System.err.println("If a \"Feed Unchanged\" event fired then the server supports conditional gets."); + + ok = true; + } + catch (Exception ex) { + System.out.println("ERROR: "+ex.getMessage()); + ex.printStackTrace(); + } + } + + if (!ok) { + System.out.println(); + System.out.println("FeedReader reads and prints any RSS/Atom feed type."); + System.out.println("The first parameter must be the URL of the feed to read."); + System.out.println(); + } + + } + + static class FetcherEventListenerImpl implements FetcherListener { + /** + * @see com.sun.syndication.fetcher.FetcherListener#fetcherEvent(com.sun.syndication.fetcher.FetcherEvent) + */ + public void fetcherEvent(FetcherEvent event) { + String eventType = event.getEventType(); + if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(eventType)) { + System.err.println("\tEVENT: Feed Polled. URL = " + event.getUrlString()); + } else if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(eventType)) { + System.err.println("\tEVENT: Feed Retrieved. URL = " + event.getUrlString()); + } else if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(eventType)) { + System.err.println("\tEVENT: Feed Unchanged. URL = " + event.getUrlString()); + } + } + } +} diff --git a/src/test/org/rometools/test/AbstractJettyTest.java b/src/test/org/rometools/test/AbstractJettyTest.java new file mode 100644 index 0000000..a032f66 --- /dev/null +++ b/src/test/org/rometools/test/AbstractJettyTest.java @@ -0,0 +1,421 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.test; + +import org.rometools.fetcher.impl.HashMapFeedInfoCache; +import org.rometools.fetcher.impl.FeedFetcherCache; +import java.net.URL; + +import junit.framework.TestCase; + +import org.mortbay.http.BasicAuthenticator; +import org.mortbay.http.HashUserRealm; +import org.mortbay.http.HttpContext; +import org.mortbay.http.HttpServer; +import org.mortbay.http.SecurityConstraint; +import org.mortbay.http.SocketListener; +import org.mortbay.http.UserRealm; +import org.mortbay.http.handler.SecurityHandler; +import org.mortbay.jetty.servlet.ServletHandler; + +import com.sun.syndication.feed.atom.Entry; +import com.sun.syndication.feed.synd.SyndEntry; +import com.sun.syndication.feed.synd.SyndFeed; +import org.rometools.fetcher.FeedFetcher; +import org.rometools.fetcher.FetcherEvent; +import org.rometools.fetcher.FetcherException; +import org.rometools.fetcher.FetcherListener; + +/** + * @author nl + */ +public abstract class AbstractJettyTest extends TestCase { + + private HttpServer server; + private int testPort = 8283; + + /** + * @param s + */ + public AbstractJettyTest(String s) { + super(s); + } + + protected HttpServer getServer() { + return server; + } + + protected abstract FeedFetcher getFeedFetcher(); + + protected abstract FeedFetcher getFeedFetcher(FeedFetcherCache cache); + + /** + * @see junit.framework.TestCase#setUp() + */ + protected void setUp() throws Exception { + setupServer(); + + HttpContext context = createContext(); + + ServletHandler servlets = createServletHandler(); + 
context.addHandler(servlets); + + server.addContext(context); + + server.start(); + } + + /** + * @throws InterruptedException presumably raised by server.stop() when an earlier server instance is shut down (confirm against the Jetty API) + */ + private void setupServer() throws InterruptedException { + // Create the server + if (server != null) { + server.stop(); + server = null; + } + server = new HttpServer(); + + // Create a port listener + SocketListener listener=new SocketListener(); + listener.setPort(testPort); + server.addListener(listener); + } + + /** + * @return a new ServletHandler with FetcherTestServlet registered under both SERVLET_MAPPING and SERVLET_MAPPING2 + */ + private ServletHandler createServletHandler() { + ServletHandler servlets = new ServletHandler(); + servlets.addServlet("FetcherTestServlet",FetcherTestServlet.SERVLET_MAPPING,"org.rometools.test.FetcherTestServlet"); + servlets.addServlet("FetcherTestServlet",FetcherTestServlet.SERVLET_MAPPING2,"org.rometools.test.FetcherTestServlet"); + return servlets; + } + + /** + * @return a new HttpContext whose context path is "/rome/*" + */ + private HttpContext createContext() { + HttpContext context = new HttpContext(); + context.setContextPath("/rome/*"); + return context; + } + + /** + * @see junit.framework.TestCase#tearDown() + */ + protected void tearDown() throws Exception { + if (server != null) { + server.stop(); + server.destroy(); + server = null; + } + } + + class FetcherEventListenerImpl implements FetcherListener { + boolean polled = false; + boolean retrieved = false; + boolean unchanged = false; + + public void reset() { + polled = false; + retrieved = false; + unchanged = false; + } + + /** + * @see org.rometools.fetcher.FetcherListener#fetcherEvent(org.rometools.fetcher.FetcherEvent) + */ + public void fetcherEvent(FetcherEvent event) { + String eventType = event.getEventType(); + if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(eventType)) { + System.err.println("\tEVENT: Feed Polled. URL = " + event.getUrlString()); + polled = true; + } else if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(eventType)) { + System.err.println("\tEVENT: Feed Retrieved. 
URL = " + event.getUrlString()); + retrieved = true; + } else if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(eventType)) { + System.err.println("\tEVENT: Feed Unchanged. URL = " + event.getUrlString()); + unchanged = true; + } + } + } + + public void testRetrieveFeed() { + FeedFetcher feedFetcher = getFeedFetcher(); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + public void testBasicAuthentication() { + try { + /* swap the server started by setUp() for a fresh one whose context gets a realm and a security constraint */ + setupServer(); + + HttpContext context = createContext(); + + URL url = this.getClass().getResource("/testuser.properties"); + UserRealm ur = new HashUserRealm("test", url.getFile()); + context.setRealm(ur); + + BasicAuthenticator ba = new BasicAuthenticator(); + context.setAuthenticator(ba); + + SecurityHandler sh = new SecurityHandler(); + context.addHandler(sh); + + SecurityConstraint sc = new SecurityConstraint(); + sc.setName("test"); + sc.addRole("*"); + sc.setAuthenticate(true); + context.addSecurityConstraint("/", sc); + + ServletHandler servlets = createServletHandler(); + context.addHandler(servlets); + + server.addContext(context); + + server.start(); + + FeedFetcher feedFetcher = getAuthenticatedFeedFetcher(); + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + + } + + public abstract FeedFetcher getAuthenticatedFeedFetcher(); + + /** + * Test getting a feed via an HTTP redirect (FetcherTestServlet answers redirect=TRUE with response.sendRedirect) + * + */ + public void testRetrieveRedirectedFeed() { + FeedFetcher feedFetcher = getFeedFetcher(); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new 
URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?redirect=TRUE")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + /** + * Test error handling: a 404 and a 500 answer must each surface as a FetcherException carrying that response code + * + */ + public void testErrorHandling() { + FeedFetcher feedFetcher = getFeedFetcher(); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?error=404")); + fail("4xx error handling did not work correctly"); + } catch (FetcherException e) { + // expect this exception + assertEquals(404, e.getResponseCode()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?error=500")); + fail("5xx error handling did not work correctly"); + } catch (FetcherException e) { + // expect this exception + assertEquals(500, e.getResponseCode()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + public void testUserAgent() { + FeedFetcher feedFetcher = getFeedFetcher(); + //System.out.println(feedFetcher.getUserAgent()); + //System.out.println(System.getProperty("rome.fetcher.version", "UNKNOWN")); + assertEquals("Rome Client (http://tinyurl.com/64t5n) Ver: " + System.getProperty("rome.fetcher.version", "UNKNOWN"), feedFetcher.getUserAgent()); + } + + /** + * Test events fired when there is no cache in use + * + */ + public void testFetchEvents() { + FeedFetcher feedFetcher = getFeedFetcher(); + FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); + feedFetcher.addFetcherEventListener(listener); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + + // since there 
is no cache, the events fired should be exactly the same if + // we re-retrieve the feed + feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + /** + * Test events fired when there is a cache in use + * + */ + public void testFetchEventsWithCache() { + FeedFetcherCache feedInfoCache = new HashMapFeedInfoCache(); + FeedFetcher feedFetcher = getFeedFetcher(feedInfoCache); + FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); + feedFetcher.addFetcherEventListener(listener); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + + // Since the feed is cached, the second request should not + // actually retrieve the feed + feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertFalse(listener.retrieved); + assertTrue(listener.unchanged); + listener.reset(); + + // now simulate getting the feed after it has changed (refreshfeed=TRUE makes the servlet answer with a different ETag and Last-Modified value) + feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?refreshfeed=TRUE")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + /** + * Test handling of GZipped feed + * + */ + public void testGZippedFeed() { + FeedFetcher feedFetcher = getFeedFetcher(); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new 
URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?gzipfeed=TRUE")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + public void testPreserveWireFeed() throws Exception { + FeedFetcher feedFetcher = getFeedFetcher(); + + // first check that the WireFeed is not preserved by default + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + assertNull(feed.originalWireFeed()); + + SyndEntry syndEntry = (SyndEntry)feed.getEntries().get(0); + assertNotNull(syndEntry); + assertNull(syndEntry.getWireEntry()); + + // now turn on WireFeed preservation + feedFetcher.setPreserveWireFeed(true); + try { + feed = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + assertNotNull(feed.originalWireFeed()); + + syndEntry = (SyndEntry)feed.getEntries().get(0); + assertNotNull(syndEntry); + assertNotNull(syndEntry.getWireEntry()); + + Entry entry = (Entry) syndEntry.getWireEntry(); + assertEquals("atom_1.0.feed.entry[0].rights", entry.getRights()); + + } finally { + feedFetcher.setPreserveWireFeed(false); //reset + } + + } + + public void testDeltaEncoding() { + FeedFetcherCache feedInfoCache = new HashMapFeedInfoCache(); + FeedFetcher feedFetcher = getFeedFetcher(feedInfoCache); + try { + feedFetcher.setUsingDeltaEncoding(true); + + // first retrieval should just grab the default feed + SyndFeed feed1 = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?deltaencode=TRUE&refreshfeed=TRUE")); + assertNotNull(feed1); + assertEquals("atom_1.0.feed.title", feed1.getTitle()); + assertEquals(2, feed1.getEntries().size()); + SyndEntry entry1 = (SyndEntry) feed1.getEntries().get(0); + 
assertEquals("atom_1.0.feed.entry[0].title", entry1.getTitle()); + + // second retrieval should fetch only the new item; the asserts below expect it at index 0 of a 3-entry feed, ahead of the entries from the first retrieval + /* + * This is breaking with Rome 0.5 ?? + */ + SyndFeed feed2 = feedFetcher.retrieveFeed(new URL("http://localhost:"+testPort+"/rome/FetcherTestServlet?deltaencode=TRUE&refreshfeed=TRUE")); + assertNotNull(feed2); + assertEquals(FetcherTestServlet.DELTA_FEED_TITLE, feed2.getTitle()); + assertEquals(3, feed2.getEntries().size()); + entry1 = (SyndEntry) feed2.getEntries().get(0); + assertEquals(FetcherTestServlet.DELTA_FEED_ENTRY_TITLE, entry1.getTitle()); + + SyndEntry entry2 = (SyndEntry) feed2.getEntries().get(1); + assertEquals("atom_1.0.feed.entry[0].title", entry2.getTitle()); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + +} diff --git a/src/test/org/rometools/test/DiskFeedInfoCacheTest.java b/src/test/org/rometools/test/DiskFeedInfoCacheTest.java new file mode 100644 index 0000000..c39d6c2 --- /dev/null +++ b/src/test/org/rometools/test/DiskFeedInfoCacheTest.java @@ -0,0 +1,42 @@ +package org.rometools.test; + +import org.rometools.fetcher.impl.DiskFeedInfoCache; +import org.rometools.fetcher.impl.SyndFeedInfo; +import java.net.*; +import java.io.File; +import junit.framework.TestCase; + +public class DiskFeedInfoCacheTest extends TestCase { + + /** + * A feed info stored for a URL must no longer be found after clear(). + */ + public void testClear() throws Exception { + File cacheDir = new File("test-cache"); + cacheDir.mkdir(); + cacheDir.deleteOnExit(); + + final DiskFeedInfoCache cache = new DiskFeedInfoCache(cacheDir.getCanonicalPath()); + SyndFeedInfo info = new SyndFeedInfo(); + URL url = new URL("http://nowhere.com"); + cache.setFeedInfo(url, info); + + cache.clear(); + final Object returned = cache.getFeedInfo(url); + assertTrue( returned == null ); + } + + public void testRemove() throws Exception { + File cacheDir = new File("test-cache"); + cacheDir.mkdir(); + cacheDir.deleteOnExit(); + + final DiskFeedInfoCache cache = new DiskFeedInfoCache( cacheDir.getCanonicalPath() 
); + SyndFeedInfo info = new SyndFeedInfo(); + URL url = new URL("http://nowhere.com"); + cache.setFeedInfo( url, info ); + + SyndFeedInfo removedInfo = cache.remove( url ); + assertTrue( removedInfo.equals(info) ); + SyndFeedInfo shouldBeNull = cache.remove( url ); + assertTrue( null == shouldBeNull ); + } + +} diff --git a/src/test/org/rometools/test/FetcherTestServlet.java b/src/test/org/rometools/test/FetcherTestServlet.java new file mode 100644 index 0000000..af38dc0 --- /dev/null +++ b/src/test/org/rometools/test/FetcherTestServlet.java @@ -0,0 +1,244 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.rometools.test; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPOutputStream; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.sun.syndication.feed.synd.SyndContent; +import com.sun.syndication.feed.synd.SyndContentImpl; +import com.sun.syndication.feed.synd.SyndEntry; +import com.sun.syndication.feed.synd.SyndEntryImpl; +import com.sun.syndication.feed.synd.SyndFeed; +import com.sun.syndication.feed.synd.SyndFeedImpl; +import com.sun.syndication.io.FeedException; +import com.sun.syndication.io.SyndFeedOutput; + + +public class FetcherTestServlet extends javax.servlet.http.HttpServlet { + public static final String ETAG_1 = "ETAG-1"; + public static final String ETAG_2 = "ETAG-2"; + + public static final String DELTA_FEED_TITLE = "Delta Encoded Feed"; + public static final String DELTA_FEED_ENTRY_TITLE = "Delta Encoded Feed Entry"; + + public static final String SERVLET_MAPPING = "/FetcherTestServlet/*"; + public static final String SERVLET_MAPPING2 = "/FetcherTestServlet2/*"; + + /** + * @throws IOException + * @throws ServletException if the delta-encoded feed cannot be generated (wraps the FeedException) + * @see javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) + */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + if ("TRUE".equalsIgnoreCase(request.getParameter("redirect"))) { + // testing redirection support + response.sendRedirect("/rome/FetcherTestServlet2/"); + return; + } else if (request.getParameter("error") != null) { + 
//response.sendError(HttpServletResponse.SC_NOT_FOUND); + int errorToThrow = Integer.parseInt(request.getParameter("error")); + response.sendError(errorToThrow); + return; + } else { + + // We manually set the date headers using strings + // instead of the get/setDateHeader methods because + // those methods work with longs, which have too much + // precision for the real date headers + // this is just an arbitrary date + String lastModifiedDate = "Thu, 08 Jan 2009 23:06:39 GMT"; + String eTag = ETAG_1; + + if ("TRUE".equalsIgnoreCase(request.getParameter("refreshfeed"))) { + lastModifiedDate = "Fri, 09 Jan 2009 12:06:39 GMT"; + eTag = ETAG_2; + } + + boolean serveFeed = checkModified(request, lastModifiedDate, eTag) || ("TRUE".equalsIgnoreCase(request.getParameter("deltaencode"))); + boolean gzip = "TRUE".equalsIgnoreCase(request.getParameter("gzipfeed")); + + if (serveFeed) { + String aimHeader = request.getHeader("A-IM"); + boolean serveDeltaEncodedFeed = ((aimHeader != null) && (aimHeader.indexOf("feed") >=0) && "TRUE".equalsIgnoreCase(request.getParameter("deltaencode"))); + if (serveDeltaEncodedFeed) { + try { + sendDeltaEncodedData(response, lastModifiedDate, request.getHeader("If-None-Match"), eTag, gzip); + } catch (FeedException e) { + throw new ServletException(e); + } + } else { + sendFeedData(response, lastModifiedDate, eTag, gzip); + } + return; + } else { + response.sendError(HttpServletResponse.SC_NOT_MODIFIED); + return; + } + } + } + + + + /** + * Decides from the If-None-Match and If-Modified-Since request headers whether the feed has to be served. + * + * @return true when neither header was sent or when any header that was sent differs from eTag / lastModifiedDate; false only when every header that was sent matches + */ + private boolean checkModified(HttpServletRequest request, String lastModifiedDate, String eTag) { + + String requestedETag = request.getHeader("If-None-Match"); + String requestedLastModified = request.getHeader("If-Modified-Since"); + boolean modified = true; + boolean mustServer = false; + if (requestedETag != null) { + if (eTag.equals(requestedETag)) { + modified = false; + } else { + modified = true; + mustServer = true; + } + } + if (requestedLastModified != null) { + if 
(lastModifiedDate.equals(requestedLastModified)) { + modified = false; + } else { + modified = true; + mustServer = true; + } + } + boolean serveFeed = (modified || mustServer); + return serveFeed; + } + + /** + * Sends a single-entry delta feed with status 226 when requestedETag is ETAG_1 or ETAG_2; for any other value the full feed goes out through sendFeedData. + * + * @param response the servlet response to write to + * @param lastModifiedDate value for the Last-Modified header; the header is skipped when null + * @param requestedETag the If-None-Match value the caller read from the request + * @param responseETag value for the ETag header; the header is skipped when null + * @param gzip whether the body is gzip-compressed + * @throws IOException + * @throws FeedException + */ + private void sendDeltaEncodedData(HttpServletResponse response, String lastModifiedDate, String requestedETag, String responseETag, boolean gzip) throws IOException, FeedException { + if (ETAG_1.equals(requestedETag) || ETAG_2.equals(requestedETag)) { + OutputStream out = null; + if (gzip) { + response.setHeader("Content-Encoding", "gzip"); + out = new GZIPOutputStream(response.getOutputStream()); + } else { + out = response.getOutputStream(); + } + + response.setContentType("text/xml"); + response.setStatus(226); + if (gzip) { + response.setHeader("IM", "feed, gzip"); + } else { + response.setHeader("IM", "feed"); + } + + if (responseETag != null) { + response.setHeader("ETag", responseETag); + } + if (lastModifiedDate != null) { + response.setHeader("Last-Modified", lastModifiedDate); + } + + SyndFeed feed = new SyndFeedImpl(); + feed.setFeedType("atom_1.0"); + + feed.setTitle(DELTA_FEED_TITLE); + feed.setLink("http://rome.dev.java.net"); + feed.setDescription("This tests using rfc3229 delta encoding."); + + List entries = new ArrayList(); + SyndEntry entry; + SyndContent description; + + entry = new SyndEntryImpl(); + entry.setTitle(DELTA_FEED_ENTRY_TITLE); + entry.setLink("http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html"); + try { + DateFormat dateParser = new SimpleDateFormat("yyyy-MM-dd"); + entry.setPublishedDate(dateParser.parse("2004-11-25")); + } + catch (ParseException ex) { + // not expected: the literal above matches the pattern; the entry simply keeps no published date + } + description = new SyndContentImpl(); + description.setType("text/plain"); + description.setValue("Test for RFC3229 Delta Encoding"); + entry.setDescription(description); + entries.add(entry); + + feed.setEntries(entries); 
+ + /* NOTE(review): out is not closed on this path; with gzip=true the GZIPOutputStream is presumably never finished - confirm */ + SyndFeedOutput output = new SyndFeedOutput(); + output.output(feed, new OutputStreamWriter(out)); + } else { + sendFeedData(response, lastModifiedDate, responseETag, gzip); + } + } + + /** + * Writes the atom_1.0.xml test resource to the response, gzip-compressed when gzip is true, after setting the ETag and Last-Modified headers whose values are not null. + */ + private void sendFeedData(HttpServletResponse response, String lastModifiedDate, String eTag, boolean gzip) throws IOException { + OutputStream out = null; + if (gzip) { + response.setHeader("Content-Encoding", "gzip"); + out = new GZIPOutputStream(response.getOutputStream()); + } else { + out = response.getOutputStream(); + } + + response.setContentType("text/xml"); + if (eTag != null) { + response.setHeader("ETag", eTag); + } + if (lastModifiedDate != null) { + response.setHeader("Last-Modified", lastModifiedDate); + } + + /* NOTE(review): ClassLoader lookups normally take names without a leading slash, so this first lookup presumably returns null and the Class-based fallback below does the work - confirm */ + InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("/atom_1.0.xml"); + if (inputStream == null) { + inputStream = this.getClass().getResourceAsStream("/atom_1.0.xml"); + } + + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + try { + String line; + while ((line = reader.readLine()) != null) { + /* readLine() drops the line terminator and none is written back, so the body goes out without line breaks */ + out.write(line.getBytes()); + line = null; + } + } finally { + if (reader != null) { + reader.close(); + } + } + + out.close(); + } +} diff --git a/src/test/org/rometools/test/HashMapFeedInfoCacheTest.java b/src/test/org/rometools/test/HashMapFeedInfoCacheTest.java new file mode 100644 index 0000000..11f3642 --- /dev/null +++ b/src/test/org/rometools/test/HashMapFeedInfoCacheTest.java @@ -0,0 +1,41 @@ +package org.rometools.test; + +import org.rometools.fetcher.impl.HashMapFeedInfoCache; +import org.rometools.fetcher.impl.SyndFeedInfo; +import java.net.URL; + +import junit.framework.TestCase; + +public class HashMapFeedInfoCacheTest extends TestCase { + + public void testRemove() throws Exception { + final HashMapFeedInfoCache cache = new HashMapFeedInfoCache(); + assertNotNull( cache ); + + final URL url = new URL("http://foo.com"); + final SyndFeedInfo syndFeedInfo = new SyndFeedInfo(); + 
syndFeedInfo.setUrl(url); + cache.setFeedInfo(url, syndFeedInfo); + + final SyndFeedInfo returned = cache.remove(url); + assertTrue( returned.equals(syndFeedInfo) ); + assertTrue( url.equals( returned.getUrl() )); + } + + /** + * A feed info stored for a URL must no longer be returned by getFeedInfo after clear(). + */ + public void testClear() throws Exception { + final HashMapFeedInfoCache cache = new HashMapFeedInfoCache(); + assertNotNull( cache ); + + final URL url = new URL("http://foo.com"); + final SyndFeedInfo syndFeedInfo = new SyndFeedInfo(); + syndFeedInfo.setUrl(url); + cache.setFeedInfo(url, syndFeedInfo); + + //clear it + cache.clear(); + + //we should not get a result back + final Object returned = cache.getFeedInfo(url); + assertTrue( returned == null ); + } +} diff --git a/src/test/org/rometools/test/HttpClientFeedFetcherTest.java b/src/test/org/rometools/test/HttpClientFeedFetcherTest.java new file mode 100644 index 0000000..60cb8e2 --- /dev/null +++ b/src/test/org/rometools/test/HttpClientFeedFetcherTest.java @@ -0,0 +1,60 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + */ +package org.rometools.test; + +import org.rometools.fetcher.impl.HttpClientFeedFetcher; +import org.rometools.fetcher.impl.FeedFetcherCache; +import org.apache.commons.httpclient.Credentials; +import org.apache.commons.httpclient.UsernamePasswordCredentials; + +import org.rometools.fetcher.FeedFetcher; + +/** + * Runs the AbstractJettyTest suite against HttpClientFeedFetcher. + * + * @author Nick Lothian + */ +public class HttpClientFeedFetcherTest extends AbstractJettyTest { + + public HttpClientFeedFetcherTest(String s) { + super(s); + } + + /** + * @see org.rometools.test.AbstractJettyTest#getFeedFetcher() + */ + protected FeedFetcher getFeedFetcher() { + return new HttpClientFeedFetcher(); + } + + protected FeedFetcher getFeedFetcher(FeedFetcherCache cache) { + return new HttpClientFeedFetcher(cache); + } + + /** + * @see org.rometools.test.AbstractJettyTest#getAuthenticatedFeedFetcher() + */ + public FeedFetcher getAuthenticatedFeedFetcher() { + return new HttpClientFeedFetcher(null, new HttpClientFeedFetcher.CredentialSupplier() { + public Credentials getCredentials(String realm, String host) { + if ("localhost".equals(host)) { + return new UsernamePasswordCredentials("username", "password"); + } else { + return null; + } + } + }); + } +} diff --git a/src/test/org/rometools/test/HttpURLFeedFetcherTest.java b/src/test/org/rometools/test/HttpURLFeedFetcherTest.java new file mode 100644 index 0000000..12ddfc1 --- /dev/null +++ b/src/test/org/rometools/test/HttpURLFeedFetcherTest.java @@ -0,0 +1,55 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.rometools.test; + +import org.rometools.fetcher.impl.HttpURLFeedFetcher; +import org.rometools.fetcher.impl.FeedFetcherCache; +import org.rometools.fetcher.FeedFetcher; + + +public class HttpURLFeedFetcherTest extends AbstractJettyTest { + + public HttpURLFeedFetcherTest(String s) { + super(s); + } + + /** + * @see org.rometools.test.AbstractJettyTest#getFeedFetcher() + */ + protected FeedFetcher getFeedFetcher() { + return new HttpURLFeedFetcher(); + } + + protected FeedFetcher getFeedFetcher(FeedFetcherCache cache) { + return new HttpURLFeedFetcher(cache); + } + + /** + * @see org.rometools.test.AbstractJettyTest#getAuthenticatedFeedFetcher() + */ + public FeedFetcher getAuthenticatedFeedFetcher() { + // install TestBasicAuthenticator as the default java.net.Authenticator so the fetcher's connections can answer the auth challenge + java.net.Authenticator.setDefault(new TestBasicAuthenticator()); + + FeedFetcher feedFetcher = getFeedFetcher(); + + return feedFetcher; + } + + + +} diff --git a/src/test/org/rometools/test/ResponseHandlerTest.java b/src/test/org/rometools/test/ResponseHandlerTest.java new file mode 100644 index 0000000..c4bb033 --- /dev/null +++ b/src/test/org/rometools/test/ResponseHandlerTest.java @@ -0,0 +1,42 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + */ +package org.rometools.test; + +import org.rometools.fetcher.impl.ResponseHandler; + +import junit.framework.TestCase; + +public class ResponseHandlerTest extends TestCase { + + /** + * Constructor for ResponseHandlerTest; arg0 is the test name handed on to TestCase. + */ + public ResponseHandlerTest(String arg0) { + super(arg0); + } + + /** + * getCharacterEncoding must return the charset parameter of a content type, and the default encoding when the input is null or carries no charset. + */ + public void testGetCharacterEncodingString() { + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding((String)null)); + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding("text/xml")); + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding("text/xml;")); + assertEquals("ISO-8859-4", ResponseHandler.getCharacterEncoding("text/xml; charset=ISO-8859-4")); + assertEquals("ISO-8859-4", ResponseHandler.getCharacterEncoding("text/xml;charset=ISO-8859-4")); + assertEquals("ISO-8859-4", ResponseHandler.getCharacterEncoding("text/xml;charset=ISO-8859-4;something")); + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding("text/xml;something")); + } + +} diff --git a/src/test/org/rometools/test/TestBasicAuthenticator.java b/src/test/org/rometools/test/TestBasicAuthenticator.java new file mode 100644 index 0000000..bdd6ed1 --- /dev/null +++ b/src/test/org/rometools/test/TestBasicAuthenticator.java @@ -0,0 +1,37 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + */ +package org.rometools.test; + +import java.net.Authenticator; +import java.net.PasswordAuthentication; + +/** + * Authenticator for the tests: supplies the fixed credentials "username" / "password" when the requesting host is "localhost" and returns null for every other host. + * + * @author nl + */ +public class TestBasicAuthenticator extends Authenticator { + + /** + * @see java.net.Authenticator#getPasswordAuthentication() + */ + protected PasswordAuthentication getPasswordAuthentication() { + if ("localhost".equals(getRequestingHost())) { + return new PasswordAuthentication("username", "password".toCharArray()); + } else { + return null; + } + } +}