diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..83a7c0c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.classpath
+.project
+.settings
+target
+.idea
+*.iml
+atlassian-ide-plugin.xml
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f43cdb1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,14 @@
+Copyright 2004 Sun Microsystems, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..49d2dfe
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+rome
+====
+
+ROME is a set of RSS and Atom Utilities for Java. It makes it easy to work in Java with most syndication formats: RSS 0.90, RSS 0.91 Netscape,
+RSS 0.91 Userland, RSS 0.92, RSS 0.93, RSS 0.94, RSS 1.0, RSS 2.0, Atom 0.3, Atom 1.0
+
+More Information: http://rometools.github.io/rome-fetcher/
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..5d86063
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,123 @@
+
+
+ * The default user agent. It is not marked final so buggy java compiler will not write this + * string into all classes that reference it. + *
+ * + *+ * http://tinyurl.com/64t5n points to https://rome.dev.java.net Some servers ban user agents + * with "Java" in the name. + *
+ * + */ + public static String DEFAULT_USER_AGENT = "Rome Client (http://tinyurl.com/64t5n)"; + + /** + * @param string The User-Agent to sent to servers + */ + public abstract void setUserAgent(String string); + + /** + * @return the User-Agent currently being sent to servers + */ + public abstract String getUserAgent(); + + /** + *+ * Turn on or off rfc3229 delta encoding + *
+ * + *+ * See http://www.ietf.org/rfc/rfc3229.txt and + * http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html + *
+ * + *+ * NOTE: This is experimental and feedback is welcome! + *
+ * + * @param useDeltaEncoding + */ + public abstract void setUsingDeltaEncoding(boolean useDeltaEncoding); + + /** + *+ * Is this fetcher using rfc3229 delta encoding? + *
+ * + * @return + */ + public abstract boolean isUsingDeltaEncoding(); + + /** + *+ * Add a FetcherListener. + *
+ * + *+ * The FetcherListener will receive an FetcherEvent when a Fetcher event (feed polled, + * retrieved, etc) occurs + *
+ * + * @param listener The FetcherListener to recieve the event + */ + public abstract void addFetcherEventListener(FetcherListener listener); + + /** + *+ * Remove a FetcherListener + *
+ * + * @param listener The FetcherListener to remove + */ + public abstract void removeFetcherEventListener(FetcherListener listener); + + /** + * Retrieve a feed over HTTP + * + * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve + * @return A {@link com.rometools.rome.feed.synd.SyndFeed} object + * @throws IllegalArgumentException if the URL is null; + * @throws IOException if a TCP error occurs + * @throws FeedException if the feed is not valid + * @throws FetcherException if a HTTP error occurred + */ + public abstract SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException; + + public SyndFeed retrieveFeed(String userAgent, URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException; + + /** + * If set to true, the WireFeed will be made accessible from the SyndFeed object returned from + * the Fetcher via the originalWireFeed() method. Each Entry in the feed will have the + * corresponding wireEntry property set. + */ + void setPreserveWireFeed(boolean preserveWireFeed); +} diff --git a/src/main/java/com/rometools/fetcher/FetcherEvent.java b/src/main/java/com/rometools/fetcher/FetcherEvent.java new file mode 100644 index 0000000..6ed4a0c --- /dev/null +++ b/src/main/java/com/rometools/fetcher/FetcherEvent.java @@ -0,0 +1,90 @@ +package com.rometools.fetcher; + +import java.util.EventObject; + +import com.rometools.rome.feed.synd.SyndFeed; + +/** + * Implementation note: FetcherEvent is not thread safe. Make sure that they are only ever accessed + * by one thread. If necessary, make all getters and setters synchronized, or alternatively make all + * fields final. 
+ * + * @author nl + */ +public class FetcherEvent extends EventObject { + + private static final long serialVersionUID = 1L; + + public static final String EVENT_TYPE_FEED_POLLED = "FEED_POLLED"; + public static final String EVENT_TYPE_FEED_RETRIEVED = "FEED_RETRIEVED"; + public static final String EVENT_TYPE_FEED_UNCHANGED = "FEED_UNCHANGED"; + + private String eventType; + private String urlString; + private SyndFeed feed; + + public FetcherEvent(final Object source) { + super(source); + } + + public FetcherEvent(final Object source, final String urlStr, final String eventType) { + this(source); + setUrlString(urlStr); + setEventType(eventType); + } + + public FetcherEvent(final Object source, final String urlStr, final String eventType, final SyndFeed feed) { + this(source, urlStr, eventType); + setFeed(feed); + } + + /** + * @return Returns the feed. + * + *+ * The feed will only be set if the eventType is EVENT_TYPE_FEED_RETRIEVED + *
+ */ + public SyndFeed getFeed() { + return feed; + } + + /** + * @param feed The feed to set. + * + *+ * The feed will only be set if the eventType is EVENT_TYPE_FEED_RETRIEVED + *
+ */ + public void setFeed(final SyndFeed feed) { + this.feed = feed; + } + + /** + * @return Returns the eventType. + */ + public String getEventType() { + return eventType; + } + + /** + * @param eventType The eventType to set. + */ + public void setEventType(final String eventType) { + this.eventType = eventType; + } + + /** + * @return Returns the urlString. + */ + public String getUrlString() { + return urlString; + } + + /** + * @param urlString The urlString to set. + */ + public void setUrlString(final String urlString) { + this.urlString = urlString; + } +} diff --git a/src/main/java/com/rometools/fetcher/FetcherException.java b/src/main/java/com/rometools/fetcher/FetcherException.java new file mode 100644 index 0000000..447ab2f --- /dev/null +++ b/src/main/java/com/rometools/fetcher/FetcherException.java @@ -0,0 +1,51 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package com.rometools.fetcher; + +/** + * @author Nick Lothian + * + */ +public class FetcherException extends Exception { + private static final long serialVersionUID = 1L; + + int responseCode; + + public FetcherException(final Throwable cause) { + super(); + initCause(cause); + } + + public FetcherException(final String message, final Throwable cause) { + super(message); + initCause(cause); + } + + public FetcherException(final String message) { + super(message); + } + + public FetcherException(final int responseCode, final String message) { + this(message); + this.responseCode = responseCode; + } + + public int getResponseCode() { + return responseCode; + } + +} diff --git a/src/main/java/com/rometools/fetcher/FetcherListener.java b/src/main/java/com/rometools/fetcher/FetcherListener.java new file mode 100644 index 0000000..617532c --- /dev/null +++ b/src/main/java/com/rometools/fetcher/FetcherListener.java @@ -0,0 +1,16 @@ +package com.rometools.fetcher; + +import java.util.EventListener; + +public interface FetcherListener extends EventListener { + + /** + *+ * Called when a fetcher event occurs + *
+ * + * @param event the event that fired + */ + public void fetcherEvent(FetcherEvent event); + +} diff --git a/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcher.java b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcher.java new file mode 100644 index 0000000..15f07ac --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcher.java @@ -0,0 +1,225 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package com.rometools.fetcher.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLConnection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Properties; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.rometools.fetcher.FeedFetcher; +import com.rometools.fetcher.FetcherEvent; +import com.rometools.fetcher.FetcherException; +import com.rometools.fetcher.FetcherListener; +import com.rometools.rome.feed.synd.SyndFeed; + +public abstract class AbstractFeedFetcher implements FeedFetcher { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractFeedFetcher.class); + + private final Set+ * Handles HTTP error codes. + *
+ * + * @param responseCode the HTTP response code + * @throws FetcherException if response code is in the range 400 to 599 inclusive + */ + protected void handleErrorCodes(final int responseCode) throws FetcherException { + // Handle 2xx codes as OK, so ignore them here + // 3xx codes are handled by the HttpURLConnection class + if (responseCode == 403) { + // Authentication is required + throwAuthenticationError(responseCode); + } else if (responseCode >= 400 && responseCode < 500) { + throw4XXError(responseCode); + } else if (responseCode >= 500 && responseCode < 600) { + throw new FetcherException(responseCode, "The server encounted an error. HTTP Response code was:" + responseCode); + } + } + + protected void throw4XXError(final int responseCode) throws FetcherException { + throw new FetcherException(responseCode, "The requested resource could not be found. HTTP Response code was:" + responseCode); + } + + protected void throwAuthenticationError(final int responseCode) throws FetcherException { + throw new FetcherException(responseCode, "Authentication required for that resource. HTTP Response code was:" + responseCode); + } + + /** + *+ * Combine the entries in two feeds into a single feed. + *
+ * + *+ * The returned feed will have the same data as the newFeed parameter, with the entries from + * originalFeed appended to the end of its entries. + *
+ * + * @param originalFeed + * @param newFeed + * @return + */ + public static SyndFeed combineFeeds(final SyndFeed originalFeed, final SyndFeed newFeed) { + try { + final SyndFeed result = (SyndFeed) newFeed.clone(); + result.getEntries().addAll(result.getEntries().size(), originalFeed.getEntries()); + return result; + } catch (final CloneNotSupportedException e) { + final IllegalArgumentException iae = new IllegalArgumentException("Cannot clone feed"); + iae.initCause(e); + throw iae; + } + } + + public boolean isPreserveWireFeed() { + return preserveWireFeed; + } + + @Override + public void setPreserveWireFeed(final boolean preserveWireFeed) { + this.preserveWireFeed = preserveWireFeed; + } + +} diff --git a/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java new file mode 100644 index 0000000..5bac21e --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java @@ -0,0 +1,33 @@ +package com.rometools.fetcher.impl; + +import java.beans.EventSetDescriptor; +import java.beans.SimpleBeanInfo; +import java.lang.reflect.Method; + +import com.rometools.fetcher.FetcherEvent; +import com.rometools.fetcher.FetcherListener; + +public class AbstractFeedFetcherBeanInfo extends SimpleBeanInfo { + + @Override + public EventSetDescriptor[] getEventSetDescriptors() { + + try { + + // get the class object which we'll describe + final Class+ * An interface to allow caching of feed details. Implementing this allows the + * {@link com.rometools.fetcher.io.HttpURLFeedFetcher} class to enable conditional gets + *
+ * + * @author Nick Lothian + * + */ +public interface FeedFetcherCache { + + /** + * Get a SyndFeedInfo object from the cache. + * + * @param feedUrl The url of the feed + * @return A SyndFeedInfo or null if it is not in the cache + */ + public SyndFeedInfo getFeedInfo(URL feedUrl); + + /** + * Add a SyndFeedInfo object to the cache + * + * @param feedUrl The url of the feed + * @param syndFeedInfo A SyndFeedInfo for the feed + */ + public void setFeedInfo(URL feedUrl, SyndFeedInfo syndFeedInfo); + + /** + * Removes all items from the cache. + */ + public void clear(); + + /** + * Removes the SyndFeedInfo identified by the url from the cache. + * + * @return The removed SyndFeedInfo + */ + public SyndFeedInfo remove(URL feedUrl); + +} diff --git a/src/main/java/com/rometools/fetcher/impl/HashMapFeedInfoCache.java b/src/main/java/com/rometools/fetcher/impl/HashMapFeedInfoCache.java new file mode 100644 index 0000000..7508ce7 --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/HashMapFeedInfoCache.java @@ -0,0 +1,139 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import java.io.Serializable; +import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + *+ * A very simple implementation of the {@link com.rometools.fetcher.impl.FeedFetcherCache} + * interface. + *
+ * + *+ * This implementation uses a HashMap to cache retrieved feeds. This implementation is most suitible + * for sort term (client aggregator?) use, as the memory usage will increase over time as the number + * of feeds in the cache increases. + *
+ * + * @author Nick Lothian + * + */ +public class HashMapFeedInfoCache implements FeedFetcherCache, Serializable { + + private static final long serialVersionUID = 1L; + + static HashMapFeedInfoCache instance; + + private Map+ * Constructor for HashMapFeedInfoCache + *
+ * + *+ * Only use this if you want multiple instances of the cache. Usually getInstance() is more + * appropriate. + *
+ * + */ + public HashMapFeedInfoCache() { + setInfoCache(createInfoCache()); + } + + /** + * Get the global instance of the cache + * + * @return an implementation of FeedFetcherCache + */ + public static synchronized FeedFetcherCache getInstance() { + if (instance == null) { + instance = new HashMapFeedInfoCache(); + } + return instance; + } + + protected Map+ * Class to retrieve syndication files via HTTP. + *
+ * + *+ * If passed a {@link com.rometools.fetcher.impl.FeedFetcherCache} in the constructor it will use + * conditional gets to only retrieve modified content. + *
+ * + *+ * The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where supported by the + * server. + *
+ * + *+ * Simple usage: + * + *
+ * // create the cache + * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache(); + * // retrieve the feed the first time + * // any subsequent request will use conditional gets and only + * // retrieve the resource if it has changed + * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl); + *+ * + * + * + * @see http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers + * @see http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level + * @see http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html + * @author Nick Lothian + */ +public class HttpURLFeedFetcher extends AbstractFeedFetcher { + + private volatile int connectTimeout = -1; + + static final int POLL_EVENT = 1; + static final int RETRIEVE_EVENT = 2; + static final int UNCHANGED_EVENT = 3; + + private FeedFetcherCache feedInfoCache; + + /** + * Constructor to use HttpURLFeedFetcher without caching of feeds + * + */ + public HttpURLFeedFetcher() { + this(null); + } + + /** + * Constructor to enable HttpURLFeedFetcher to cache feeds + * + * @param feedInfoCache - an instance of the FeedFetcherCache interface + */ + public HttpURLFeedFetcher(final FeedFetcherCache feedInfoCache) { + setFeedInfoCache(feedInfoCache); + } + + @Override + public SyndFeed retrieveFeed(final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException { + return this.retrieveFeed(getUserAgent(), feedUrl); + } + + /** + * Retrieve a feed over HTTP + * + * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve + * @return A {@link com.rometools.rome.feed.synd.SyndFeed} object + * @throws IllegalArgumentException if the URL is null; + * @throws IOException if a TCP error occurs + * @throws FeedException if the feed is not valid + * @throws FetcherException if a HTTP error occurred + */ + @Override + public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl) throws IllegalArgumentException, 
IOException, FeedException, FetcherException { + if (feedUrl == null) { + throw new IllegalArgumentException("null is not a valid URL"); + } + + final URLConnection connection = feedUrl.openConnection(); + if (!(connection instanceof HttpURLConnection)) { + throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url"); + } + final HttpURLConnection httpConnection = (HttpURLConnection) connection; + if (connectTimeout >= 0) { + httpConnection.setConnectTimeout(connectTimeout); + } + // httpConnection.setInstanceFollowRedirects(true); // this is true by default, but can be + // changed on a claswide basis + + final FeedFetcherCache cache = getFeedInfoCache(); + if (cache != null) { + SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl); + setRequestHeaders(connection, syndFeedInfo, userAgent); + httpConnection.connect(); + try { + fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection); + + if (syndFeedInfo == null) { + // this is a feed that hasn't been retrieved + syndFeedInfo = new SyndFeedInfo(); + retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection); + } else { + // check the response code + final int responseCode = httpConnection.getResponseCode(); + if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) { + // the response code is not 304 NOT MODIFIED + // This is either because the feed server + // does not support condition gets + // or because the feed hasn't changed + retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection); + } else { + // the feed does not need retrieving + fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection); + } + } + + return syndFeedInfo.getSyndFeed(); + } finally { + httpConnection.disconnect(); + } + } else { + fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection); + InputStream inputStream = null; + setRequestHeaders(connection, null, userAgent); + + httpConnection.connect(); + try { + inputStream = httpConnection.getInputStream(); + return 
getSyndFeedFromStream(inputStream, connection); + } catch (final java.io.IOException e) { + handleErrorCodes(((HttpURLConnection) connection).getResponseCode()); + } finally { + IO.close(inputStream); + httpConnection.disconnect(); + } + // we will never actually get to this line + return null; + } + } + + protected void retrieveAndCacheFeed(final URL feedUrl, final SyndFeedInfo syndFeedInfo, final HttpURLConnection connection) + throws IllegalArgumentException, FeedException, FetcherException, IOException { + handleErrorCodes(connection.getResponseCode()); + + resetFeedInfo(feedUrl, syndFeedInfo, connection); + final FeedFetcherCache cache = getFeedInfoCache(); + // resetting feed info in the cache + // could be needed for some implementations + // of FeedFetcherCache (eg, distributed HashTables) + if (cache != null) { + cache.setFeedInfo(feedUrl, syndFeedInfo); + } + } + + protected void resetFeedInfo(final URL orignalUrl, final SyndFeedInfo syndFeedInfo, final HttpURLConnection connection) throws IllegalArgumentException, + IOException, FeedException { + // need to always set the URL because this may have changed due to 3xx redirects + syndFeedInfo.setUrl(connection.getURL()); + + // the ID is a persistant value that should stay the same even if the URL for the + // feed changes (eg, by 3xx redirects) + syndFeedInfo.setId(orignalUrl.toString()); + + // This will be 0 if the server doesn't support or isn't setting the last modified header + syndFeedInfo.setLastModified(connection.getLastModified()); + + // This will be null if the server doesn't support or isn't setting the ETag header + syndFeedInfo.setETag(connection.getHeaderField("ETag")); + + // get the contents + InputStream inputStream = null; + try { + inputStream = connection.getInputStream(); + SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection); + + final String imHeader = connection.getHeaderField("IM"); + if (isUsingDeltaEncoding() && imHeader != null && imHeader.contains("feed")) { + 
final FeedFetcherCache cache = getFeedInfoCache(); + if (cache != null && connection.getResponseCode() == 226) { + // client is setup to use http delta encoding and the server supports it and has + // returned a delta encoded response + // This response only includes new items + final SyndFeedInfo cachedInfo = cache.getFeedInfo(orignalUrl); + if (cachedInfo != null) { + final SyndFeed cachedFeed = cachedInfo.getSyndFeed(); + + // set the new feed to be the orginal feed plus the new items + syndFeed = combineFeeds(cachedFeed, syndFeed); + } + } + } + + syndFeedInfo.setSyndFeed(syndFeed); + } finally { + IO.close(inputStream); + } + } + + /** + *
+ * Set appropriate HTTP headers, including conditional get and gzip encoding headers + *
+ * + * @param connection A URLConnection + * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null + * @param userAgent the name of the user-agent to be placed in HTTP-header. + */ + protected void setRequestHeaders(final URLConnection connection, final SyndFeedInfo syndFeedInfo, final String userAgent) { + if (syndFeedInfo != null) { + // set the headers to get feed only if modified + // we support the use of both last modified and eTag headers + if (syndFeedInfo.getLastModified() != null) { + final Object lastModified = syndFeedInfo.getLastModified(); + if (lastModified instanceof Long) { + connection.setIfModifiedSince((Long) syndFeedInfo.getLastModified()); + } + } + if (syndFeedInfo.getETag() != null) { + connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag()); + } + + } + // header to retrieve feed gzipped + connection.setRequestProperty("Accept-Encoding", "gzip"); + connection.addRequestProperty("User-Agent", userAgent); + + if (isUsingDeltaEncoding()) { + connection.addRequestProperty("A-IM", "feed"); + } + } + + private SyndFeed readSyndFeedFromStream(final InputStream inputStream, final URLConnection connection) throws IOException, IllegalArgumentException, + FeedException { + BufferedInputStream is; + if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) { + // handle gzip encoded content + is = new BufferedInputStream(new GZIPInputStream(inputStream)); + } else { + is = new BufferedInputStream(inputStream); + } + + // InputStreamReader reader = new InputStreamReader(is, + // ResponseHandler.getCharacterEncoding(connection)); + + // SyndFeedInput input = new SyndFeedInput(); + + final XmlReader reader; + if (connection.getHeaderField("Content-Type") != null) { + reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true); + } else { + reader = new XmlReader(is, true); + } + + final SyndFeedInput syndFeedInput = new SyndFeedInput(); + syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); 
+ + return syndFeedInput.build(reader); + + } + + private SyndFeed getSyndFeedFromStream(final InputStream inputStream, final URLConnection connection) throws IOException, IllegalArgumentException, + FeedException { + final SyndFeed feed = readSyndFeedFromStream(inputStream, connection); + fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed); + return feed; + } + + /** + * @return The FeedFetcherCache used by this fetcher (Could be null) + */ + public synchronized FeedFetcherCache getFeedInfoCache() { + return feedInfoCache; + } + + /** + * @param cache The cache to be used by this fetcher (pass null to stop using a cache) + */ + public synchronized void setFeedInfoCache(final FeedFetcherCache cache) { + feedInfoCache = cache; + } + + /** + * @param timeout see java.net.URLConnection.setConnectTimeout(int timeout) + */ + public synchronized void setConnectTimeout(final int timeout) { + connectTimeout = timeout; + } +} diff --git a/src/main/java/com/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java b/src/main/java/com/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java new file mode 100644 index 0000000..a398bf6 --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java @@ -0,0 +1,81 @@ +package com.rometools.fetcher.impl; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + *+ * An implementation of the {@link com.rometools.fetcher.impl.FeedFetcherCache} interface. + *
+ * + *+ * Unlike the HashMapFeedInfoCache this implementation will not grow unbound + *
+ * + * @author Javier Kohen + * @author Nick Lothian + * + */ +public class LinkedHashMapFeedInfoCache extends HashMapFeedInfoCache { + + private final class CacheImpl extends LinkedHashMap+ * Constructor for HashMapFeedInfoCache + *
+ * + *+ * Only use this if you want multiple instances of the cache. Usually {@link #getInstance()} is + * more appropriate. + *
+ * + * @see #getInstance() + */ + public LinkedHashMapFeedInfoCache() { + super(); + } + + @Override + protected Map+ * Gets the character encoding of a response. (Note that this is different to the + * content-encoding) + *
+ * + * @param contentTypeHeader the value of the content-type HTTP header eg: text/html; + * charset=ISO-8859-4 + * @return the character encoding, eg: ISO-8859-4 + */ + public static String getCharacterEncoding(final String contentTypeHeader) { + if (contentTypeHeader == null) { + return defaultCharacterEncoding; + } + + final Matcher m = characterEncodingPattern.matcher(contentTypeHeader); + // if (!m.matches()) { + if (!m.find()) { + return defaultCharacterEncoding; + } else { + return m.group(1); + } + } +} diff --git a/src/main/java/com/rometools/fetcher/impl/SyndFeedInfo.java b/src/main/java/com/rometools/fetcher/impl/SyndFeedInfo.java new file mode 100644 index 0000000..07c9bfe --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/SyndFeedInfo.java @@ -0,0 +1,160 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import java.io.Serializable; +import java.net.URL; + +import com.rometools.rome.feed.impl.ObjectBean; +import com.rometools.rome.feed.synd.SyndFeed; + +/** + *+ * A class to represent a {@link com.rometools.rome.feed.synd.SyndFeed} and some useful information + * about it. + *
+ * + *+ * This class is thread safe, as expected by the different feed fetcher implementations. + *
+ * + * @author Nick Lothian + */ +public class SyndFeedInfo implements Cloneable, Serializable { + private static final long serialVersionUID = 1L; + + private final ObjectBean _objBean; + private String id; + private URL url; + private Object lastModified; + private String eTag; + private SyndFeed syndFeed; + + public SyndFeedInfo() { + _objBean = new ObjectBean(this.getClass(), this); + } + + /** + * Creates a deep 'bean' clone of the object. + *+ * + * @return a clone of the object. + * @throws CloneNotSupportedException thrown if an element of the object cannot be cloned. + * + */ + @Override + public Object clone() throws CloneNotSupportedException { + return _objBean.clone(); + } + + /** + * Indicates whether some other object is "equal to" this one as defined by the Object equals() + * method. + *
+ * + * @param other he reference object with which to compare. + * @return true if 'this' object is equal to the 'other' object. + * + */ + @Override + public boolean equals(final Object other) { + return _objBean.equals(other); + } + + /** + * Returns a hashcode value for the object. + *
+ * It follows the contract defined by the Object hashCode() method. + *
+ * + * @return the hashcode of the bean object. + * + */ + @Override + public int hashCode() { + return _objBean.hashCode(); + } + + /** + * Returns the String representation for the object. + *
+ *
+ * @return String representation for the object.
+ *
+ */
+ @Override
+ public String toString() {
+ // Delegate to the ObjectBean helper that the constructor built around this instance.
+ final String rendered = _objBean.toString();
+ return rendered;
+ }
+
+ /**
+ * Returns the ETag recorded for the most recent retrieval of the feed.
+ *
+ * @return the stored ETag, or {@code null} if none has been set
+ */
+ public synchronized String getETag() {
+ return this.eTag;
+ }
+
+ /**
+ * Returns the last-modified marker recorded for the feed.
+ *
+ * @return the stored last-modified value (declared as {@code Object}), or {@code null} if none
+ * has been set
+ */
+ public synchronized Object getLastModified() {
+ return this.lastModified;
+ }
+
+ /**
+ * Returns the URL the feed content was served from.
+ *
+ * @return the stored URL, or {@code null} if none has been set
+ */
+ public synchronized URL getUrl() {
+ return this.url;
+ }
+
+ /**
+ * Stores the ETag the feed was last retrieved with.
+ *
+ * @param string the ETag value to remember; stored as-is, {@code null} is not rejected
+ */
+ public synchronized void setETag(final String string) {
+ this.eTag = string;
+ }
+
+ /**
+ * Stores the last-modified marker for the feed.
+ *
+ * @param o the value to remember; stored as-is without any type check
+ */
+ public synchronized void setLastModified(final Object o) {
+ this.lastModified = o;
+ }
+
+ public synchronized void setUrl(final URL url) { // records the URL the feed was served from; stored as-is, no validation
+ this.url = url;
+ }
+
+ /**
+ * Returns the feed held by this info object.
+ *
+ * @return the stored {@code SyndFeed}, or {@code null} if none has been set
+ */
+ public synchronized SyndFeed getSyndFeed() {
+ return this.syndFeed;
+ }
+
+ /**
+ * Stores the feed held by this info object.
+ *
+ * @param feed the {@code SyndFeed} to remember; stored as-is, {@code null} is not rejected
+ */
+ public synchronized void setSyndFeed(final SyndFeed feed) {
+ this.syndFeed = feed;
+ }
+
+ /**
+ * Returns the identifier assigned to the feed.
+ *
+ * @return the unique ID identifying the feed, or {@code null} if none has been set
+ */
+ public synchronized String getId() {
+ return this.id;
+ }
+
+ /**
+ * Stores the identifier assigned to the feed.
+ *
+ * @param string a unique ID to identify the feed; it is meant to stay the same even if the
+ * URL of the feed changes
+ */
+ public synchronized void setId(final String string) {
+ this.id = string;
+ }
+
+}
diff --git a/src/main/resources/.gitignore b/src/main/resources/.gitignore
new file mode 100644
index 0000000..53b845b
--- /dev/null
+++ b/src/main/resources/.gitignore
@@ -0,0 +1 @@
+# needed to commit empty folder
\ No newline at end of file
diff --git a/src/site/apt/BuildingTheRomeFetcher.apt b/src/site/apt/BuildingTheRomeFetcher.apt
new file mode 100644
index 0000000..83ecc7d
--- /dev/null
+++ b/src/site/apt/BuildingTheRomeFetcher.apt
@@ -0,0 +1,11 @@
+ -----
+ Building the Rome Fetcher
+ -----
+ mkurz
+ -----
+ 2011-08-15 17:34:51.402
+ -----
+
+Building the Rome Fetcher
+
+ The Rome Fetcher can be built using Maven 2.
diff --git a/src/site/apt/ChangeLog.apt b/src/site/apt/ChangeLog.apt
new file mode 100644
index 0000000..135af0c
--- /dev/null
+++ b/src/site/apt/ChangeLog.apt
@@ -0,0 +1,113 @@
+ -----
+ Change Log
+ -----
+ mkurz
+ -----
+ 2011-08-15 17:27:20.212
+ -----
+
+Change Log
+
+*Prior to first release (on the way to v0.3)
+
+ [[1]] Updated to handle removal of IO methods using byte streams\
+ Byte Stream IO was removed from Rome itself. The Rome Fetcher is now updated to support this
+
+ [[1]] Add FeedFetcherI interface and FeedFetcherFactory class\
+ There is now a FeedFetcherI interface, which FeedFetcher implements. Use FeedFetcherFactory to create instances of
+ FeedFetcher (as suggested by Joseph Ottinger) (FeedFetcherFactory was later removed)
+
+ [[1]] Event Support Added to FeedFetcherI\
+ The FeedFetcherI interface now supports feed polled, feed retrieved and feed unchanged events
+
+ [[1]] Samples added\
+ Samples are now included with the Rome Fetcher
+
+ [[1]] Unit Tests Added\
+ JUnit based tests which invoke the Rome Fetcher against an embedded Jetty webserver are now included
+
+ [[1]] Bug fixes in the FeedFetcher event model\
+ The JUnit test suite uncovered some bugs in the event model used by the FeedFetcher. These bugs are now fixed.
+
+ [[1]] Refactored the SyndFeedInfo class\
+ SyndFeedInfo now extends ObjectBean
+
+ [[1]] Removed FeedFetcherFactory\
+ The benefit of the FeedFetcherFactory was arguable. Now the client code will need to manage the creation of specific implementations of the FeedFetcher
+
+ []
+
+*Prior to second release (on the way to v0.4)
+
+ [[1]] Refactored to match Rome naming standards\
+ FeedFetcherI renamed to FeedFetcher\
+ #. New FeedFetcher Implementation\
+ HttpClientFeedFetcher uses the Apache Commons HTTP Client
+
+ [[1]] Abstract test classes excluded in project.xml\
+ Tests now run correctly under Maven
+
+ [[1]] Added GZip support to HttpClientFeedFetcher\
+ HttpClientFeedFetcher now supports GZip compression. Tests have been added.
+
+ []
+
+*Prior to third release (on the way to v0.5)
+
+ [[1]] SyndFeedInfo implements Serializable\
+ SyndFeedInfo implements Serializable to make it easier to store
+
+ [[1]] Support for rfc3229 delta encoding\
+ The Fetcher now supports rfc3229 delta encoding. See {{{http://www.ietf.org/rfc/rfc3229.txt}http://www.ietf.org/rfc/rfc3229.txt}} and
+ {{{http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html}http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html}}.
+ Note that this support is experimental and disabled by default
+
+ []
+
+*Prior to 0.6
+
+ [[1]] Feed passed to FetcherEvents\
+ When a feed is retrieved it is now passed to the Fetcher Event. This makes it easier to code applications using an event oriented style.
+
+ []
+
+*Prior to 0.7
+
+ [[1]] Fix for URL Connection leak\
+ In some circumstances URLConnection objects were not closed. This could cause problems in long\-running applications.
+
+ []
+
+*0.8 was never released
+
+*Prior to 0.9
+
+ [[1]] Fix for potential synchronization issue\
+ There was the possibility of synchronization issues in the FeedFetcher. Fixed, thanks to suggestions from Javier Kohen.
+
+ [[1]] New LinkedHashMapFeedInfoCache FeedFetcherCache implementation\
+ The new LinkedHashMapFeedInfoCache has the advantage that it will not grow unbounded
+
+ []
+
+*Prior to 1.0RC2
+
+ [[1]] BeanInfo class added for AbstractFeedFetcher\
+ com.rometools.rome.fetcher.impl.AbstractFeedFetcherBeanInfo was created to allow introspection to correctly find the events
+
+ [[1]] Callback to allow access to HttpClient HttpMethod object\
+ Add a HttpClientMethodCallbackIntf to allow the calling code to modify the HttpClient HttpMethod used to make the request
+ (e.g., add additional headers, etc.) Also fixes a reported bug where the user agent wasn't being set properly
+
+ [[1]] Support for clearing cache\
+ See {{{http://java.net/jira/browse/ROME\-119}http://java.net/jira/browse/ROME\-119}} for details
+
+ []
+
+*Prior to 1.0
+
+ [[1]] Support for preserving wire feed data.\
+ The fetcher now has a setPreserveWireFeed() method which will setup ROME to preserve WireFeed data. See
+ {{{http://rometools.github.io/rome/PreservingWireFeeds.html}PreservingWireFeeds}} for further information.
+
+ []
diff --git a/src/site/apt/Dependencies/HttpClientFeedFetcher.apt b/src/site/apt/Dependencies/HttpClientFeedFetcher.apt
new file mode 100644
index 0000000..e67bfdc
--- /dev/null
+++ b/src/site/apt/Dependencies/HttpClientFeedFetcher.apt
@@ -0,0 +1,13 @@
+ -----
+ HttpClientFeedFetcher
+ -----
+ mkurz
+ -----
+ 2011-08-15 17:40:11.468
+ -----
+
+HttpClientFeedFetcher
+
+
+ An implementation of the <<
 * It reads a single RSS/Atom feed from the URL given on the command line, reports its title and
+ * entry count, then polls the feed a second time to test conditional get support.
+ *
+ * Converted from the original FeedAggregator sample
+ *
+ *
+ * @author Alejandro Abdelnur
+ * @author Nick Lothian
+ *
+ */
+public class FeedReader {
+
+    /**
+     * Command-line entry point. Exactly one argument is expected: the URL of the feed to read.
+     * When the argument count is different, or reading the feed fails, a short usage message is
+     * printed to standard output.
+     *
+     * @param args command-line arguments; {@code args[0]} is the feed URL
+     */
+    public static void main(final String[] args) {
+
+        final boolean ok = args.length == 1 && pollTwice(args[0]);
+        if (ok) {
+            return;
+        }
+
+        System.out.println();
+        System.out.println("FeedReader reads and prints any RSS/Atom feed type.");
+        System.out.println("The first parameter must be the URL of the feed to read.");
+        System.out.println();
+
+    }
+
+    /**
+     * Retrieves the feed at the given address twice through an {@code HttpURLFeedFetcher} that
+     * uses the cache returned by {@code HashMapFeedInfoCache.getInstance()}, writing progress
+     * messages to standard error.
+     *
+     * @param address the feed URL as passed on the command line
+     * @return {@code true} if both retrievals completed; {@code false} if any exception was
+     *         thrown, in which case its message is printed to standard output and its stack
+     *         trace is printed as well
+     */
+    private static boolean pollTwice(final String address) {
+        try {
+            final URL feedUrl = new URL(address);
+            final FeedFetcherCache cache = HashMapFeedInfoCache.getInstance();
+            final FeedFetcher fetcher = new HttpURLFeedFetcher(cache);
+            fetcher.addFetcherEventListener(new FetcherEventListenerImpl());
+
+            // First retrieval: the listener should report a Feed Polled event followed by a
+            // Feed Retrieved event, provided the feed is valid.
+            System.err.println("Retrieving feed " + feedUrl);
+            final SyndFeed feed = fetcher.retrieveFeed(feedUrl);
+            System.err.println(feedUrl + " retrieved");
+            System.err.println(feedUrl + " has a title: " + feed.getTitle()
+                    + " and contains " + feed.getEntries().size() + " entries.");
+
+            // Second retrieval: if the feed has not changed and the server supports conditional
+            // gets, the listener reports Feed Unchanged after Feed Polled.
+            System.err.println("Polling " + feedUrl + " again to test conditional get support.");
+            fetcher.retrieveFeed(feedUrl);
+            System.err.println("If a \"Feed Unchanged\" event fired then the server supports conditional gets.");
+
+            return true;
+        } catch (final Exception ex) {
+            System.out.println("ERROR: " + ex.getMessage());
+            ex.printStackTrace();
+            return false;
+        }
+    }
+
+    /**
+     * Listener that writes one line to standard error for each polled, retrieved or unchanged
+     * event; events of any other type are ignored.
+     */
+    static class FetcherEventListenerImpl implements FetcherListener {
+        /**
+         * @see com.rometools.rome.fetcher.FetcherListener#fetcherEvent(com.rometools.rome.fetcher.FetcherEvent)
+         */
+        @Override
+        public void fetcherEvent(final FetcherEvent event) {
+            final String type = event.getEventType();
+
+            // Pick the message prefix for the event type; the constant is the receiver of
+            // equals() so a null type simply matches nothing.
+            final String prefix;
+            if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(type)) {
+                prefix = "\tEVENT: Feed Polled. URL = ";
+            } else if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(type)) {
+                prefix = "\tEVENT: Feed Retrieved. URL = ";
+            } else if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(type)) {
+                prefix = "\tEVENT: Feed Unchanged. URL = ";
+            } else {
+                return;
+            }
+
+            System.err.println(prefix + event.getUrlString());
+        }
+    }
+}
diff --git a/src/test/resources/atom_1.0.xml b/src/test/resources/atom_1.0.xml
new file mode 100644
index 0000000..9d83939
--- /dev/null
+++ b/src/test/resources/atom_1.0.xml
@@ -0,0 +1,78 @@
+
+