diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..83a7c0c --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.classpath +.project +.settings +target +.idea +*.iml +atlassian-ide-plugin.xml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f43cdb1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,14 @@ +Copyright 2004 Sun Microsystems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..49d2dfe --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +rome +==== + +ROME is a set of RSS and Atom Utilities for Java. 
It makes it easy to work in Java with most syndication formats: RSS 0.90, RSS 0.91 Netscape, +RSS 0.91 Userland, RSS 0.92, RSS 0.93, RSS 0.94, RSS 1.0, RSS 2.0, Atom 0.3, Atom 1.0 + +More Information: http://rometools.github.io/rome-fetcher/ diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..5d86063 --- /dev/null +++ b/pom.xml @@ -0,0 +1,123 @@ + + + + 4.0.0 + + + com.rometools + rome-parent + 1.6.0-SNAPSHOT + + + rome-fetcher + 1.6.0-SNAPSHOT + jar + + rome-fetcher + + A well behaved feed fetcher API for ROME + + http://rometools.github.io/rome-fetcher/ + + + scm:git:ssh://github.com/rometools/rome-fetcher.git + scm:git:ssh://git@github.com/rometools/rome-fetcher.git + https://github.com/rometools/rome-fetcher + + + + + Nick Lothian + http://nicklothian.com + + + Robert Cooper + kebernet@gmail.comM + + + + + + sonatype-nexus-snapshots + https://oss.sonatype.org/content/repositories/snapshots + + false + + + true + + + + + + + + org.apache.maven.plugins + maven-scm-publish-plugin + + gh-pages + ${project.scm.developerConnection} + ${project.build.directory}/site + + + + org.apache.felix + maven-bundle-plugin + + + + com.rometools.fetcher, + com.rometools.fetcher.impl + + + com.rometools.rome.feed.impl, + com.rometools.rome.feed.synd, + com.rometools.rome.io, + com.rometools.utils, + org.apache.commons.httpclient;resolution:=optional, + org.apache.commons.httpclient.auth;resolution:=optional, + org.apache.commons.httpclient.methods;resolution:=optional, + org.apache.commons.httpclient.params;resolution:=optional, + org.slf4j + + + + + + + + + + + com.rometools + rome + 1.6.0-SNAPSHOT + + + commons-httpclient + commons-httpclient + + + + javax.servlet + servlet-api + test + + + jetty + jetty + test + + + ch.qos.logback + logback-classic + test + + + junit + junit + test + + + + diff --git a/src/main/java/com/rometools/fetcher/FeedFetcher.java b/src/main/java/com/rometools/fetcher/FeedFetcher.java new file mode 100644 index 0000000..05f0868 --- 
/dev/null +++ b/src/main/java/com/rometools/fetcher/FeedFetcher.java @@ -0,0 +1,120 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher; + +import java.io.IOException; +import java.net.URL; + +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.io.FeedException; + +public interface FeedFetcher { + /** + *

+ * The default user agent. It is not marked final so that buggy Java compilers will not write this + * string into all classes that reference it. + *

+ * + *

+ * http://tinyurl.com/64t5n points to https://rome.dev.java.net. Some servers ban user agents + * with "Java" in the name. + *

+ * + */ + public static String DEFAULT_USER_AGENT = "Rome Client (http://tinyurl.com/64t5n)"; + + /** + * @param string The User-Agent to send to servers + */ + public abstract void setUserAgent(String string); + + /** + * @return the User-Agent currently being sent to servers + */ + public abstract String getUserAgent(); + + /** + *

+ * Turn on or off RFC 3229 delta encoding. + *

+ * + *

+ * See http://www.ietf.org/rfc/rfc3229.txt and + * http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html + *

+ * + *

+ * NOTE: This is experimental and feedback is welcome! + *

+ * + * @param useDeltaEncoding {@code true} to turn delta encoding on, {@code false} to turn it off + */ + public abstract void setUsingDeltaEncoding(boolean useDeltaEncoding); + + /** + *

+ * Is this fetcher using RFC 3229 delta encoding? + *

+ * + * @return {@code true} if this fetcher is using delta encoding + */ + public abstract boolean isUsingDeltaEncoding(); + + /** + *

+ * Add a FetcherListener. + *

+ * + *

+ * The FetcherListener will receive a FetcherEvent when a Fetcher event (feed polled, + * retrieved, etc.) occurs. + *

+ * + * @param listener The FetcherListener to receive the event + */ + public abstract void addFetcherEventListener(FetcherListener listener); + + /** + *

+ * Remove a FetcherListener. + *

+ * + * @param listener The FetcherListener to remove + */ + public abstract void removeFetcherEventListener(FetcherListener listener); + + /** + * Retrieve a feed over HTTP. + * + * @param feedUrl A non-null URL of an RSS/Atom feed to retrieve + * @return A {@link com.rometools.rome.feed.synd.SyndFeed} object + * @throws IllegalArgumentException if the URL is null + * @throws IOException if a TCP error occurs + * @throws FeedException if the feed is not valid + * @throws FetcherException if an HTTP error occurred + */ + public abstract SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException; + + public SyndFeed retrieveFeed(String userAgent, URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException; + + /** + * If set to true, the WireFeed will be made accessible from the SyndFeed object returned from + * the Fetcher via the originalWireFeed() method. Each Entry in the feed will have the + * corresponding wireEntry property set. + */ + void setPreserveWireFeed(boolean preserveWireFeed); +} diff --git a/src/main/java/com/rometools/fetcher/FetcherEvent.java b/src/main/java/com/rometools/fetcher/FetcherEvent.java new file mode 100644 index 0000000..6ed4a0c --- /dev/null +++ b/src/main/java/com/rometools/fetcher/FetcherEvent.java @@ -0,0 +1,90 @@ +package com.rometools.fetcher; + +import java.util.EventObject; + +import com.rometools.rome.feed.synd.SyndFeed; + +/** + * Implementation note: FetcherEvent is not thread safe. Make sure that instances are only ever accessed + * by one thread. If necessary, make all getters and setters synchronized, or alternatively make all + * fields final. 
+ * + * @author nl + */ +public class FetcherEvent extends EventObject { + + private static final long serialVersionUID = 1L; + + public static final String EVENT_TYPE_FEED_POLLED = "FEED_POLLED"; + public static final String EVENT_TYPE_FEED_RETRIEVED = "FEED_RETRIEVED"; + public static final String EVENT_TYPE_FEED_UNCHANGED = "FEED_UNCHANGED"; + + private String eventType; + private String urlString; + private SyndFeed feed; + + public FetcherEvent(final Object source) { + super(source); + } + + public FetcherEvent(final Object source, final String urlStr, final String eventType) { + this(source); + setUrlString(urlStr); + setEventType(eventType); + } + + public FetcherEvent(final Object source, final String urlStr, final String eventType, final SyndFeed feed) { + this(source, urlStr, eventType); + setFeed(feed); + } + + /** + * @return the feed, or {@code null} if none has been set. + * + *

+ * The feed will only be set if the eventType is {@link #EVENT_TYPE_FEED_RETRIEVED}

+ */ + public SyndFeed getFeed() { + return feed; + } + + /** + * @param feed The feed to set. + * + *

+ * The feed will only be set if the eventType is {@link #EVENT_TYPE_FEED_RETRIEVED}

+ */ + public void setFeed(final SyndFeed feed) { + this.feed = feed; + } + + /** + * @return the event type, e.g. {@link #EVENT_TYPE_FEED_POLLED}. + */ + public String getEventType() { + return eventType; + } + + /** + * @param eventType The eventType to set. + */ + public void setEventType(final String eventType) { + this.eventType = eventType; + } + + /** + * @return the URL string this event was created or last updated with. + */ + public String getUrlString() { + return urlString; + } + + /** + * @param urlString The urlString to set. + */ + public void setUrlString(final String urlString) { + this.urlString = urlString; + } +} diff --git a/src/main/java/com/rometools/fetcher/FetcherException.java b/src/main/java/com/rometools/fetcher/FetcherException.java new file mode 100644 index 0000000..447ab2f --- /dev/null +++ b/src/main/java/com/rometools/fetcher/FetcherException.java @@ -0,0 +1,51 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package com.rometools.fetcher; + +/** + * Signals that a feed could not be fetched; {@link #getResponseCode()} is 0 unless a code was supplied. + * @author Nick Lothian + */ +public class FetcherException extends Exception { + private static final long serialVersionUID = 1L; + + int responseCode; + + public FetcherException(final Throwable cause) { + super(); + initCause(cause); + } + + public FetcherException(final String message, final Throwable cause) { + super(message); + initCause(cause); + } + + public FetcherException(final String message) { + super(message); + } + + public FetcherException(final int responseCode, final String message) { + this(message); + this.responseCode = responseCode; + } + + public int getResponseCode() { + return responseCode; + } + +} diff --git a/src/main/java/com/rometools/fetcher/FetcherListener.java b/src/main/java/com/rometools/fetcher/FetcherListener.java new file mode 100644 index 0000000..617532c --- /dev/null +++ b/src/main/java/com/rometools/fetcher/FetcherListener.java @@ -0,0 +1,16 @@ +package com.rometools.fetcher; + +import java.util.EventListener; + +public interface FetcherListener extends EventListener { + + /** + *

+ * Called when a fetcher event occurs. + *

+ * + * @param event the event that fired + */ + public void fetcherEvent(FetcherEvent event); + +} diff --git a/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcher.java b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcher.java new file mode 100644 index 0000000..15f07ac --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcher.java @@ -0,0 +1,225 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package com.rometools.fetcher.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLConnection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Properties; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.rometools.fetcher.FeedFetcher; +import com.rometools.fetcher.FetcherEvent; +import com.rometools.fetcher.FetcherException; +import com.rometools.fetcher.FetcherListener; +import com.rometools.rome.feed.synd.SyndFeed; + +public abstract class AbstractFeedFetcher implements FeedFetcher { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractFeedFetcher.class); + + private final Set listeners; + private String userAgent; + private boolean usingDeltaEncoding; + private boolean preserveWireFeed; + + public AbstractFeedFetcher() { + + listeners = Collections.synchronizedSet(new HashSet()); + + final Properties props = new Properties(System.getProperties()); + final String resourceName = "fetcher.properties"; + + try { + + InputStream inputStream = this.getClass().getClassLoader().getResourceAsStream(resourceName); + if (inputStream == null) { + inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resourceName); + } + + if (inputStream != null) { + props.load(inputStream); + System.getProperties().putAll(props); + inputStream.close(); + } else { + LOG.warn("Could not find {} on classpath", resourceName); + } + + } catch (final IOException e) { + // do nothing - we don't want to fail just because we could not find the version + LOG.error("Error reading {} from classpath: {}", resourceName, e.getMessage()); + } + + setUserAgent(DEFAULT_USER_AGENT + " Ver: " + System.getProperty("rome.fetcher.version", "UNKNOWN")); + } + + /** + * @return the User-Agent currently being sent to servers + */ + @Override + public synchronized String getUserAgent() { + return userAgent; + } + + /** + * @param string The User-Agent 
to send to servers + */ + @Override + public synchronized void setUserAgent(final String string) { + userAgent = string; + } + + /** + * @param eventType The event type to fire + * @param connection the current connection + */ + protected void fireEvent(final String eventType, final URLConnection connection) { + fireEvent(eventType, connection.getURL().toExternalForm(), null); + } + + /** + * @param eventType The event type to fire + * @param connection the current connection + * @param feed The feed to pass to the event + */ + protected void fireEvent(final String eventType, final URLConnection connection, final SyndFeed feed) { + fireEvent(eventType, connection.getURL().toExternalForm(), feed); + } + + /** + * @param eventType The event type to fire + * @param urlStr the current url as a string + */ + protected void fireEvent(final String eventType, final String urlStr) { + fireEvent(eventType, urlStr, null); + } + + /** + * @param eventType The event type to fire + * @param urlStr the current url as a string + * @param feed The feed to pass to the event + */ + protected void fireEvent(final String eventType, final String urlStr, final SyndFeed feed) { + final FetcherEvent fetcherEvent = new FetcherEvent(this, urlStr, eventType, feed); + synchronized (listeners) { + for (final FetcherListener fetcherEventListener : listeners) { + fetcherEventListener.fetcherEvent(fetcherEvent); + } + } + } + + @Override + public void addFetcherEventListener(final FetcherListener listener) { + if (listener != null) { + listeners.add(listener); + } + } + + @Override + public void removeFetcherEventListener(final FetcherListener listener) { + if (listener != null) { + listeners.remove(listener); + } + } + + /** + * @return {@code true} if delta encoding is in use. + */ + @Override + public synchronized boolean isUsingDeltaEncoding() { + return usingDeltaEncoding; + } + + /** + * @param useDeltaEncoding {@code true} to use delta encoding. 
+ */ + @Override + public synchronized void setUsingDeltaEncoding(final boolean useDeltaEncoding) { + usingDeltaEncoding = useDeltaEncoding; + } + + /** + *

+ * Handles HTTP error codes. + *

+ * + * @param responseCode the HTTP response code + * @throws FetcherException if response code is in the range 400 to 599 inclusive + */ + protected void handleErrorCodes(final int responseCode) throws FetcherException { + // Handle 2xx codes as OK, so ignore them here + // 3xx codes are handled by the HttpURLConnection class + if (responseCode == 403) { + // Authentication is required. NOTE(review): 401 is not routed here and falls through to throw4XXError - confirm + throwAuthenticationError(responseCode); + } else if (responseCode >= 400 && responseCode < 500) { + throw4XXError(responseCode); + } else if (responseCode >= 500 && responseCode < 600) { + throw new FetcherException(responseCode, "The server encounted an error. HTTP Response code was:" + responseCode); + } + } + + protected void throw4XXError(final int responseCode) throws FetcherException { + throw new FetcherException(responseCode, "The requested resource could not be found. HTTP Response code was:" + responseCode); + } + + protected void throwAuthenticationError(final int responseCode) throws FetcherException { + throw new FetcherException(responseCode, "Authentication required for that resource. HTTP Response code was:" + responseCode); + } + + /** + *

+ * Combine the entries in two feeds into a single feed. + *

+ * + *

+ * The returned feed will have the same data as the newFeed parameter, with the entries from + * originalFeed appended to the end of its entries. + *

+ * + * @param originalFeed the feed whose entries are appended after the entries of newFeed + * @param newFeed the feed that is cloned to form the result + * @return a clone of newFeed with the entries of originalFeed appended (an IllegalArgumentException is thrown if newFeed cannot be cloned) + */ + public static SyndFeed combineFeeds(final SyndFeed originalFeed, final SyndFeed newFeed) { + try { + final SyndFeed result = (SyndFeed) newFeed.clone(); + result.getEntries().addAll(result.getEntries().size(), originalFeed.getEntries()); + return result; + } catch (final CloneNotSupportedException e) { + final IllegalArgumentException iae = new IllegalArgumentException("Cannot clone feed"); + iae.initCause(e); + throw iae; + } + } + + public boolean isPreserveWireFeed() { + return preserveWireFeed; + } + + @Override + public void setPreserveWireFeed(final boolean preserveWireFeed) { + this.preserveWireFeed = preserveWireFeed; + } + +} diff --git a/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java new file mode 100644 index 0000000..5bac21e --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/AbstractFeedFetcherBeanInfo.java @@ -0,0 +1,33 @@ +package com.rometools.fetcher.impl; + +import java.beans.EventSetDescriptor; +import java.beans.SimpleBeanInfo; +import java.lang.reflect.Method; + +import com.rometools.fetcher.FetcherEvent; +import com.rometools.fetcher.FetcherListener; + +public class AbstractFeedFetcherBeanInfo extends SimpleBeanInfo { + + @Override + public EventSetDescriptor[] getEventSetDescriptors() { + + try { + + // get the class object which we'll describe + final Class clz = AbstractFeedFetcher.class; + final Method addMethod = clz.getMethod("addFetcherEventListener", new Class[] { FetcherListener.class }); + final Method removeMethod = clz.getMethod("removeFetcherEventListener", new Class[] { FetcherListener.class }); + final Method listenerMethod = FetcherListener.class.getMethod("fetcherEvent", new Class[] { FetcherEvent.class }); + final EventSetDescriptor est = new EventSetDescriptor("fetcherEvent", clz, new Method[] { listenerMethod }, addMethod, removeMethod); + return 
new EventSetDescriptor[] { est }; + + } catch (final Exception e) { + // IntrospectionException, SecurityException and/or NoSuchMethodException can be thrown + // here. The best we can do is to convert them to runtime exceptions + throw new RuntimeException(e); + } + + } + +} diff --git a/src/main/java/com/rometools/fetcher/impl/DiskFeedInfoCache.java b/src/main/java/com/rometools/fetcher/impl/DiskFeedInfoCache.java new file mode 100644 index 0000000..92bc9b1 --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/DiskFeedInfoCache.java @@ -0,0 +1,158 @@ +/* + * Copyright 2005 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.rometools.fetcher.impl; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.net.URL; + +import com.rometools.utils.IO; + +/** + * Disk-based feed cache: each SyndFeedInfo is Java-serialized to its own file under the cache path. 
+ */ +public class DiskFeedInfoCache implements FeedFetcherCache { + + protected String cachePath = null; + + public DiskFeedInfoCache(final String cachePath) { + this.cachePath = cachePath; + } + + @Override + public SyndFeedInfo getFeedInfo(final URL url) { + final String fileName = generateFilename(url); + return getFeedInfo(fileName); + } + + @Override + public void setFeedInfo(final URL url, final SyndFeedInfo feedInfo) { + + final String fileName = generateFilename(url); + + FileOutputStream fos = null; + ObjectOutputStream oos = null; + + try { + + fos = new FileOutputStream(fileName); + oos = new ObjectOutputStream(fos); + oos.writeObject(feedInfo); + fos.flush(); + + } catch (final FileNotFoundException e) { + + throw new RuntimeException("Error while writing to cache", e); + + } catch (final IOException e) { + + throw new RuntimeException("Error while writing to cache", e); + + } finally { + + IO.closeQuietly(fos); + IO.closeQuietly(oos); + + } + + } + + @Override + public synchronized void clear() { + final File file = new File(cachePath); + // only do the delete if the directory exists + if (file.exists() && file.canWrite()) { + // make the directory empty + final String[] files = file.list(); + final int len = files.length; + for (int i = 0; i < len; i++) { + final File deleteMe = new File(cachePath + File.separator + files[i]); + deleteMe.delete(); + } + // don't delete the cache directory + } + } + + @Override + public SyndFeedInfo remove(final URL url) { + final String fileName = generateFilename(url); + final SyndFeedInfo info = getFeedInfo(fileName); + if (info != null) { + final File file = new File(fileName); + if (file.exists()) { + file.delete(); + } + } + return info; + } + + private SyndFeedInfo getFeedInfo(final String fileName) { + + SyndFeedInfo info = null; + FileInputStream fis = null; + ObjectInputStream ois = null; + + try { + + fis = new FileInputStream(fileName); + ois = new ObjectInputStream(fis); + info = (SyndFeedInfo) 
ois.readObject(); + + } catch (final FileNotFoundException e) { + + // feed is not cached yet + + } catch (final ClassNotFoundException e) { + + throw new RuntimeException("Unable to read from cache", e); + + } catch (final IOException e) { + + throw new RuntimeException("Unable to read from cache", e); + + } finally { + + IO.closeQuietly(fis); + IO.closeQuietly(ois); + + } + + return info; + + } + + private static String replaceNonAlphanumeric(final String string, final char character) { + final StringBuffer buffer = new StringBuffer(string.length()); + for (final char singleChar : string.toCharArray()) { + if (Character.isLetterOrDigit(singleChar)) { + buffer.append(singleChar); + } else { + buffer.append(character); + } + } + return buffer.toString(); + } + + private String generateFilename(final URL url) { + return cachePath + File.separator + "feed_" + replaceNonAlphanumeric(url.toString(), '_').trim(); + } + +} diff --git a/src/main/java/com/rometools/fetcher/impl/FeedFetcherCache.java b/src/main/java/com/rometools/fetcher/impl/FeedFetcherCache.java new file mode 100644 index 0000000..4c0ff0a --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/FeedFetcherCache.java @@ -0,0 +1,60 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import java.net.URL; + +/** + *

+ * An interface to allow caching of feed details. Implementing this allows the + * {@link com.rometools.fetcher.io.HttpURLFeedFetcher} class to enable conditional gets + *

+ * + * @author Nick Lothian + * + */ +public interface FeedFetcherCache { + + /** + * Get a SyndFeedInfo object from the cache. + * + * @param feedUrl The url of the feed + * @return A SyndFeedInfo or null if it is not in the cache + */ + public SyndFeedInfo getFeedInfo(URL feedUrl); + + /** + * Add a SyndFeedInfo object to the cache. + * + * @param feedUrl The url of the feed + * @param syndFeedInfo A SyndFeedInfo for the feed + */ + public void setFeedInfo(URL feedUrl, SyndFeedInfo syndFeedInfo); + + /** + * Removes all items from the cache. + */ + public void clear(); + + /** + * Removes the SyndFeedInfo identified by the url from the cache. + * + * @return The removed SyndFeedInfo, or null if the url was not in the cache + */ + public SyndFeedInfo remove(URL feedUrl); + +} diff --git a/src/main/java/com/rometools/fetcher/impl/HashMapFeedInfoCache.java b/src/main/java/com/rometools/fetcher/impl/HashMapFeedInfoCache.java new file mode 100644 index 0000000..7508ce7 --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/HashMapFeedInfoCache.java @@ -0,0 +1,139 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import java.io.Serializable; +import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + *

+ * A very simple implementation of the {@link com.rometools.fetcher.impl.FeedFetcherCache} + * interface. + *

+ * + *

+ * This implementation uses a HashMap to cache retrieved feeds. This implementation is most suitable + * for short term (client aggregator?) use, as the memory usage will increase over time as the number + * of feeds in the cache increases. + *

+ * + * @author Nick Lothian + * + */ +public class HashMapFeedInfoCache implements FeedFetcherCache, Serializable { + + private static final long serialVersionUID = 1L; + + static HashMapFeedInfoCache instance; + + private Map infoCache; + + /** + *

+ * Constructor for HashMapFeedInfoCache. + *

+ * + *

+ * Only use this if you want multiple instances of the cache. Usually getInstance() is more + * appropriate. + *

+ * + */ + public HashMapFeedInfoCache() { + setInfoCache(createInfoCache()); + } + + /** + * Get the global instance of the cache. + * + * @return an implementation of FeedFetcherCache + */ + public static synchronized FeedFetcherCache getInstance() { + if (instance == null) { + instance = new HashMapFeedInfoCache(); + } + return instance; + } + + protected Map createInfoCache() { + return Collections.synchronizedMap(new HashMap()); + } + + protected Object get(final Object key) { + return getInfoCache().get(key); + } + + /** + * @see com.rometools.fetcher.impl.FeedFetcherCache#getFeedInfo(java.net.URL) + */ + @Override + public SyndFeedInfo getFeedInfo(final URL feedUrl) { + return (SyndFeedInfo) get(feedUrl.toString()); + } + + protected void put(final String key, final SyndFeedInfo value) { + getInfoCache().put(key, value); + } + + /** + * @see com.rometools.fetcher.impl.FeedFetcherCache#setFeedInfo(java.net.URL, com.rometools.fetcher.impl.SyndFeedInfo) + */ + @Override + public void setFeedInfo(final URL feedUrl, final SyndFeedInfo syndFeedInfo) { + put(feedUrl.toString(), syndFeedInfo); + } + + protected synchronized final Map getInfoCache() { + return infoCache; + } + + /** + * The API of this class indicates that the map must be thread safe. In other words, be sure to wrap it + * in a synchronized map unless you know what you are doing. + * + * @param map the map to use as the info cache. 
+ */ + protected synchronized final void setInfoCache(final Map map) { + infoCache = map; + } + + /** + * @see com.rometools.fetcher.impl.FeedFetcherCache#clear() + */ + @Override + public void clear() { + synchronized (infoCache) { + infoCache.clear(); + } + } + + /** + * @see com.rometools.fetcher.impl.FeedFetcherCache#remove(java.net.URL) + */ + @Override + public SyndFeedInfo remove(final URL url) { + if (url == null) { + return null; + } + + return infoCache.remove(url.toString()); + } + +} diff --git a/src/main/java/com/rometools/fetcher/impl/HttpClientFeedFetcher.java b/src/main/java/com/rometools/fetcher/impl/HttpClientFeedFetcher.java new file mode 100644 index 0000000..c15833f --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/HttpClientFeedFetcher.java @@ -0,0 +1,386 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package com.rometools.fetcher.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.httpclient.Credentials; +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.HttpMethodRetryHandler; +import org.apache.commons.httpclient.HttpState; +import org.apache.commons.httpclient.auth.AuthScope; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.params.HttpClientParams; +import org.apache.commons.httpclient.params.HttpMethodParams; + +import com.rometools.fetcher.FetcherEvent; +import com.rometools.fetcher.FetcherException; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.XmlReader; +import com.rometools.utils.IO; + +/** + * @author Nick Lothian + */ +public class HttpClientFeedFetcher extends AbstractFeedFetcher { + + private CredentialSupplier credentialSupplier; + private FeedFetcherCache feedInfoCache; + private volatile HttpClientMethodCallbackIntf httpClientMethodCallback; + private volatile HttpClientParams httpClientParams; + private Map customRequestHeaders; + + public HttpClientFeedFetcher() { + this(null, null); + } + + public HttpClientFeedFetcher(final FeedFetcherCache cache) { + this(cache, null); + } + + public HttpClientFeedFetcher(final FeedFetcherCache cache, final CredentialSupplier credentialSupplier) { + setHttpClientParams(new HttpClientParams()); + setFeedInfoCache(cache); + setCredentialSupplier(credentialSupplier); + } + + @Override + public SyndFeed retrieveFeed(final URL url) throws 
IllegalArgumentException, IOException, FeedException, FetcherException {
+        return this.retrieveFeed(getUserAgent(), url);
+    }
+
+    /**
+     * Retrieves the feed at the given URL with a GET request issued through Commons HttpClient.
+     * When a feed info cache is configured, the cached ETag / Last-Modified values are sent as
+     * conditional GET headers and the cache entry is replaced with the outcome of this request.
+     *
+     * @param userAgent value sent as the User-Agent request header, unless the custom request
+     *            headers already contain a User-Agent entry
+     * @param feedUrl the URL of the feed to retrieve, must not be null
+     * @return the retrieved feed, or the cached feed when the server answers 304 Not Modified
+     * @throws IllegalArgumentException if feedUrl is null
+     * @throws IOException if executing the request or reading the response fails
+     * @throws FeedException if the response cannot be built into a feed
+     * @throws FetcherException if handleErrorCodes rejects the HTTP status code
+     */
+    @Override
+    public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
+
+        if (feedUrl == null) {
+            throw new IllegalArgumentException("null is not a valid URL");
+        }
+
+        final HttpClient client = new HttpClient(httpClientParams);
+
+        if (credentialSupplier != null) {
+
+            final HttpClientParams params = client.getParams();
+            params.setAuthenticationPreemptive(true);
+
+            final String host = feedUrl.getHost();
+            final Credentials credentials = credentialSupplier.getCredentials(null, host);
+            if (credentials != null) {
+                final AuthScope authScope = new AuthScope(host, -1);
+                final HttpState state = client.getState();
+                state.setCredentials(authScope, credentials);
+            }
+
+        }
+
+        // NOTE(review): this overwrites a JVM-wide system property on every fetch; kept as is
+        // for backward compatibility - confirm whether the explicit User-Agent header suffices
+        System.setProperty("httpclient.useragent", userAgent);
+
+        final String urlStr = feedUrl.toString();
+        final HttpMethod method = new GetMethod(urlStr);
+
+        if (customRequestHeaders == null) {
+            method.addRequestHeader("Accept-Encoding", "gzip");
+            method.addRequestHeader("User-Agent", userAgent);
+
+        } else {
+            // NOTE(review): generic type arguments (presumably <String, String>) look stripped
+            // from this text - confirm against the real source file
+            for (final Map.Entry entry : customRequestHeaders.entrySet()) {
+                method.addRequestHeader(entry.getKey(), entry.getValue());
+            }
+            // defaults are only added when the caller did not provide its own value
+            if (!customRequestHeaders.containsKey("Accept-Encoding")) {
+                method.addRequestHeader("Accept-Encoding", "gzip");
+            }
+            if (!customRequestHeaders.containsKey("User-Agent")) {
+                method.addRequestHeader("User-Agent", userAgent);
+            }
+        }
+
+        method.setFollowRedirects(true);
+
+        // read the volatile field once: a concurrent setHttpClientMethodCallback(null) must not
+        // cause a NullPointerException between the null check and the synchronized block
+        final HttpClientMethodCallbackIntf callback = httpClientMethodCallback;
+        if (callback != null) {
+            synchronized (callback) {
+                callback.afterHttpClientMethodCreate(method);
+            }
+        }
+
+        final FeedFetcherCache cache = getFeedInfoCache();
+
+        if (cache != null) {
+            // retrieve feed
+            try {
+
+                if (isUsingDeltaEncoding()) {
+                    method.setRequestHeader("A-IM", "feed");
+                }
+
+                // try to get the feed info from the cache
+                SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
+
+                if (syndFeedInfo != null) {
+
+                    // only send validators that are actually known: a null ETag must not
+                    // produce an If-None-Match header
+                    final String eTag = syndFeedInfo.getETag();
+                    if (eTag != null) {
+                        method.setRequestHeader("If-None-Match", eTag);
+                    }
+
+                    final Object lastModifiedHeader = syndFeedInfo.getLastModified();
+                    if (lastModifiedHeader instanceof String) {
+                        method.setRequestHeader("If-Modified-Since", (String) lastModifiedHeader);
+                    }
+
+                }
+
+                final int statusCode = client.executeMethod(method);
+                fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
+                handleErrorCodes(statusCode);
+
+                SyndFeed feed = getFeed(syndFeedInfo, urlStr, method, statusCode);
+
+                syndFeedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);
+
+                cache.setFeedInfo(feedUrl, syndFeedInfo);
+
+                // the feed may have been modified to pick up cached values
+                // (eg - for delta encoding)
+                feed = syndFeedInfo.getSyndFeed();
+
+                return feed;
+
+            } finally {
+
+                method.releaseConnection();
+
+            }
+
+        } else {
+
+            // cache is not in use
+            try {
+
+                final int statusCode = client.executeMethod(method);
+                fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
+                handleErrorCodes(statusCode);
+
+                return getFeed(null, urlStr, method, statusCode);
+
+            } finally {
+
+                method.releaseConnection();
+
+            }
+
+        }
+
+    }
+
+    private SyndFeed getFeed(final SyndFeedInfo syndFeedInfo, final String urlStr, final HttpMethod method, final int statusCode) throws IOException,
+            HttpException, FetcherException, FeedException {
+
+        if (statusCode == HttpURLConnection.HTTP_NOT_MODIFIED && syndFeedInfo != null) {
+            fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
+            return syndFeedInfo.getSyndFeed();
+        }
+
+        final SyndFeed feed = retrieveFeed(urlStr, method);
+        fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, feed);
+        return feed;
+    }
+
+    private SyndFeedInfo buildSyndFeedInfo(final URL feedUrl, final String urlStr, final HttpMethod method, SyndFeed feed, final int statusCode)
+            throws MalformedURLException {
+
+        SyndFeedInfo syndFeedInfo;
+        syndFeedInfo
= new SyndFeedInfo(); + + // this may be different to feedURL because of 3XX redirects + syndFeedInfo.setUrl(new URL(urlStr)); + syndFeedInfo.setId(feedUrl.toString()); + + final Header imHeader = method.getResponseHeader("IM"); + if (imHeader != null && imHeader.getValue().contains("feed") && isUsingDeltaEncoding()) { + + final FeedFetcherCache cache = getFeedInfoCache(); + + if (cache != null && statusCode == 226) { + // client is setup to use http delta encoding and the server supports it and has + // returned a delta encoded response. This response only includes new items + final SyndFeedInfo cachedInfo = cache.getFeedInfo(feedUrl); + + if (cachedInfo != null) { + final SyndFeed cachedFeed = cachedInfo.getSyndFeed(); + + // set the new feed to be the orginal feed plus the new items + feed = combineFeeds(cachedFeed, feed); + } + } + } + + final Header lastModifiedHeader = method.getResponseHeader("Last-Modified"); + if (lastModifiedHeader != null) { + syndFeedInfo.setLastModified(lastModifiedHeader.getValue()); + } + + final Header eTagHeader = method.getResponseHeader("ETag"); + if (eTagHeader != null) { + syndFeedInfo.setETag(eTagHeader.getValue()); + } + + syndFeedInfo.setSyndFeed(feed); + + return syndFeedInfo; + } + + private SyndFeed retrieveFeed(final String urlStr, final HttpMethod method) throws IOException, HttpException, FetcherException, FeedException { + + final Header contentEncodingHeader = method.getResponseHeader("Content-Encoding"); + + final InputStream stream; + if (contentEncodingHeader != null && "gzip".equalsIgnoreCase(contentEncodingHeader.getValue())) { + stream = new GZIPInputStream(method.getResponseBodyAsStream()); + } else { + stream = method.getResponseBodyAsStream(); + } + + try { + + final Header contentTypeHeader = method.getResponseHeader("Content-Type"); + + final XmlReader reader; + if (contentTypeHeader != null) { + reader = new XmlReader(stream, contentTypeHeader.getValue(), true); + } else { + reader = new XmlReader(stream, 
true); + } + + final SyndFeedInput syndFeedInput = new SyndFeedInput(); + syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); + + return syndFeedInput.build(reader); + + } finally { + + IO.close(stream); + + } + + } + + public synchronized void setRetryHandler(final HttpMethodRetryHandler handler) { + httpClientParams.setParameter(HttpMethodParams.RETRY_HANDLER, handler); + } + + /** + * @param timeout Sets the connect timeout for the HttpClient but using the URLConnection method + * name. Uses the HttpClientParams method setConnectionManagerTimeout instead of + * setConnectTimeout + * + */ + public synchronized void setConnectTimeout(final int timeout) { + httpClientParams.setConnectionManagerTimeout(timeout); + } + + /** + * @return The currently used connect timeout for the HttpClient but using the URLConnection + * method name. Uses the HttpClientParams method getConnectionManagerTimeout instead of + * getConnectTimeout + * + */ + public int getConnectTimeout() { + return (int) getHttpClientParams().getConnectionManagerTimeout(); + } + + public synchronized void setCredentialSupplier(final CredentialSupplier credentialSupplier) { + this.credentialSupplier = credentialSupplier; + } + + public synchronized CredentialSupplier getCredentialSupplier() { + return credentialSupplier; + } + + public synchronized void setFeedInfoCache(final FeedFetcherCache feedInfoCache) { + this.feedInfoCache = feedInfoCache; + } + + public synchronized FeedFetcherCache getFeedInfoCache() { + return feedInfoCache; + } + + public synchronized void setHttpClientMethodCallback(final HttpClientMethodCallbackIntf httpClientMethodCallback) { + this.httpClientMethodCallback = httpClientMethodCallback; + } + + public HttpClientMethodCallbackIntf getHttpClientMethodCallback() { + return httpClientMethodCallback; + } + + public synchronized void setHttpClientParams(final HttpClientParams httpClientParams) { + this.httpClientParams = httpClientParams; + } + + public synchronized 
HttpClientParams getHttpClientParams() { + return httpClientParams; + } + + /** + * @param timeout The read timeout to set on the HttpClient parameters, in milliseconds; 0 is + * unlimited, i.e. no timeout + */ + public synchronized void setReadTimeout(final int timeout) { + httpClientParams.setSoTimeout(timeout); + } + + /** + * @return The currently used read timeout of the HttpClient parameters, in milliseconds; 0 is + * unlimited, i.e. no timeout + */ + public int getReadTimeout() { + return getHttpClientParams().getSoTimeout(); + } + + /** + * Apply any request headers to the HTTP method call. + * + * @param customRequestHeaders the headers added to each request; may be null + */ + public synchronized void setCustomRequestHeaders(final Map customRequestHeaders) { + this.customRequestHeaders = customRequestHeaders; + } + + public interface CredentialSupplier { + public Credentials getCredentials(String realm, String host); + } + + public interface HttpClientMethodCallbackIntf { + + /** + * Allows access to the underlying HttpClient HttpMethod object. Note that in most cases, + * method.setRequestHeader(String, String) is what you want to do (rather than + * method.addRequestHeader(String, String)) + * + * @param method the HttpMethod created for the request + */ + public void afterHttpClientMethodCreate(HttpMethod method); + } + +} diff --git a/src/main/java/com/rometools/fetcher/impl/HttpURLFeedFetcher.java b/src/main/java/com/rometools/fetcher/impl/HttpURLFeedFetcher.java new file mode 100644 index 0000000..70a8863 --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/HttpURLFeedFetcher.java @@ -0,0 +1,329 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; +import java.util.zip.GZIPInputStream; + +import com.rometools.fetcher.FetcherEvent; +import com.rometools.fetcher.FetcherException; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.XmlReader; +import com.rometools.utils.IO; + +/** + *

+ * Class to retrieve syndication files via HTTP. + *

+ * + *

+ * If passed a {@link com.rometools.fetcher.impl.FeedFetcherCache} in the constructor it will use + * conditional gets to only retrieve modified content. + *

+ * + *

+ * The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where supported by the + * server. + *

+ * + *

+ * Simple usage: + * + *

+ * // create the cache
+ * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
+ * // retrieve the feed the first time
+ * // any subsequent request will use conditional gets and only
+ * // retrieve the resource if it has changed
+ * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
+ * 
+ * + *

+ *
+ * @see http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers
+ * @see http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level
+ * @see http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html
+ * @author Nick Lothian
+ */
+public class HttpURLFeedFetcher extends AbstractFeedFetcher {
+
+    private volatile int connectTimeout = -1;
+
+    static final int POLL_EVENT = 1;
+    static final int RETRIEVE_EVENT = 2;
+    static final int UNCHANGED_EVENT = 3;
+
+    private FeedFetcherCache feedInfoCache;
+
+    /**
+     * Constructor to use HttpURLFeedFetcher without caching of feeds
+     *
+     */
+    public HttpURLFeedFetcher() {
+        this(null);
+    }
+
+    /**
+     * Constructor to enable HttpURLFeedFetcher to cache feeds
+     *
+     * @param feedInfoCache - an instance of the FeedFetcherCache interface
+     */
+    public HttpURLFeedFetcher(final FeedFetcherCache feedInfoCache) {
+        setFeedInfoCache(feedInfoCache);
+    }
+
+    @Override
+    public SyndFeed retrieveFeed(final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
+        return this.retrieveFeed(getUserAgent(), feedUrl);
+    }
+
+    /**
+     * Retrieve a feed over HTTP
+     *
+     * @param userAgent the value sent as the User-Agent request header
+     * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
+     * @return A {@link com.rometools.rome.feed.synd.SyndFeed} object
+     * @throws IllegalArgumentException if the URL is null or is not an HTTP URL
+     * @throws IOException if a TCP error occurs
+     * @throws FeedException if the feed is not valid
+     * @throws FetcherException if a HTTP error occurred
+     */
+    @Override
+    public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
+        if (feedUrl == null) {
+            throw new IllegalArgumentException("null is not a valid URL");
+        }
+
+        final URLConnection connection = feedUrl.openConnection();
+        if (!(connection instanceof HttpURLConnection)) {
+            throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url");
+        }
+        final HttpURLConnection httpConnection = (HttpURLConnection) connection;
+        if (connectTimeout >= 0) {
+            httpConnection.setConnectTimeout(connectTimeout);
+        }
+        // httpConnection.setInstanceFollowRedirects(true); // this is true by default, but can be
+        // changed on a classwide basis
+
+        final FeedFetcherCache cache = getFeedInfoCache();
+        if (cache != null) {
+            SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
+            setRequestHeaders(connection, syndFeedInfo, userAgent);
+            httpConnection.connect();
+            try {
+                fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
+
+                if (syndFeedInfo == null) {
+                    // this is a feed that hasn't been retrieved
+                    syndFeedInfo = new SyndFeedInfo();
+                    retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
+                } else {
+                    // check the response code
+                    final int responseCode = httpConnection.getResponseCode();
+                    if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
+                        // the response code is not 304 NOT MODIFIED: either the feed server
+                        // does not support conditional gets or the feed has changed, so the
+                        // feed is retrieved and cached again
+                        retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
+                    } else {
+                        // the feed does not need retrieving
+                        fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
+                    }
+                }
+
+                return syndFeedInfo.getSyndFeed();
+            } finally {
+                httpConnection.disconnect();
+            }
+        } else {
+            fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
+            InputStream inputStream = null;
+            setRequestHeaders(connection, null, userAgent);
+
+            httpConnection.connect();
+            try {
+                inputStream = httpConnection.getInputStream();
+                return getSyndFeedFromStream(inputStream, connection);
+            } catch (final java.io.IOException e) {
+                handleErrorCodes(((HttpURLConnection) connection).getResponseCode());
+                // the status code was not rejected above, so this is a genuine I/O failure:
+                // propagate it instead of silently returning null
+                throw e;
+            } finally {
+                IO.close(inputStream);
+                httpConnection.disconnect();
+            }
+        }
+    }
+
+    protected void retrieveAndCacheFeed(final URL feedUrl, final SyndFeedInfo
syndFeedInfo, final HttpURLConnection connection) + throws IllegalArgumentException, FeedException, FetcherException, IOException { + handleErrorCodes(connection.getResponseCode()); + + resetFeedInfo(feedUrl, syndFeedInfo, connection); + final FeedFetcherCache cache = getFeedInfoCache(); + // resetting feed info in the cache + // could be needed for some implementations + // of FeedFetcherCache (eg, distributed HashTables) + if (cache != null) { + cache.setFeedInfo(feedUrl, syndFeedInfo); + } + } + + protected void resetFeedInfo(final URL orignalUrl, final SyndFeedInfo syndFeedInfo, final HttpURLConnection connection) throws IllegalArgumentException, + IOException, FeedException { + // need to always set the URL because this may have changed due to 3xx redirects + syndFeedInfo.setUrl(connection.getURL()); + + // the ID is a persistant value that should stay the same even if the URL for the + // feed changes (eg, by 3xx redirects) + syndFeedInfo.setId(orignalUrl.toString()); + + // This will be 0 if the server doesn't support or isn't setting the last modified header + syndFeedInfo.setLastModified(connection.getLastModified()); + + // This will be null if the server doesn't support or isn't setting the ETag header + syndFeedInfo.setETag(connection.getHeaderField("ETag")); + + // get the contents + InputStream inputStream = null; + try { + inputStream = connection.getInputStream(); + SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection); + + final String imHeader = connection.getHeaderField("IM"); + if (isUsingDeltaEncoding() && imHeader != null && imHeader.contains("feed")) { + final FeedFetcherCache cache = getFeedInfoCache(); + if (cache != null && connection.getResponseCode() == 226) { + // client is setup to use http delta encoding and the server supports it and has + // returned a delta encoded response + // This response only includes new items + final SyndFeedInfo cachedInfo = cache.getFeedInfo(orignalUrl); + if (cachedInfo != null) { + final 
SyndFeed cachedFeed = cachedInfo.getSyndFeed(); + + // set the new feed to be the orginal feed plus the new items + syndFeed = combineFeeds(cachedFeed, syndFeed); + } + } + } + + syndFeedInfo.setSyndFeed(syndFeed); + } finally { + IO.close(inputStream); + } + } + + /** + *

+ * Set appropriate HTTP headers, including conditional get and gzip encoding headers + *

+ * + * @param connection A URLConnection + * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null + * @param userAgent the name of the user-agent to be placed in HTTP-header. + */ + protected void setRequestHeaders(final URLConnection connection, final SyndFeedInfo syndFeedInfo, final String userAgent) { + if (syndFeedInfo != null) { + // set the headers to get feed only if modified + // we support the use of both last modified and eTag headers + if (syndFeedInfo.getLastModified() != null) { + final Object lastModified = syndFeedInfo.getLastModified(); + if (lastModified instanceof Long) { + connection.setIfModifiedSince((Long) syndFeedInfo.getLastModified()); + } + } + if (syndFeedInfo.getETag() != null) { + connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag()); + } + + } + // header to retrieve feed gzipped + connection.setRequestProperty("Accept-Encoding", "gzip"); + connection.addRequestProperty("User-Agent", userAgent); + + if (isUsingDeltaEncoding()) { + connection.addRequestProperty("A-IM", "feed"); + } + } + + private SyndFeed readSyndFeedFromStream(final InputStream inputStream, final URLConnection connection) throws IOException, IllegalArgumentException, + FeedException { + BufferedInputStream is; + if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) { + // handle gzip encoded content + is = new BufferedInputStream(new GZIPInputStream(inputStream)); + } else { + is = new BufferedInputStream(inputStream); + } + + // InputStreamReader reader = new InputStreamReader(is, + // ResponseHandler.getCharacterEncoding(connection)); + + // SyndFeedInput input = new SyndFeedInput(); + + final XmlReader reader; + if (connection.getHeaderField("Content-Type") != null) { + reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true); + } else { + reader = new XmlReader(is, true); + } + + final SyndFeedInput syndFeedInput = new SyndFeedInput(); + syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); 
+ + return syndFeedInput.build(reader); + + } + + private SyndFeed getSyndFeedFromStream(final InputStream inputStream, final URLConnection connection) throws IOException, IllegalArgumentException, + FeedException { + final SyndFeed feed = readSyndFeedFromStream(inputStream, connection); + fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed); + return feed; + } + + /** + * @return The FeedFetcherCache used by this fetcher (Could be null) + */ + public synchronized FeedFetcherCache getFeedInfoCache() { + return feedInfoCache; + } + + /** + * @param cache The cache to be used by this fetcher (pass null to stop using a cache) + */ + public synchronized void setFeedInfoCache(final FeedFetcherCache cache) { + feedInfoCache = cache; + } + + /** + * @param timeout see java.net.URLConnection.setConnectTimeout(int timeout) + */ + public synchronized void setConnectTimeout(final int timeout) { + connectTimeout = timeout; + } +} diff --git a/src/main/java/com/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java b/src/main/java/com/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java new file mode 100644 index 0000000..a398bf6 --- /dev/null +++ b/src/main/java/com/rometools/fetcher/impl/LinkedHashMapFeedInfoCache.java @@ -0,0 +1,81 @@ +package com.rometools.fetcher.impl; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + *

+ * An implementation of the {@link com.rometools.fetcher.impl.FeedFetcherCache} interface. + *

+ * + *

+ * Unlike the HashMapFeedInfoCache this implementation will not grow unbound + *

+ * + * @author Javier Kohen + * @author Nick Lothian + * + */ +public class LinkedHashMapFeedInfoCache extends HashMapFeedInfoCache { + + private final class CacheImpl extends LinkedHashMap { + + private static final long serialVersionUID = 1L; + + public CacheImpl() { + super(16, 0.75F, true); + } + + @Override + protected boolean removeEldestEntry(final Map.Entry eldest) { + return size() > getMaxEntries(); + } + + } + + private static final int DEFAULT_MAX_ENTRIES = 20; + private static final long serialVersionUID = 1L; + private static final LinkedHashMapFeedInfoCache _instance = new LinkedHashMapFeedInfoCache(); + + private int maxEntries = DEFAULT_MAX_ENTRIES; + + /** + * Get the global instance of the cache + * + * @return an implementation of FeedFetcherCache + */ + public static final FeedFetcherCache getInstance() { + return _instance; + } + + /** + *

+ * Constructor for LinkedHashMapFeedInfoCache

+ * + *

+ * Only use this if you want multiple instances of the cache. Usually {@link #getInstance()} is + * more appropriate. + *

+     *
+     * @see #getInstance()
+     */
+    public LinkedHashMapFeedInfoCache() {
+        super();
+    }
+
+    @Override
+    protected Map createInfoCache() {
+        return Collections.synchronizedMap(new CacheImpl());
+    }
+
+    public synchronized final int getMaxEntries() {
+        return maxEntries;
+    }
+
+    public synchronized final void setMaxEntries(final int maxEntries) {
+        this.maxEntries = maxEntries;
+    }
+
+}
diff --git a/src/main/java/com/rometools/fetcher/impl/ResponseHandler.java b/src/main/java/com/rometools/fetcher/impl/ResponseHandler.java
new file mode 100644
index 0000000..56b3ab8
--- /dev/null
+++ b/src/main/java/com/rometools/fetcher/impl/ResponseHandler.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2004 Sun Microsystems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package com.rometools.fetcher.impl;
+
+import java.net.URLConnection;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Utility class to help deal with HTTP responses
+ *
+ */
+public class ResponseHandler {
+    public static final String defaultCharacterEncoding = "ISO-8859-1";
+
+    private final static Pattern characterEncodingPattern = Pattern.compile("charset=\"?([^\";\\s]*)", Pattern.CASE_INSENSITIVE);
+
+    public static String getCharacterEncoding(final URLConnection connection) {
+        return getCharacterEncoding(connection.getContentType());
+    }
+
+    /**
+     * Gets the character encoding of a response. (Note that this is different to the
+     * content-encoding)
+     * <p>
+     * The charset parameter name is matched case-insensitively and double quotes around its
+     * value are dropped, eg: text/html; Charset="UTF-8" yields UTF-8.
+     * </p>
+     * @param contentTypeHeader the value of the content-type HTTP header eg: text/html;
+     *            charset=ISO-8859-4 (may be null)
+     * @return the character encoding, eg: ISO-8859-4, or the default when none is declared
+     */
+    public static String getCharacterEncoding(final String contentTypeHeader) {
+        if (contentTypeHeader == null) {
+            return defaultCharacterEncoding;
+        }
+
+        final Matcher m = characterEncodingPattern.matcher(contentTypeHeader);
+        // find() rather than matches(): the parameter may appear anywhere in the header value
+        if (!m.find()) {
+            return defaultCharacterEncoding;
+        } else {
+            return m.group(1);
+        }
+    }
+}
diff --git a/src/main/java/com/rometools/fetcher/impl/SyndFeedInfo.java b/src/main/java/com/rometools/fetcher/impl/SyndFeedInfo.java
new file mode 100644
index 0000000..07c9bfe
--- /dev/null
+++ b/src/main/java/com/rometools/fetcher/impl/SyndFeedInfo.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2004 Sun Microsystems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package com.rometools.fetcher.impl;
+
+import java.io.Serializable;
+import java.net.URL;
+
+import com.rometools.rome.feed.impl.ObjectBean;
+import com.rometools.rome.feed.synd.SyndFeed;
+
+/**
+ *

+ * A class to represent a {@link com.rometools.rome.feed.synd.SyndFeed} and some useful information + * about it. + *

+ * + *

+ * This class is thread safe, as expected by the different feed fetcher implementations. + *

+ * + * @author Nick Lothian + */ +public class SyndFeedInfo implements Cloneable, Serializable { + private static final long serialVersionUID = 1L; + + private final ObjectBean _objBean; + private String id; + private URL url; + private Object lastModified; + private String eTag; + private SyndFeed syndFeed; + + public SyndFeedInfo() { + _objBean = new ObjectBean(this.getClass(), this); + } + + /** + * Creates a deep 'bean' clone of the object. + *

+ * + * @return a clone of the object. + * @throws CloneNotSupportedException thrown if an element of the object cannot be cloned. + * + */ + @Override + public Object clone() throws CloneNotSupportedException { + return _objBean.clone(); + } + + /** + * Indicates whether some other object is "equal to" this one as defined by the Object equals() + * method. + *

+ * + * @param other the reference object with which to compare. + * @return true if 'this' object is equal to the 'other' object. + * + */ + @Override + public boolean equals(final Object other) { + return _objBean.equals(other); + } + + /** + * Returns a hashcode value for the object. + *

+ * It follows the contract defined by the Object hashCode() method. + *

+ * + * @return the hashcode of the bean object. + * + */ + @Override + public int hashCode() { + return _objBean.hashCode(); + } + + /** + * Returns the String representation for the object. + *

+ * + * @return String representation for the object. + * + */ + @Override + public String toString() { + return _objBean.toString(); + } + + /** + * @return the ETag the feed was last retrieved with + */ + public synchronized String getETag() { + return eTag; + } + + /** + * @return the last modified date for the feed + */ + public synchronized Object getLastModified() { + return lastModified; + } + + /** + * @return the URL the feed was served from + */ + public synchronized URL getUrl() { + return url; + } + + public synchronized void setETag(final String string) { + eTag = string; + } + + public synchronized void setLastModified(final Object o) { + lastModified = o; + } + + public synchronized void setUrl(final URL url) { + this.url = url; + } + + public synchronized SyndFeed getSyndFeed() { + return syndFeed; + } + + public synchronized void setSyndFeed(final SyndFeed feed) { + syndFeed = feed; + } + + /** + * @return A unique ID to identify the feed + */ + public synchronized String getId() { + return id; + } + + /** + * @param string A unique ID to identify the feed. Note that if the URL of the feed changes this + * will remain the same + */ + public synchronized void setId(final String string) { + id = string; + } + +} diff --git a/src/main/resources/.gitignore b/src/main/resources/.gitignore new file mode 100644 index 0000000..53b845b --- /dev/null +++ b/src/main/resources/.gitignore @@ -0,0 +1 @@ +# needed to commit empty folder \ No newline at end of file diff --git a/src/site/apt/BuildingTheRomeFetcher.apt b/src/site/apt/BuildingTheRomeFetcher.apt new file mode 100644 index 0000000..83ecc7d --- /dev/null +++ b/src/site/apt/BuildingTheRomeFetcher.apt @@ -0,0 +1,11 @@ + ----- + Building the Rome Fetcher + ----- + mkurz + ----- + 2011-08-15 17:34:51.402 + ----- + +Building the Rome Fetcher + + The Rome Fetcher can build using Maven 2. 
diff --git a/src/site/apt/ChangeLog.apt b/src/site/apt/ChangeLog.apt new file mode 100644 index 0000000..135af0c --- /dev/null +++ b/src/site/apt/ChangeLog.apt @@ -0,0 +1,113 @@ + ----- + Change Log + ----- + mkurz + ----- + 2011-08-15 17:27:20.212 + ----- + +Change Log + +*Prior to first release (on the way to v0.3) + + [[1]] Updated to handle removal of IO methods using byte streams\ + Byte Stream IO was removed from Rome itself. The Rome Fetcher is now updated to support this + + [[1]] Add FeedFetcherI interface and FeedFetcherFactory class\ + There is now a FeedFetcherI interface, which FeedFetcher implements. Use FeedFetcherFactory to create instances of + FeedFetcher (as suggested by Joseph Ottinger) (FeedFetcherFactory was later removed) + + [[1]] Event Support Added to FeedFetcherI\ + The FeedFetcherI interface now supports feed polled, feed retrieved and feed unchanged events + + [[1]] Samples added\ + Samples are now included with the Rome Fetcher + + [[1]] Unit Tests Added\ + JUnit based tests which invoke the Rome Fetcher against an embedded Jetty webserver are now included + + [[1]] Bug fixes in the FeedFetcher event model\ + The JUnit test suite uncovered some bugs in the event model used by the FeedFetcher. These bugs are now fixed. + + [[1]] Refactored the SyndFeedInfo class\ + SyndFeedInfo now extends ObjectBean + + [[1]] Removed FeedFetcherFactory\ + The benefit of the FeedFetcherFactory was arguable. Now the client code will need to manage the creation of specific implementations of the FeedFetcher + + [] + +*Prior to second release (on the way to v0.4) + + [[1]] Refactored to match Rome naming standards\ + FeedFetcherI renamed to FeedFetcher\ + #. New FeedFetcher Implementation\ + HttpClientFeedFetcher uses the Apache Commons HTTP Client + + [[1]] Abstract test classes excluded in project.xml\ + Tests now run correctly under Maven + + [[1]] Added GZip support to HttpClientFeedFetcher\ + HttpClientFeedFetcher now supports GZip compression. 
Tests have been added. + + [] + +*Prior to third release (on the way to v0.5) + + [[1]] SyndFeedInfo implements Serializable\ + SyndFeedInfo implements Serializable to make it easier to store + + [[1]] Support for rfc3229 delta encoding\ + The Fetcher now supports rfc3229 delta encoding. See {{{http://www.ietf.org/rfc/rfc3229.txt}http://www.ietf.org/rfc/rfc3229.txt}} and + {{{http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html}http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html}}. + Note that this support is experimental and disabled by default + + [] + +*Prior to 0.6 + + [[1]] Feed passed to FetcherEvents\ + When a feed is retrieved it is now passed to the Fetcher Event. This makes it easier to code applications using an event oriented style. + + [] + +*Prior to 0.7 + + [[1]] Fix for URL Connection leak\ + In some circumstances URLConnection objects were not closed. This could cause problems in long\-running applications. + + [] + +*0.8 was never released + +*Prior to 0.9 + + [[1]] Fix for potential synchronization issue\ + There was the possibility of synchronization issues in the FeedFetcher. Fixed, thanks to suggestions from Javier Kohen. + + [[1]] New LinkedHashMapFeedInfoCache FeedFetcherCache implementation\ + The new LinkedHashMapFeedInfoCache has the advantage that it will not grow unbound + + [] + +*Prior to 1.0RC2 + + [[1]] BeanInfo class added for AbstractFeedFetcher\ + com.rometools.rome.fetcher.impl.AbstractFeedFetcherBeanInfo was created to allow introspection to correctly find the events + + [[1]] Callback to allow access to HttpClient HttpMethod object\ + Add a HttpClientMethodCallbackIntf to allow the calling code to modify the HttpClient HttpMethod used to make the request + (eg, add additional headers, etc.) 
Also fixes a reported bug where the user agent wasn't being set properly + + [[1]] Support for clearing cache\ + See {{{http://java.net/jira/browse/ROME\-119}http://java.net/jira/browse/ROME\-119}} for details + + [] + +*Prior to 1.0 + + [[1]] Support for preserving wire feed data.\ + The fetcher now has a setPreserveWireFeed() method which will setup ROME to preserve WireFeed data. See + {{{http://rometools.github.io/rome/PreservingWireFeeds.html}PreservingWireFeeds}} for further information. + + [] diff --git a/src/site/apt/Dependencies/HttpClientFeedFetcher.apt b/src/site/apt/Dependencies/HttpClientFeedFetcher.apt new file mode 100644 index 0000000..e67bfdc --- /dev/null +++ b/src/site/apt/Dependencies/HttpClientFeedFetcher.apt @@ -0,0 +1,13 @@ + ----- + HttpClientFeedFetcher + ----- + mkurz + ----- + 2011-08-15 17:40:11.468 + ----- + +HttpClientFeedFetcher + + + An implementation of the <<>> which uses the Jakarta HTTP Client. This HTTP client has many benefits over the standard Java implementation. + diff --git a/src/site/apt/Dependencies/index.apt b/src/site/apt/Dependencies/index.apt new file mode 100644 index 0000000..e890d1c --- /dev/null +++ b/src/site/apt/Dependencies/index.apt @@ -0,0 +1,36 @@ + ----- + Dependencies + ----- + mkurz + ----- + 2011-08-15 17:42:31.999 + ----- + +Dependencies + + The Rome Fetcher aims to introduce the absolute minimum number of extra dependencies. Currently (30\-June\-2004) no extra dependencies over those required + by Rome are required to use the fetcher. + + The current dependencies required to use the Rome Fetcher are: + + * JDK 1.4\+ + + * Current version of Rome + + * JDom v 1.0 + + [] + + To build the Rome Fetcher the {{{http://jakarta.apache.org/commons/httpclient/}Jakarta HTTP Client}} is required. + + If the {{{./HttpClientFeedFetcher.html}HttpClientFeedFetcher (fetcher)}} fetcher implementation is used then the Jakarta HTTP Client and Jakarta Commons Logging is required. 
+ + To build and run the unit tests for the Rome Fetcher the following additional dependencies are required: + + * servletapi version 2.3 + + * jetty 4.2.12 + + [] + + Note that Maven will automatically download the correct versions of all dependencies. diff --git a/src/site/apt/KnownIssues.apt b/src/site/apt/KnownIssues.apt new file mode 100644 index 0000000..38263e7 --- /dev/null +++ b/src/site/apt/KnownIssues.apt @@ -0,0 +1,43 @@ + ----- + Known Issues + ----- + mkurz + ----- + 2011-08-15 17:30:28.420 + ----- + +Known Issues + +*Version 0.3 + + * The Maven build does not run the <<>> tests because of a bug in Maven + + * Version 0.3 does not have <<>> included in the project.xml (it is required to run the samples). Either get the latest <<>> from CVS, or + {{{https://rome.dev.java.net/source/browse/rome/subprojects/fetcher/project.xml?r1\=1.1&r2\=1.2}patch it yourself}} + + * 0.3 had a bug that caused it to overwrite system properties. + + [] + +*Version 0.4 + + * No known issues (yet!) + + [] + +*Version 0.5 + + * When listening to feed events using <<>>, there is no way to get to the retrieved content, because it is set after firing the event. + \-\- {{{http://wiki.java.net/twiki/bin/view/Javawsxml/Jawe}jawe}} + + * When listening to feed events using <<>>, the feed URLs returned by the <<>> are prepended with + "sun.net.www.protocol.http.HttpURLConnection:" \-\- {{{http://wiki.java.net/twiki/bin/view/Javawsxml/Jawe}jawe}} + + [] + +*Version 0.7 + + * <<>> doesn't work quite right because <<>> does hostname resolution and treats virtual hosts with the same IP as equal, + so e.g. all RSS feeds from <<>> collide in the cache. Also, it's really slow. Fix is to use <<>> as the hash key instead of the URL itself. 
+ + [] diff --git a/src/site/apt/Releases/ROMEFetcher0.6.apt b/src/site/apt/Releases/ROMEFetcher0.6.apt new file mode 100644 index 0000000..7a6e396 --- /dev/null +++ b/src/site/apt/Releases/ROMEFetcher0.6.apt @@ -0,0 +1,49 @@ + ----- + ROME Fetcher 0.6 + ----- + mkurz + ----- + 2011-08-15 17:50:02.821 + ----- + +ROME Fetcher 0.6 + +*Downloads + + * {{{./rome\-fetcher\-0.6\-src.zip}rome\-fetcher\-0.6\-src.zip}} + + * {{{./rome\-fetcher\-0.6.tar.gz}rome\-fetcher\-0.6.tar.gz}} + + * {{{./rome\-fetcher\-0.6.zip}rome\-fetcher\-0.6.zip}} + + * {{{./rome\-fetcher\-0.6\-src.tar.gz}rome\-fetcher\-0.6\-src.tar.gz}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/ROMEFetcher0.7.apt b/src/site/apt/Releases/ROMEFetcher0.7.apt new file mode 100644 index 0000000..d574307 --- /dev/null +++ b/src/site/apt/Releases/ROMEFetcher0.7.apt @@ -0,0 +1,49 @@ + ----- + ROME Fetcher 0.7 + ----- + mkurz + ----- + 2011-08-15 17:50:31.833 + ----- + +ROME Fetcher 0.7 + +*Downloads + + * {{{./rome\-fetcher\-0.7\-src.zip}rome\-fetcher\-0.7\-src.zip}} + + * {{{./rome\-fetcher\-0.7.tar.gz}rome\-fetcher\-0.7.tar.gz}} + + * {{{./rome\-fetcher\-0.7.zip}rome\-fetcher\-0.7.zip}} + + * {{{./rome\-fetcher\-0.7\-src.tar.gz}rome\-fetcher\-0.7\-src.tar.gz}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample 
programs included (fetcher)}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/ROMEFetcher0.9.apt b/src/site/apt/Releases/ROMEFetcher0.9.apt new file mode 100644 index 0000000..5bb4964 --- /dev/null +++ b/src/site/apt/Releases/ROMEFetcher0.9.apt @@ -0,0 +1,51 @@ + ----- + ROME Fetcher 0.9 + ----- + mkurz + ----- + 2011-08-15 17:51:04.383 + ----- + +ROME Fetcher 0.9 + + Note that there was no 0.8 Fetcher release + +*Downloads + + * {{{./rome\-fetcher\-0.9\-src.zip}rome\-fetcher\-0.9\-src.zip}} + + * {{{./rome\-fetcher\-0.9.tar.gz}rome\-fetcher\-0.9.tar.gz}} + + * {{{./rome\-fetcher\-0.9.zip}rome\-fetcher\-0.9.zip}} + + * {{{./rome\-fetcher\-0.9\-src.tar.gz}rome\-fetcher\-0.9\-src.tar.gz}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/ROMEFetcher1.0.apt b/src/site/apt/Releases/ROMEFetcher1.0.apt new file mode 100644 index 0000000..992a9b9 --- /dev/null +++ b/src/site/apt/Releases/ROMEFetcher1.0.apt @@ -0,0 +1,53 @@ + ----- + ROME Fetcher 1.0 + ----- + mkurz + ----- + 2011-08-15 17:52:18.652 + ----- + +ROME Fetcher 1.0 + +*Downloads + + * {{{./rome\-fetcher\-1.0.jar}rome\-fetcher\-1.0.jar}} + + * {{{./rome\-fetcher\-1.0\-javadoc.jar}rome\-fetcher\-1.0\-javadoc.jar}} + + * {{{./rome\-fetcher\-1.0\-sources.jar}rome\-fetcher\-1.0\-sources.jar}} + + [] + +*Tutorials + + * 
{{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*API Docs + + * {{{./rome-fetcher-1.0-javadoc.jar}Fetcher API Docs}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/ROMEFetcher1.0RC2.apt b/src/site/apt/Releases/ROMEFetcher1.0RC2.apt new file mode 100644 index 0000000..0519218 --- /dev/null +++ b/src/site/apt/Releases/ROMEFetcher1.0RC2.apt @@ -0,0 +1,57 @@ + ----- + ROME Fetcher 1.0 RC2 + ----- + mkurz + ----- + 2011-08-15 17:51:50.236 + ----- + +ROME Fetcher 1.0 RC2 + + Note that there was no 1.0 RC1 Fetcher release + +*Downloads + + * {{{./rome\-fetcher\-1.0RC2\-src.zip}rome\-fetcher\-1.0RC2\-src.zip}} + + * {{{./rome\-fetcher\-1.0RC2.jar}rome\-fetcher\-1.0RC2.jar}} + + * {{{./rome\-fetcher\-1.0RC2\-javadoc.jar}rome\-fetcher\-1.0RC2\-javadoc.jar}} + + * {{{./rome\-fetcher\-1.0RC2\-sources.jar}rome\-fetcher\-1.0RC2\-sources.jar}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*API Docs + + * {{{./rome-fetcher-1.0RC2-javadoc.jar}Fetcher API Docs}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/RomeFetcher0.3.apt b/src/site/apt/Releases/RomeFetcher0.3.apt new file mode 100644 index 
0000000..21d024e --- /dev/null +++ b/src/site/apt/Releases/RomeFetcher0.3.apt @@ -0,0 +1,51 @@ + ----- + Rome Fetcher 0.3 + ----- + mkurz + ----- + 2011-08-15 17:46:45.860 + ----- + +Rome Fetcher 0.3 + + Rome Fetcher version 0.3 is the initial release of the Rome Fetcher. It is released as version 0.3 to synchronize with the version number of the core Rome project release. + +*Downloads + + * {{{./rome\-fetcher\-0.3\-src.zip}rome\-fetcher\-0.3\-src.zip}} + + * {{{./rome\-fetcher\-0.3.tar.gz}rome\-fetcher\-0.3.tar.gz}} + + * {{{./rome\-fetcher\-0.3.zip}rome\-fetcher\-0.3.zip}} + + * {{{./rome\-fetcher\-0.3\-src.tar.gz}rome\-fetcher\-0.3\-src.tar.gz}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/RomeFetcher0.4.apt b/src/site/apt/Releases/RomeFetcher0.4.apt new file mode 100644 index 0000000..a9abd8a --- /dev/null +++ b/src/site/apt/Releases/RomeFetcher0.4.apt @@ -0,0 +1,49 @@ + ----- + Rome Fetcher 0.4 + ----- + mkurz + ----- + 2011-08-15 17:47:58.203 + ----- + +Rome Fetcher 0.4 + +*Downloads + + * {{{./rome\-fetcher\-0.4\-src.zip}rome\-fetcher\-0.4\-src.zip}} + + * {{{./rome\-fetcher\-0.4.tar.gz}rome\-fetcher\-0.4.tar.gz}} + + * {{{./rome\-fetcher\-0.4.zip}rome\-fetcher\-0.4.zip}} + + * {{{./rome\-fetcher\-0.4\-src.tar.gz}rome\-fetcher\-0.4\-src.tar.gz}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * 
{{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/RomeFetcher0.5.apt b/src/site/apt/Releases/RomeFetcher0.5.apt new file mode 100644 index 0000000..9959793 --- /dev/null +++ b/src/site/apt/Releases/RomeFetcher0.5.apt @@ -0,0 +1,51 @@ + ----- + Rome Fetcher 0.5 + ----- + mkurz + ----- + 2011-08-15 17:49:03.166 + ----- + +Rome Fetcher 0.5 + + The ROME Fetcher v 0.6 is now released. This page exists for historical purposes only. + +*Downloads + + * {{{./rome\-fetcher\-0.5\-src.zip}rome\-fetcher\-0.5\-src.zip}} + + * {{{./rome\-fetcher\-0.5.zip}rome\-fetcher\-0.5.zip}} + + * {{{./rome\-fetcher\-0.5.tar.gz}rome\-fetcher\-0.5.tar.gz}} + + * {{{./rome\-fetcher\-0.5\-src.tar.gz}rome\-fetcher\-0.5\-src.tar.gz}} + + [] + +*Tutorials + + * {{{../BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{../UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{../SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +*Todo list + + * {{{../TodoList.html}Todo list (fetcher)}} + + [] + +*Issues + + * {{{../KnownIssues.html}Known Issues (fetcher)}} + + [] + +*Change Log + + * {{{../ChangeLog.html}Change Log (fetcher)}} + + [] diff --git a/src/site/apt/Releases/index.apt b/src/site/apt/Releases/index.apt new file mode 100644 index 0000000..f5715b8 --- /dev/null +++ b/src/site/apt/Releases/index.apt @@ -0,0 +1,10 @@ + ----- + Releases + ----- + mkurz + ----- + 2011-08-15 17:57:59.422 + ----- + +Releases + diff --git a/src/site/apt/SampleProgramsIncluded.apt b/src/site/apt/SampleProgramsIncluded.apt new file mode 100644 index 0000000..7c12649 --- /dev/null +++ b/src/site/apt/SampleProgramsIncluded.apt @@ -0,0 +1,18 
@@ + ----- + Sample programs included + ----- + mkurz + ----- + 2011-08-15 17:38:21.664 + ----- + +Sample programs included + + There are two sample programs included with Rome Fetcher. + + {{{https://github.com/rometools/rome-fetcher/tree/master/src/main/java/org/rometools/fetcher/samples/FeedReader.java}FeedReader}} + is a program which demonstrates the use of the Fetcher to retrieve a feed and then to use the conditional get support to retrieve + it again only if it has changed. It also shows how to use the event API in the Fetcher. It can be run using the <<>> target. + + {{{https://github.com/rometools/rome-fetcher/tree/master/src/main/java/org/rometools/fetcher/samples/FeedAggregator.java}FeedAggregator}} + is a program which aggregates a number of feeds together into a single feed. It can be run using the <<>> target. diff --git a/src/site/apt/TodoList.apt b/src/site/apt/TodoList.apt new file mode 100644 index 0000000..d6c2582 --- /dev/null +++ b/src/site/apt/TodoList.apt @@ -0,0 +1,29 @@ + ----- + Todo list + ----- + mkurz + ----- + 2011-08-15 17:33:17.559 + ----- + +Todo list + + Please discuss items here on the rome dev mailing list + + * Automatically update the + \-{{{https://github.com/rometools/rome-fetcher/blob/master/src/java/org/rometools/fetcher/FeedFetcher.java}default User\-Agent version number}} + via the build\- Done: 23\-June\-2004 + + * Listener Architecture (for URL changes via 3xx redirection etc) Done: 30\-June\-2004 + + * {{{http://radio.userland.com/userGuide/reference/howToRedirectRss}RSS Redirection}} + + * Unit Tests: smart unit tests along the lines of Mark Pilgrim's tests, in his + \-{{{http://diveintomark.org/projects/feed_parser/}Python Universal Feed Parser}}. 
He instantiates a web server to a local directory where the samples live, + and then fetches the feeds from the server, which allows him to test in depth the behavior of gzip compression and etags handling.\- Done 30\-June\-2004 + + * Better character encoding handling \- See {{{http://diveintomark.org/archives/2004/02/13/xml\-media\-types}http://diveintomark.org/archives/2004/02/13/xml\-media\-types}} + + * A caching feed fetcher + + [] diff --git a/src/site/apt/UsingTheRomeFetcherModuleToRetrieveFeeds.apt b/src/site/apt/UsingTheRomeFetcherModuleToRetrieveFeeds.apt new file mode 100644 index 0000000..cc2bcfa --- /dev/null +++ b/src/site/apt/UsingTheRomeFetcherModuleToRetrieveFeeds.apt @@ -0,0 +1,36 @@ + ----- + Using the Rome Fetcher module to retrieve feeds + ----- + mkurz + ----- + 2011-08-15 17:36:41.844 + ----- + +Using the Rome Fetcher module to retrieve feeds + + The HttpURLFeedFetcher class does the actual HTTP request. It relies on the FeedInfoCacheI interface which stores information + about each feed required for conditional gets. Currently there is a single implementation of FeedInfoCacheI supplied: <<>>. + + The basic usage of FeedFetcher is as follows: + ++------+ +FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance(); +FeedFetcher feedFetcher = new HttpURLFeedFetcher(feedInfoCache); +SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://blogs.sun.com/roller/rss/pat")); +System.out.println(feed); ++------+ + + Any subsequent fetches of {{{http://blogs.sun.com/roller/rss/pat}http://blogs.sun.com/roller/rss/pat}} by any FeedFetcher using feedInfoCache + will now only retrieve the feed if it has changed. + + FeedFetcher can be used without a cache if required. 
Simply create it using the zero\-parameter constructor: + ++------+ +FeedFetcher feedFetcher = new HttpURLFeedFetcher(); ++------+ + + A more complete sample (including the use of listener on Fetcher events) + {{{https://github.com/rometools/rome-fetcher/blob/master/src/java/org/rometools/fetcher/samples/FeedReader.java}is included in the Rome Fetcher project}} + + Note that there has been considerable discussion on the rome\-dev list about the best way to manage the creation of the feed fetcher. + Currently the client code needs to be responsible for creating specific implementations of the FeedFetcherI interface. diff --git a/src/site/apt/WishList.apt b/src/site/apt/WishList.apt new file mode 100644 index 0000000..f4cd90f --- /dev/null +++ b/src/site/apt/WishList.apt @@ -0,0 +1,17 @@ + ----- + Wish list + ----- + mkurz + ----- + 2011-08-15 17:33:55.774 + ----- + +Wish list + + * {{{http://diveintomark.org/archives/2002/05/31}RSS Autodiscovery}} + + * {{{http://www.intertwingly.net/wiki/pie/PaceAutoDisco}Atom Autodiscovery}} + + * Connection timeout and read settings. + + [] diff --git a/src/site/apt/index.apt b/src/site/apt/index.apt new file mode 100644 index 0000000..d3ae974 --- /dev/null +++ b/src/site/apt/index.apt @@ -0,0 +1,72 @@ + ----- + Home + ----- + mkurz + ----- + 2011-08-15 17:44:31.665 + ----- + +Rome Fetcher + + The Rome Fetcher (see modules/fetcher) allows the retrieval of feeds via HTTP. It supports + {{{http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers}HTTP conditional gets}} + (ie: last modified and ETag handling) and GZip encoded feeds. It should enable users to write aggregators that follow the + {{{http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level}Atom aggregator behaviour recommendations}} + + As with the rest of Rome, the Fetcher subproject is ultra\-lean \- it requires {{{./Dependencies/index.html}no new dependencies}} + over the requirements for Rome. 
+ +Tutorials + + * {{{./BuildingTheRomeFetcher.html}Building the Rome Fetcher (fetcher)}} + + * {{{./UsingTheRomeFetcherModuleToRetrieveFeeds.html}Using the Rome Fetcher module to retrieve feeds (fetcher)}} + + * {{{./SampleProgramsIncluded.html}Sample programs included (fetcher)}} + + [] + +Todo list + + * {{{./TodoList.html}Todo list (fetcher)}} + + [] + +Wish list + + * {{{./WishList.html}Wish list (fetcher)}} + + [] + +Issues + + * {{{./KnownIssues.html}Known Issues (fetcher)}} + + [] + +Releases + + * {{{./Releases/RomeFetcher0.3.html}Rome Fetcher 0.3 (fetcher)}} + + * {{{./Releases/RomeFetcher0.4.html}Rome Fetcher 0.4 (fetcher)}} + + * {{{./Releases/RomeFetcher0.5.html}Rome Fetcher 0.5 (fetcher)}} + + * {{{./Releases/ROMEFetcher0.6.html}ROME Fetcher 0.6 (fetcher)}} + + * {{{./Releases/ROMEFetcher0.7.html}ROME Fetcher 0.7 (fetcher)}} + + * {{{./Releases/ROMEFetcher0.9.html}ROME Fetcher 0.9 (fetcher)}} + + * {{{./Releases/ROMEFetcher1.0RC2.html}ROME Fetcher 1.0 RC2 (fetcher)}} + + * {{{./Releases/ROMEFetcher1.0.html}ROME Fetcher 1.0 (fetcher)}} + + [] + +Change Log + + * {{{./ChangeLog.html}Change Log (fetcher)}} + + [] + \ No newline at end of file diff --git a/src/site/resources/.nojekyll b/src/site/resources/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/src/site/resources/Releases/rome-fetcher-0.3-src.tar.gz b/src/site/resources/Releases/rome-fetcher-0.3-src.tar.gz new file mode 100644 index 0000000..0043cc9 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.3-src.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.3-src.zip b/src/site/resources/Releases/rome-fetcher-0.3-src.zip new file mode 100644 index 0000000..b806b31 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.3-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.3.tar.gz b/src/site/resources/Releases/rome-fetcher-0.3.tar.gz new file mode 100644 index 0000000..958b25e Binary files /dev/null and 
b/src/site/resources/Releases/rome-fetcher-0.3.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.3.zip b/src/site/resources/Releases/rome-fetcher-0.3.zip new file mode 100644 index 0000000..ba85cd1 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.3.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.4-src.tar.gz b/src/site/resources/Releases/rome-fetcher-0.4-src.tar.gz new file mode 100644 index 0000000..0af41b0 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.4-src.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.4-src.zip b/src/site/resources/Releases/rome-fetcher-0.4-src.zip new file mode 100644 index 0000000..697c2a6 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.4-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.4.tar.gz b/src/site/resources/Releases/rome-fetcher-0.4.tar.gz new file mode 100644 index 0000000..93adbc0 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.4.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.4.zip b/src/site/resources/Releases/rome-fetcher-0.4.zip new file mode 100644 index 0000000..44f1d64 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.4.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.5-src.tar.gz b/src/site/resources/Releases/rome-fetcher-0.5-src.tar.gz new file mode 100644 index 0000000..ee71caa Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.5-src.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.5-src.zip b/src/site/resources/Releases/rome-fetcher-0.5-src.zip new file mode 100644 index 0000000..eb6ce42 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.5-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.5.tar.gz b/src/site/resources/Releases/rome-fetcher-0.5.tar.gz new file mode 100644 index 0000000..cc58dd8 Binary 
files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.5.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.5.zip b/src/site/resources/Releases/rome-fetcher-0.5.zip new file mode 100644 index 0000000..27c9b55 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.5.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.6-src.tar.gz b/src/site/resources/Releases/rome-fetcher-0.6-src.tar.gz new file mode 100644 index 0000000..0d69b1b Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.6-src.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.6-src.zip b/src/site/resources/Releases/rome-fetcher-0.6-src.zip new file mode 100644 index 0000000..67ab51a Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.6-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.6.tar.gz b/src/site/resources/Releases/rome-fetcher-0.6.tar.gz new file mode 100644 index 0000000..c5a996a Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.6.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.6.zip b/src/site/resources/Releases/rome-fetcher-0.6.zip new file mode 100644 index 0000000..8890994 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.6.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.7-src.tar.gz b/src/site/resources/Releases/rome-fetcher-0.7-src.tar.gz new file mode 100644 index 0000000..30bc5a8 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.7-src.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.7-src.zip b/src/site/resources/Releases/rome-fetcher-0.7-src.zip new file mode 100644 index 0000000..ee788d4 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.7-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.7.tar.gz b/src/site/resources/Releases/rome-fetcher-0.7.tar.gz new file mode 100644 index 
0000000..804aa97 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.7.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.7.zip b/src/site/resources/Releases/rome-fetcher-0.7.zip new file mode 100644 index 0000000..45934eb Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.7.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.9-src.tar.gz b/src/site/resources/Releases/rome-fetcher-0.9-src.tar.gz new file mode 100644 index 0000000..2b1bdf9 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.9-src.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.9-src.zip b/src/site/resources/Releases/rome-fetcher-0.9-src.zip new file mode 100644 index 0000000..1899ac7 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.9-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-0.9.tar.gz b/src/site/resources/Releases/rome-fetcher-0.9.tar.gz new file mode 100644 index 0000000..1560db9 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.9.tar.gz differ diff --git a/src/site/resources/Releases/rome-fetcher-0.9.zip b/src/site/resources/Releases/rome-fetcher-0.9.zip new file mode 100644 index 0000000..43c48a9 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-0.9.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0-javadoc.jar b/src/site/resources/Releases/rome-fetcher-1.0-javadoc.jar new file mode 100644 index 0000000..7ea9678 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0-javadoc.jar differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0-sources.jar b/src/site/resources/Releases/rome-fetcher-1.0-sources.jar new file mode 100644 index 0000000..bf1be0f Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0-sources.jar differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0.jar 
b/src/site/resources/Releases/rome-fetcher-1.0.jar new file mode 100644 index 0000000..b889d47 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0.jar differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0RC2-javadoc.jar b/src/site/resources/Releases/rome-fetcher-1.0RC2-javadoc.jar new file mode 100644 index 0000000..4b08a4a Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0RC2-javadoc.jar differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0RC2-sources.jar b/src/site/resources/Releases/rome-fetcher-1.0RC2-sources.jar new file mode 100644 index 0000000..a471611 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0RC2-sources.jar differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0RC2-src.zip b/src/site/resources/Releases/rome-fetcher-1.0RC2-src.zip new file mode 100644 index 0000000..d2ac106 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0RC2-src.zip differ diff --git a/src/site/resources/Releases/rome-fetcher-1.0RC2.jar b/src/site/resources/Releases/rome-fetcher-1.0RC2.jar new file mode 100644 index 0000000..c8a3f85 Binary files /dev/null and b/src/site/resources/Releases/rome-fetcher-1.0RC2.jar differ diff --git a/src/site/resources/css/site.css b/src/site/resources/css/site.css new file mode 100644 index 0000000..43c3cd8 --- /dev/null +++ b/src/site/resources/css/site.css @@ -0,0 +1,8 @@ +h1 { + padding: 4px 4px 4px 6px; + border: 1px solid #999; + color: #900; + background-color: #ddd; + font-weight:900; + font-size: x-large; +} \ No newline at end of file diff --git a/src/site/resources/images/romelogo.png b/src/site/resources/images/romelogo.png new file mode 100644 index 0000000..2c90608 Binary files /dev/null and b/src/site/resources/images/romelogo.png differ diff --git a/src/site/site.xml b/src/site/site.xml new file mode 100644 index 0000000..4e8df2e --- /dev/null +++ b/src/site/site.xml @@ -0,0 +1,41 @@ + + + + + 
org.apache.maven.skins + maven-fluido-skin + 1.3.0 + + + + ROME fetcher + images/romelogo.png + http://github.com/rometools/ + + + + + + + +

+ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/java/com/rometools/fetcher/AbstractJettyTest.java b/src/test/java/com/rometools/fetcher/AbstractJettyTest.java new file mode 100644 index 0000000..141c2b2 --- /dev/null +++ b/src/test/java/com/rometools/fetcher/AbstractJettyTest.java @@ -0,0 +1,424 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher; + +import java.net.URL; + +import junit.framework.TestCase; + +import org.mortbay.http.BasicAuthenticator; +import org.mortbay.http.HashUserRealm; +import org.mortbay.http.HttpContext; +import org.mortbay.http.HttpServer; +import org.mortbay.http.SecurityConstraint; +import org.mortbay.http.SocketListener; +import org.mortbay.http.UserRealm; +import org.mortbay.http.handler.SecurityHandler; +import org.mortbay.jetty.servlet.ServletHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.rometools.fetcher.impl.FeedFetcherCache; +import com.rometools.fetcher.impl.HashMapFeedInfoCache; +import com.rometools.rome.feed.atom.Entry; +import com.rometools.rome.feed.synd.SyndEntry; +import com.rometools.rome.feed.synd.SyndFeed; + +/** + * @author nl + */ +public abstract class AbstractJettyTest extends TestCase { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractJettyTest.class); + + private HttpServer server; + private final int testPort = 8283; + + /** + * 
@param s + */ + public AbstractJettyTest(final String s) { + super(s); + } + + protected HttpServer getServer() { + return server; + } + + protected abstract FeedFetcher getFeedFetcher(); + + protected abstract FeedFetcher getFeedFetcher(FeedFetcherCache cache); + + /** + * @see junit.framework.TestCase#setUp() + */ + @Override + protected void setUp() throws Exception { + setupServer(); + + final HttpContext context = createContext(); + + final ServletHandler servlets = createServletHandler(); + context.addHandler(servlets); + + server.addContext(context); + + server.start(); + } + + /** + * @throws InterruptedException + */ + private void setupServer() throws InterruptedException { + + // Create the server + if (server != null) { + server.stop(); + server = null; + } + server = new HttpServer(); + + // Create a port listener + final SocketListener listener = new SocketListener(); + listener.setPort(testPort); + server.addListener(listener); + } + + /** + * @return + */ + private ServletHandler createServletHandler() { + final ServletHandler servlets = new ServletHandler(); + servlets.addServlet("FetcherTestServlet", FetcherTestServlet.SERVLET_MAPPING, "com.rometools.fetcher.FetcherTestServlet"); + servlets.addServlet("FetcherTestServlet", FetcherTestServlet.SERVLET_MAPPING2, "com.rometools.fetcher.FetcherTestServlet"); + return servlets; + } + + /** + * @return + */ + private HttpContext createContext() { + final HttpContext context = new HttpContext(); + context.setContextPath("/rome/*"); + return context; + } + + /** + * @see junit.framework.TestCase#tearDown() + */ + @Override + protected void tearDown() throws Exception { + if (server != null) { + server.stop(); + server.destroy(); + server = null; + } + } + + class FetcherEventListenerImpl implements FetcherListener { + boolean polled = false; + boolean retrieved = false; + boolean unchanged = false; + + public void reset() { + polled = false; + retrieved = false; + unchanged = false; + } + + /** + * @see 
com.rometools.rome.fetcher.FetcherListener#fetcherEvent(com.rometools.rome.fetcher.FetcherEvent) + */ + @Override + public void fetcherEvent(final FetcherEvent event) { + final String eventType = event.getEventType(); + if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(eventType)) { + LOG.debug("\tEVENT: Feed Polled. URL = " + event.getUrlString()); + polled = true; + } else if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(eventType)) { + LOG.debug("\tEVENT: Feed Retrieved. URL = " + event.getUrlString()); + retrieved = true; + } else if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(eventType)) { + LOG.debug("\tEVENT: Feed Unchanged. URL = " + event.getUrlString()); + unchanged = true; + } + } + } + + public void testRetrieveFeed() { + final FeedFetcher feedFetcher = getFeedFetcher(); + try { + final SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + public void testBasicAuthentication() { + try { + setupServer(); + + final HttpContext context = createContext(); + + final URL url = this.getClass().getResource("/testuser.properties"); + final UserRealm ur = new HashUserRealm("test", url.getFile()); + context.setRealm(ur); + + final BasicAuthenticator ba = new BasicAuthenticator(); + context.setAuthenticator(ba); + + final SecurityHandler sh = new SecurityHandler(); + context.addHandler(sh); + + final SecurityConstraint sc = new SecurityConstraint(); + sc.setName("test"); + sc.addRole("*"); + sc.setAuthenticate(true); + context.addSecurityConstraint("/", sc); + + final ServletHandler servlets = createServletHandler(); + context.addHandler(servlets); + + server.addContext(context); + + server.start(); + + final FeedFetcher feedFetcher = getAuthenticatedFeedFetcher(); + final SyndFeed feed = feedFetcher.retrieveFeed(new 
URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + } + + public abstract FeedFetcher getAuthenticatedFeedFetcher(); + + /** + * Test getting a feed via a http 301 redirect + * + */ + public void testRetrieveRedirectedFeed() { + final FeedFetcher feedFetcher = getFeedFetcher(); + try { + final SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet?redirect=TRUE")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + /** + * Test error handling + * + */ + public void testErrorHandling() { + final FeedFetcher feedFetcher = getFeedFetcher(); + try { + feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet?error=404")); + fail("4xx error handling did not work correctly"); + } catch (final FetcherException e) { + // expect this exception + assertEquals(404, e.getResponseCode()); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + try { + feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet?error=500")); + fail("5xx error handling did not work correctly"); + } catch (final FetcherException e) { + // expect this exception + assertEquals(500, e.getResponseCode()); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + public void testUserAgent() { + final FeedFetcher feedFetcher = getFeedFetcher(); + // LOG.debug(feedFetcher.getUserAgent()); + // LOG.debug(System.getProperty("rome.fetcher.version", "UNKNOWN")); + assertEquals("Rome Client (http://tinyurl.com/64t5n) Ver: " + System.getProperty("rome.fetcher.version", "UNKNOWN"), feedFetcher.getUserAgent()); + } + + /** + * Test 
events fired when there is no cache in use + * + */ + public void testFetchEvents() { + final FeedFetcher feedFetcher = getFeedFetcher(); + final FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); + feedFetcher.addFetcherEventListener(listener); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + + // since there is no cache, the events fired should be exactly the same if + // we re-retrieve the feed + feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + /** + * Test events fired when there is a cache in use + * + */ + public void testFetchEventsWithCache() { + final FeedFetcherCache feedInfoCache = new HashMapFeedInfoCache(); + final FeedFetcher feedFetcher = getFeedFetcher(feedInfoCache); + final FetcherEventListenerImpl listener = new FetcherEventListenerImpl(); + feedFetcher.addFetcherEventListener(listener); + try { + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + + // Since the feed is cached, the second request should not + // actually retrieve the feed + feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertTrue(listener.polled); + assertFalse(listener.retrieved); + assertTrue(listener.unchanged); + listener.reset(); + + // now simulate getting the feed after it 
has changed + feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet?refreshfeed=TRUE")); + assertNotNull(feed); + assertTrue(listener.polled); + assertTrue(listener.retrieved); + assertFalse(listener.unchanged); + listener.reset(); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + /** + * Test handling of GZipped feed + * + */ + public void testGZippedFeed() { + final FeedFetcher feedFetcher = getFeedFetcher(); + try { + final SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet?gzipfeed=TRUE")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + public void testPreserveWireFeed() throws Exception { + final FeedFetcher feedFetcher = getFeedFetcher(); + + // first check we the WireFeed is not preserved by default + SyndFeed feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + assertNull(feed.originalWireFeed()); + + SyndEntry syndEntry = feed.getEntries().get(0); + assertNotNull(syndEntry); + assertNull(syndEntry.getWireEntry()); + + // now turn on WireFeed preservation + feedFetcher.setPreserveWireFeed(true); + try { + feed = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + "/rome/FetcherTestServlet/")); + assertNotNull(feed); + assertEquals("atom_1.0.feed.title", feed.getTitle()); + assertNotNull(feed.originalWireFeed()); + + syndEntry = feed.getEntries().get(0); + assertNotNull(syndEntry); + assertNotNull(syndEntry.getWireEntry()); + + final Entry entry = (Entry) syndEntry.getWireEntry(); + assertEquals("atom_1.0.feed.entry[0].rights", entry.getRights()); + + } finally { + feedFetcher.setPreserveWireFeed(false); // reset + } + + } + + public void 
testDeltaEncoding() { + final FeedFetcherCache feedInfoCache = new HashMapFeedInfoCache(); + final FeedFetcher feedFetcher = getFeedFetcher(feedInfoCache); + try { + feedFetcher.setUsingDeltaEncoding(true); + + // first retrieval should just grab the default feed + final SyndFeed feed1 = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + + "/rome/FetcherTestServlet?deltaencode=TRUE&refreshfeed=TRUE")); + assertNotNull(feed1); + assertEquals("atom_1.0.feed.title", feed1.getTitle()); + assertEquals(2, feed1.getEntries().size()); + SyndEntry entry1 = feed1.getEntries().get(0); + assertEquals("atom_1.0.feed.entry[0].title", entry1.getTitle()); + + // second retrieval should get only the new item + /* + * This is breaking with Rome 0.5 ?? + */ + final SyndFeed feed2 = feedFetcher.retrieveFeed(new URL("http://localhost:" + testPort + + "/rome/FetcherTestServlet?deltaencode=TRUE&refreshfeed=TRUE")); + assertNotNull(feed2); + assertEquals(FetcherTestServlet.DELTA_FEED_TITLE, feed2.getTitle()); + assertEquals(3, feed2.getEntries().size()); + entry1 = feed2.getEntries().get(0); + assertEquals(FetcherTestServlet.DELTA_FEED_ENTRY_TITLE, entry1.getTitle()); + + final SyndEntry entry2 = feed2.getEntries().get(1); + assertEquals("atom_1.0.feed.entry[0].title", entry2.getTitle()); + + } catch (final Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + +} diff --git a/src/test/java/com/rometools/fetcher/FetcherTestServlet.java b/src/test/java/com/rometools/fetcher/FetcherTestServlet.java new file mode 100644 index 0000000..c967517 --- /dev/null +++ b/src/test/java/com/rometools/fetcher/FetcherTestServlet.java @@ -0,0 +1,248 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPOutputStream; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.rometools.rome.feed.synd.SyndContent; +import com.rometools.rome.feed.synd.SyndContentImpl; +import com.rometools.rome.feed.synd.SyndEntry; +import com.rometools.rome.feed.synd.SyndEntryImpl; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.feed.synd.SyndFeedImpl; +import com.rometools.rome.io.FeedException; +import com.rometools.rome.io.SyndFeedOutput; + +public class FetcherTestServlet extends HttpServlet { + + private static final long serialVersionUID = 1L; + + public static final String ETAG_1 = "ETAG-1"; + public static final String ETAG_2 = "ETAG-2"; + + public static final String DELTA_FEED_TITLE = "Delta Encoded Feed"; + public static final String DELTA_FEED_ENTRY_TITLE = "Delta Encoded Feed Entry"; + + public static final String SERVLET_MAPPING = "/FetcherTestServlet/*"; + public static final String SERVLET_MAPPING2 = "/FetcherTestServlet2/*"; + + /** + * @throws IOException + * @throws + * @see 
javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest, + * javax.servlet.http.HttpServletResponse) + */ + @Override + protected void doGet(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException { + + if ("TRUE".equalsIgnoreCase(request.getParameter("redirect"))) { + // testing redirection support + response.sendRedirect("/rome/FetcherTestServlet2/"); + return; + } else if (request.getParameter("error") != null) { + // response.sendError(HttpServletResponse.SC_NOT_FOUND); + final int errorToThrow = Integer.parseInt(request.getParameter("error")); + response.sendError(errorToThrow); + return; + } else { + + // We manually set the date headers using strings + // instead of the get/setDateHeader methods because + // those methods return longs, which has too much + // precision for the real date headers + // this is just a random date + String lastModifiedDate = "Thu, 08 Jan 2009 23:06:39 GMT"; + String eTag = ETAG_1; + + if ("TRUE".equalsIgnoreCase(request.getParameter("refreshfeed"))) { + lastModifiedDate = "Fri, 09 Jan 2009 12:06:39 GMT"; + eTag = ETAG_2; + } + + final boolean serveFeed = checkModified(request, lastModifiedDate, eTag) || "TRUE".equalsIgnoreCase(request.getParameter("deltaencode")); + final boolean gzip = "TRUE".equalsIgnoreCase(request.getParameter("gzipfeed")); + + if (serveFeed) { + final String aimHeader = request.getHeader("A-IM"); + final boolean serveDeltaEncodedFeed = aimHeader != null && aimHeader.indexOf("feed") >= 0 + && "TRUE".equalsIgnoreCase(request.getParameter("deltaencode")); + if (serveDeltaEncodedFeed) { + try { + sendDeltaEncodedData(response, lastModifiedDate, request.getHeader("If-None-Match"), eTag, gzip); + } catch (final FeedException e) { + throw new ServletException(e); + } + } else { + sendFeedData(response, lastModifiedDate, eTag, gzip); + } + return; + } else { + response.sendError(HttpServletResponse.SC_NOT_MODIFIED); + return; + } + } + } + + 
private boolean checkModified(final HttpServletRequest request, final String lastModifiedDate, final String eTag) { + + final String requestedETag = request.getHeader("If-None-Match"); + final String requestedLastModified = request.getHeader("If-Modified-Since"); + boolean modified = true; + boolean mustServer = false; + if (requestedETag != null) { + if (eTag.equals(requestedETag)) { + modified = false; + } else { + modified = true; + mustServer = true; + } + } + if (requestedLastModified != null) { + if (lastModifiedDate.equals(requestedLastModified)) { + modified = false; + } else { + modified = true; + mustServer = true; + } + } + final boolean serveFeed = modified || mustServer; + return serveFeed; + } + + /** + * @param request + * @param lastModifiedDate + * @param tag + * @param gzip + * @throws IOException + * @throws FeedException + */ + private void sendDeltaEncodedData(final HttpServletResponse response, final String lastModifiedDate, final String requestedETag, final String responseETag, + final boolean gzip) throws IOException, FeedException { + if (ETAG_1.equals(requestedETag) || ETAG_2.equals(requestedETag)) { + OutputStream out = null; + if (gzip) { + response.setHeader("Content-Encoding", "gzip"); + out = new GZIPOutputStream(response.getOutputStream()); + } else { + out = response.getOutputStream(); + } + + response.setContentType("text/xml"); + response.setStatus(226); + if (gzip) { + response.setHeader("IM", "feed, gzip"); + } else { + response.setHeader("IM", "feed"); + } + + if (responseETag != null) { + response.setHeader("ETag", responseETag); + } + if (lastModifiedDate != null) { + response.setHeader("Last-Modified", lastModifiedDate); + } + + final SyndFeed feed = new SyndFeedImpl(); + feed.setFeedType("atom_1.0"); + + feed.setTitle(DELTA_FEED_TITLE); + feed.setLink("http://rome.dev.java.net"); + feed.setDescription("This tests using rfc3229 delta encoding."); + + final List entries = new ArrayList(); + SyndEntry entry; + SyndContent 
description; + + entry = new SyndEntryImpl(); + entry.setTitle(DELTA_FEED_ENTRY_TITLE); + entry.setLink("http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html"); + try { + final DateFormat dateParser = new SimpleDateFormat("yyyy-MM-dd"); + entry.setPublishedDate(dateParser.parse("2004-11-25")); + } catch (final ParseException ex) { + // + } + description = new SyndContentImpl(); + description.setType("text/plain"); + description.setValue("Test for RFC3229 Delta Encoding"); + entry.setDescription(description); + entries.add(entry); + + feed.setEntries(entries); + + final SyndFeedOutput output = new SyndFeedOutput(); + output.output(feed, new OutputStreamWriter(out)); + } else { + sendFeedData(response, lastModifiedDate, responseETag, gzip); + } + } + + private void sendFeedData(final HttpServletResponse response, final String lastModifiedDate, final String eTag, final boolean gzip) throws IOException { + OutputStream out = null; + if (gzip) { + response.setHeader("Content-Encoding", "gzip"); + out = new GZIPOutputStream(response.getOutputStream()); + } else { + out = response.getOutputStream(); + } + + response.setContentType("text/xml"); + if (eTag != null) { + response.setHeader("ETag", eTag); + } + if (lastModifiedDate != null) { + response.setHeader("Last-Modified", lastModifiedDate); + } + + InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("/atom_1.0.xml"); + if (inputStream == null) { + inputStream = this.getClass().getResourceAsStream("/atom_1.0.xml"); + } + + final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + try { + String line; + while ((line = reader.readLine()) != null) { + out.write(line.getBytes()); + line = null; + } + } finally { + if (reader != null) { + reader.close(); + } + } + + out.close(); + } +} diff --git a/src/test/java/com/rometools/fetcher/TestBasicAuthenticator.java b/src/test/java/com/rometools/fetcher/TestBasicAuthenticator.java new file mode 100644 
index 0000000..355dd4b --- /dev/null +++ b/src/test/java/com/rometools/fetcher/TestBasicAuthenticator.java @@ -0,0 +1,38 @@
/*
 * Copyright 2004 Sun Microsystems, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package com.rometools.fetcher;

import java.net.Authenticator;
import java.net.PasswordAuthentication;

/**
 * Authenticator that answers password requests for {@code localhost} with fixed test
 * credentials and declines every other host.
 *
 * @author nl
 */
public class TestBasicAuthenticator extends Authenticator {

    /**
     * @return the test credentials when the requesting host is {@code localhost}, otherwise
     *         {@code null}
     * @see java.net.Authenticator#getPasswordAuthentication()
     */
    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        if (!"localhost".equals(getRequestingHost())) {
            return null;
        }
        return new PasswordAuthentication("username", "password".toCharArray());
    }
}
diff --git a/src/test/java/com/rometools/fetcher/impl/DiskFeedInfoCacheTest.java b/src/test/java/com/rometools/fetcher/impl/DiskFeedInfoCacheTest.java new file mode 100644 index 0000000..59b17bc --- /dev/null +++ b/src/test/java/com/rometools/fetcher/impl/DiskFeedInfoCacheTest.java @@ -0,0 +1,44 @@
package com.rometools.fetcher.impl;

import java.io.File;
import java.net.URL;

import junit.framework.TestCase;

import com.rometools.fetcher.impl.DiskFeedInfoCache;
import com.rometools.fetcher.impl.SyndFeedInfo;

/** Tests clearing and removing entries of the disk based feed info cache. */
public class DiskFeedInfoCacheTest extends TestCase {

    /** After {@code clear()} a previously stored entry can no longer be looked up. */
    public void testClear() throws Exception {
        final DiskFeedInfoCache cache = newCache();
        final SyndFeedInfo info = new SyndFeedInfo();
        final URL url = new URL("http://nowhere.com");
        cache.setFeedInfo(url, info);

        cache.clear();

        assertNull(cache.getFeedInfo(url));
    }

    /** The first {@code remove()} hands back the stored entry, the second finds nothing. */
    public void testRemove() throws Exception {
        final DiskFeedInfoCache cache = newCache();
        final SyndFeedInfo info = new SyndFeedInfo();
        final URL url = new URL("http://nowhere.com");
        cache.setFeedInfo(url, info);

        final SyndFeedInfo removedInfo = cache.remove(url);
        assertTrue(removedInfo.equals(info));

        assertNull(cache.remove(url));
    }

    /** Creates a cache backed by the {@code test-cache} directory, creating it if necessary. */
    private static DiskFeedInfoCache newCache() throws Exception {
        final File cacheDir = new File("test-cache");
        cacheDir.mkdir();
        cacheDir.deleteOnExit();
        return new DiskFeedInfoCache(cacheDir.getCanonicalPath());
    }

}
diff --git a/src/test/java/com/rometools/fetcher/impl/HashMapFeedInfoCacheTest.java b/src/test/java/com/rometools/fetcher/impl/HashMapFeedInfoCacheTest.java new file mode 100644 index 0000000..1ab7878 --- /dev/null +++ b/src/test/java/com/rometools/fetcher/impl/HashMapFeedInfoCacheTest.java @@ -0,0 +1,42 @@
package com.rometools.fetcher.impl;

import java.net.URL;

import junit.framework.TestCase;

import com.rometools.fetcher.impl.HashMapFeedInfoCache;
import com.rometools.fetcher.impl.SyndFeedInfo;

/** Tests removing and clearing entries of the in-memory feed info cache. */
public class HashMapFeedInfoCacheTest extends TestCase {

    /** {@code remove()} hands back the entry that was stored for the URL. */
    public void testRemove() throws Exception {
        final HashMapFeedInfoCache cache = new HashMapFeedInfoCache();
        assertNotNull(cache);

        final URL url = new URL("http://foo.com");
        final SyndFeedInfo stored = infoFor(url);
        cache.setFeedInfo(url, stored);

        final SyndFeedInfo returned = cache.remove(url);
        assertTrue(returned.equals(stored));
        assertTrue(url.equals(returned.getUrl()));
    }

    /** After {@code clear()} a previously stored entry can no longer be looked up. */
    public void testClear() throws Exception {
        final HashMapFeedInfoCache cache = new HashMapFeedInfoCache();
        assertNotNull(cache);

        final URL url = new URL("http://foo.com");
        cache.setFeedInfo(url, infoFor(url));

        // clear it
        cache.clear();

        // we should not get a result back
        assertNull(cache.getFeedInfo(url));
    }

    /** Creates a feed info whose URL property is set to the given URL. */
    private static SyndFeedInfo infoFor(final URL url) {
        final SyndFeedInfo info = new SyndFeedInfo();
        info.setUrl(url);
        return info;
    }
}
diff --git a/src/test/java/com/rometools/fetcher/impl/HttpClientFeedFetcherTest.java b/src/test/java/com/rometools/fetcher/impl/HttpClientFeedFetcherTest.java new file mode 100644 index 0000000..c64339b --- /dev/null +++ b/src/test/java/com/rometools/fetcher/impl/HttpClientFeedFetcherTest.java @@ -0,0 +1,65 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + */ +package com.rometools.fetcher.impl; + +import org.apache.commons.httpclient.Credentials; +import org.apache.commons.httpclient.UsernamePasswordCredentials; + +import com.rometools.fetcher.AbstractJettyTest; +import com.rometools.fetcher.FeedFetcher; +import com.rometools.fetcher.impl.FeedFetcherCache; +import com.rometools.fetcher.impl.HttpClientFeedFetcher; + +/** + * @author Nick Lothian + */ +public class HttpClientFeedFetcherTest extends AbstractJettyTest { + + public HttpClientFeedFetcherTest(final String s) { + super(s); + } + + /** + * @see com.rometools.rome.fetcher.impl.AbstractJettyTest#getFeedFetcher() + */ + @Override + protected FeedFetcher getFeedFetcher() { + return new HttpClientFeedFetcher(); + } + + @Override + protected FeedFetcher getFeedFetcher(final FeedFetcherCache cache) { + return new HttpClientFeedFetcher(cache); + } + + /** + * @see com.rometools.rome.fetcher.impl.AbstractJettyTest#getAuthenticatedFeedFetcher() + */ + @Override + public FeedFetcher getAuthenticatedFeedFetcher() { + return new HttpClientFeedFetcher(null, new HttpClientFeedFetcher.CredentialSupplier() { + @Override + public Credentials getCredentials(final String realm, final String host) { + if ("localhost".equals(host)) { + return new UsernamePasswordCredentials("username", "password"); + } else { + return null; + } + } + }); + } +} diff --git a/src/test/java/com/rometools/fetcher/impl/HttpURLFeedFetcherTest.java b/src/test/java/com/rometools/fetcher/impl/HttpURLFeedFetcherTest.java new file mode 100644 index 0000000..c0ba1fe --- /dev/null +++ b/src/test/java/com/rometools/fetcher/impl/HttpURLFeedFetcherTest.java @@ -0,0 +1,57 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import com.rometools.fetcher.AbstractJettyTest; +import com.rometools.fetcher.FeedFetcher; +import com.rometools.fetcher.TestBasicAuthenticator; +import com.rometools.fetcher.impl.FeedFetcherCache; +import com.rometools.fetcher.impl.HttpURLFeedFetcher; + +public class HttpURLFeedFetcherTest extends AbstractJettyTest { + + public HttpURLFeedFetcherTest(final String s) { + super(s); + } + + /** + * @see com.rometools.rome.fetcher.impl.AbstractJettyTest#getFeedFetcher() + */ + @Override + protected FeedFetcher getFeedFetcher() { + return new HttpURLFeedFetcher(); + } + + @Override + protected FeedFetcher getFeedFetcher(final FeedFetcherCache cache) { + return new HttpURLFeedFetcher(cache); + } + + /** + * @see com.rometools.rome.fetcher.impl.AbstractJettyTest#getAuthenticatedFeedFetcher() + */ + @Override + public FeedFetcher getAuthenticatedFeedFetcher() { + // setup the authenticator + java.net.Authenticator.setDefault(new TestBasicAuthenticator()); + + final FeedFetcher feedFetcher = getFeedFetcher(); + + return feedFetcher; + } + +} diff --git a/src/test/java/com/rometools/fetcher/impl/ResponseHandlerTest.java b/src/test/java/com/rometools/fetcher/impl/ResponseHandlerTest.java new file mode 100644 index 0000000..71bfb4f --- /dev/null +++ b/src/test/java/com/rometools/fetcher/impl/ResponseHandlerTest.java @@ -0,0 +1,42 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package com.rometools.fetcher.impl; + +import junit.framework.TestCase; + +import com.rometools.fetcher.impl.ResponseHandler; + +public class ResponseHandlerTest extends TestCase { + + /** + * Constructor for ResponseHandlerTest. + */ + public ResponseHandlerTest(final String arg0) { + super(arg0); + } + + public void testGetCharacterEncodingString() { + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding((String) null)); + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding("text/xml")); + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding("text/xml;")); + assertEquals("ISO-8859-4", ResponseHandler.getCharacterEncoding("text/xml; charset=ISO-8859-4")); + assertEquals("ISO-8859-4", ResponseHandler.getCharacterEncoding("text/xml;charset=ISO-8859-4")); + assertEquals("ISO-8859-4", ResponseHandler.getCharacterEncoding("text/xml;charset=ISO-8859-4;something")); + assertEquals(ResponseHandler.defaultCharacterEncoding, ResponseHandler.getCharacterEncoding("text/xml;something")); + } + +} diff --git a/src/test/java/com/rometools/fetcher/samples/FeedAggregator.java b/src/test/java/com/rometools/fetcher/samples/FeedAggregator.java new file mode 100644 index 0000000..32b7907 --- /dev/null +++ b/src/test/java/com/rometools/fetcher/samples/FeedAggregator.java @@ -0,0 +1,102 @@ +/* + * 
Copyright 2004 Sun Microsystems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package com.rometools.fetcher.samples;
+
+import java.io.PrintWriter;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.rometools.fetcher.FeedFetcher;
+import com.rometools.fetcher.impl.FeedFetcherCache;
+import com.rometools.fetcher.impl.HashMapFeedInfoCache;
+import com.rometools.fetcher.impl.HttpURLFeedFetcher;
+import com.rometools.rome.feed.synd.SyndEntry;
+import com.rometools.rome.feed.synd.SyndFeed;
+import com.rometools.rome.feed.synd.SyndFeedImpl;
+import com.rometools.rome.io.SyndFeedOutput;
+
+/**
+ * <p>
+ * It aggregates a list of RSS/Atom feeds (they can be of different types) into a single feed of the
+ * specified type.
+ * </p>
+ *
+ * <p>
+ * Converted from the original FeedAggregator sample
+ * </p>
+ *
+ * @author Alejandro Abdelnur
+ * @author Nick Lothian
+ *
+ */
+public class FeedAggregator {
+
+    /**
+     * Fetches every feed named on the command line, merges their entries into one new feed and
+     * writes that feed to standard output. When fewer than two arguments are given, or anything
+     * fails along the way, the error (if any) and a usage message are printed instead.
+     *
+     * @param args {@code args[0]} is the feed type of the aggregated output; {@code args[1..]}
+     *            are the URLs of the feeds to aggregate
+     */
+    public static void main(final String[] args) {
+
+        boolean ok = false;
+
+        if (args.length >= 2) {
+
+            try {
+
+                final String outputType = args[0];
+
+                final SyndFeed feed = new SyndFeedImpl();
+                feed.setFeedType(outputType);
+
+                feed.setTitle("Aggregated Feed");
+                feed.setDescription("Anonymous Aggregated Feed");
+                feed.setAuthor("anonymous");
+                feed.setLink("http://www.anonymous.com");
+
+                // Typed list instead of a raw List: the compiler now checks what is aggregated.
+                // The list is attached first and filled afterwards, so the feed sees the entries
+                // through the same list instance.
+                final List<SyndEntry> entries = new ArrayList<SyndEntry>();
+                feed.setEntries(entries);
+
+                final FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance();
+                final FeedFetcher feedFetcher = new HttpURLFeedFetcher(feedInfoCache);
+
+                // args[0] is the output type, so the feed URLs start at index 1.
+                for (int i = 1; i < args.length; i++) {
+                    final URL inputUrl = new URL(args[i]);
+                    final SyndFeed inFeed = feedFetcher.retrieveFeed(inputUrl);
+                    entries.addAll(inFeed.getEntries());
+                }
+
+                final SyndFeedOutput output = new SyndFeedOutput();
+                final PrintWriter writer = new PrintWriter(System.out);
+                output.output(feed, writer);
+                // This PrintWriter does not auto-flush; flush explicitly so the feed is not left
+                // in the buffer. System.out is deliberately not closed.
+                writer.flush();
+
+                ok = true;
+
+            } catch (final Exception ex) {
+                // Sample code: report any failure and fall through to the usage message.
+                System.out.println("ERROR: " + ex.getMessage());
+                ex.printStackTrace();
+            }
+
+        }
+
+        if (!ok) {
+            System.out.println();
+            System.out.println("FeedAggregator aggregates different feeds into a single one.");
+            System.out.println("The first parameter must be the feed type for the aggregated feed.");
+            System.out.println(" [valid values are: rss_0.9, rss_0.91, rss_0.92, rss_0.93, ]");
+            System.out.println(" [ rss_0.94, rss_1.0, rss_2.0 & atom_0.3 ]");
+            System.out.println("The second to last parameters are the URLs of feeds to aggregate.");
+            System.out.println();
+        }
+    }
+
+}
diff --git a/src/test/java/com/rometools/fetcher/samples/FeedReader.java b/src/test/java/com/rometools/fetcher/samples/FeedReader.java
new file mode 100644
index 0000000..54d4be7
--- /dev/null
+++ b/src/test/java/com/rometools/fetcher/samples/FeedReader.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2004 Sun Microsystems, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package com.rometools.fetcher.samples;
+
+import java.net.URL;
+
+import com.rometools.fetcher.FeedFetcher;
+import com.rometools.fetcher.FetcherEvent;
+import com.rometools.fetcher.FetcherListener;
+import com.rometools.fetcher.impl.FeedFetcherCache;
+import com.rometools.fetcher.impl.HashMapFeedInfoCache;
+import com.rometools.fetcher.impl.HttpURLFeedFetcher;
+import com.rometools.rome.feed.synd.SyndFeed;
+
+/**
+ * Reads and prints any RSS/Atom feed type. Converted from the original Rome sample FeedReader
+ * <p>
+ *
+ * @author Alejandro Abdelnur
+ * @author Nick Lothian
+ *
+ */
+public class FeedReader {
+
+    /**
+     * Retrieves the feed named by the single command-line argument twice and reports progress on
+     * standard error. If the argument count is wrong or the retrieval fails, a usage message is
+     * printed to standard output.
+     *
+     * @param args exactly one element: the URL of the feed to read
+     */
+    public static void main(final String[] args) {
+
+        final boolean ok = args.length == 1 && retrieveTwice(args[0]);
+
+        if (!ok) {
+            System.out.println();
+            System.out.println("FeedReader reads and prints any RSS/Atom feed type.");
+            System.out.println("The first parameter must be the URL of the feed to read.");
+            System.out.println();
+        }
+
+    }
+
+    /**
+     * Fetches the feed at {@code location}, prints its title and entry count, then fetches it a
+     * second time so the registered listener can show whether the feed was reported unchanged.
+     *
+     * @param location URL of the feed, as given on the command line
+     * @return {@code true} if both retrievals completed, {@code false} if any exception was
+     *         raised (it is reported before returning)
+     */
+    private static boolean retrieveTwice(final String location) {
+        try {
+            final URL feedUrl = new URL(location);
+            final FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance();
+            final FeedFetcher fetcher = new HttpURLFeedFetcher(feedInfoCache);
+            fetcher.addFetcherEventListener(new FetcherEventListenerImpl());
+
+            System.err.println("Retrieving feed " + feedUrl);
+            // First retrieval: a Feed Polled event is expected, followed by a
+            // Feed Retrieved event (assuming the feed is valid).
+            final SyndFeed feed = fetcher.retrieveFeed(feedUrl);
+
+            System.err.println(feedUrl + " retrieved");
+            System.err.println(feedUrl + " has a title: " + feed.getTitle() + " and contains " + feed.getEntries().size() + " entries.");
+
+            // Second retrieval: if the feed is unmodified and the server supports
+            // conditional gets, a "Feed Unchanged" event follows the Feed Polled event.
+            System.err.println("Polling " + feedUrl + " again to test conditional get support.");
+            fetcher.retrieveFeed(feedUrl);
+            System.err.println("If a \"Feed Unchanged\" event fired then the server supports conditional gets.");
+
+            return true;
+        } catch (final Exception ex) {
+            System.out.println("ERROR: " + ex.getMessage());
+            ex.printStackTrace();
+            return false;
+        }
+    }
+
+    /**
+     * Listener that prints polled, retrieved and unchanged events to standard error; any other
+     * event type is ignored.
+     */
+    static class FetcherEventListenerImpl implements FetcherListener {
+        /**
+         * Prints one line for each recognised event type.
+         *
+         * @see com.rometools.fetcher.FetcherListener#fetcherEvent(com.rometools.fetcher.FetcherEvent)
+         */
+        @Override
+        public void fetcherEvent(final FetcherEvent event) {
+            final String kind = event.getEventType();
+            if (FetcherEvent.EVENT_TYPE_FEED_POLLED.equals(kind)) {
+                System.err.println("\tEVENT: Feed Polled. URL = " + event.getUrlString());
+                return;
+            }
+            if (FetcherEvent.EVENT_TYPE_FEED_RETRIEVED.equals(kind)) {
+                System.err.println("\tEVENT: Feed Retrieved. URL = " + event.getUrlString());
+                return;
+            }
+            if (FetcherEvent.EVENT_TYPE_FEED_UNCHANGED.equals(kind)) {
+                System.err.println("\tEVENT: Feed Unchanged. URL = " + event.getUrlString());
+            }
+        }
+    }
+}
diff --git a/src/test/resources/atom_1.0.xml b/src/test/resources/atom_1.0.xml
new file mode 100644
index 0000000..9d83939
--- /dev/null
+++ b/src/test/resources/atom_1.0.xml
@@ -0,0 +1,78 @@
+ + + atom_1.0.feed.title + + + + rometest + + atom_1.0.feed.author.name + http://example.com + author0@example.com + + + atom_1.0.feed.contributor.name + http://example.com + author1@example.com + + atom_1.0.feed.tagline + http://example.com/blog/atom_1.0.xml + atom_1.0.feed.generator +atom_1.0.feed.copyright + 2000-01-01T00:00:00Z + + atom_1.0.feed.entry[0].title + + + + + http://example.com/blog/entry1 + + atom_1.0.feed.entry[0].author.name + http://example.com + author0@example.com + + + atom_1.0.feed.entry[0].contributor.name + http://example.com + author1@example.com + + 2000-01-01T00:00:00Z + 2000-01-01T01:00:00Z

atom_1.0.feed.entry[0].summary + atom_1.0.feed.entry[0].content[0] + rometest + atom_1.0.feed.entry[0].rights + + + atom_1.0.feed.entry[1].title + + + + http://example.com/blog/entry2 + + atom_1.0.feed.entry[1].author.name + http://example.com + author0@example.com + + + atom_1.0.feed.entry[1].contributor.name + http://example.com + author1@example.com + + 2000-02-01T00:00:00Z + 2000-02-01T01:00:00Z + atom_1.0.feed.entry[1].summary + atom_1.0.feed.entry[1].content[0] + rometest + + diff --git a/src/test/resources/logback-test.xml b/src/test/resources/logback-test.xml new file mode 100644 index 0000000..44dea42 --- /dev/null +++ b/src/test/resources/logback-test.xml @@ -0,0 +1,13 @@ + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger - %msg%n + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/testuser.properties b/src/test/resources/testuser.properties new file mode 100644 index 0000000..28bc8e6 --- /dev/null +++ b/src/test/resources/testuser.properties @@ -0,0 +1 @@ +username: password