1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.fetcher.impl;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.net.HttpURLConnection;
23 import java.net.URL;
24 import java.net.URLConnection;
25 import java.util.zip.GZIPInputStream;
26
27 import com.sun.syndication.feed.synd.SyndFeed;
28 import com.sun.syndication.fetcher.FetcherEvent;
29 import com.sun.syndication.fetcher.FetcherException;
30 import com.sun.syndication.io.FeedException;
31 import com.sun.syndication.io.SyndFeedInput;
32 import com.sun.syndication.io.XmlReader;
33
34 /***
35 * <p>Class to retrieve syndication files via HTTP.</p>
36 *
37 * <p>If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCache} in the
38 * constructor it will use conditional gets to only retrieve modified content.</p>
39 *
40 * <p>The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where
41 * supported by the server.</p>
42 *
43 * <p>Simple usage:
44 * <pre>
45 * // create the cache
46 * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
47 * // retrieve the feed the first time
48 * // any subsequent request will use conditional gets and only
49 * // retrieve the resource if it has changed
50 * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
51 *</pre>
52 *
53 * </p>
54 *
55 * @see <a href="http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers">http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers</a>
56 * @see <a href="http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level">http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level</a>
57 * @see <a href="http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html">http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html</a>
58 * @author Nick Lothian
59 */
60 public class HttpURLFeedFetcher extends AbstractFeedFetcher {
61 static final int POLL_EVENT = 1;
62 static final int RETRIEVE_EVENT = 2;
63 static final int UNCHANGED_EVENT = 3;
64
65 private FeedFetcherCache feedInfoCache;
66
67
68 /***
69 * Constructor to use HttpURLFeedFetcher without caching of feeds
70 *
71 */
72 public HttpURLFeedFetcher() {
73 super();
74 }
75
76 /***
77 * Constructor to enable HttpURLFeedFetcher to cache feeds
78 *
79 * @param feedCache - an instance of the FeedFetcherCache interface
80 */
81 public HttpURLFeedFetcher(FeedFetcherCache feedCache) {
82 this();
83 feedInfoCache = feedCache;
84 }
85
86 /***
87 * Retrieve a feed over HTTP
88 *
89 * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
90 * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
91 * @throws IllegalArgumentException if the URL is null;
92 * @throws IOException if a TCP error occurs
93 * @throws FeedException if the feed is not valid
94 * @throws FetcherException if a HTTP error occurred
95 */
96 public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
97 if (feedUrl == null) {
98 throw new IllegalArgumentException("null is not a valid URL");
99 }
100
101 URLConnection connection = feedUrl.openConnection();
102 if (!(connection instanceof HttpURLConnection)) {
103 throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url");
104 }
105 HttpURLConnection httpConnection = (HttpURLConnection)connection;
106
107
108 if (feedInfoCache != null) {
109 SyndFeedInfo syndFeedInfo = feedInfoCache.getFeedInfo(feedUrl);
110 setRequestHeaders(connection, syndFeedInfo);
111 connection.connect();
112 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
113
114 if (syndFeedInfo == null) {
115
116 syndFeedInfo = new SyndFeedInfo();
117 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
118 } else {
119
120 int responseCode = httpConnection.getResponseCode();
121 if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
122
123
124
125
126 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
127 } else {
128
129 fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
130 }
131 }
132
133 return syndFeedInfo.getSyndFeed();
134 } else {
135 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
136 try {
137 InputStream inputStream = feedUrl.openStream();
138 return getSyndFeedFromStream(inputStream, connection);
139 } catch (java.io.IOException e) {
140 handleErrorCodes(((HttpURLConnection)connection).getResponseCode());
141 }
142
143 return null;
144 }
145 }
146
147 protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException {
148 handleErrorCodes(connection.getResponseCode());
149
150 resetFeedInfo(feedUrl, syndFeedInfo, connection);
151
152
153
154 if (feedInfoCache != null) {
155 feedInfoCache.setFeedInfo(feedUrl, syndFeedInfo);
156 }
157 }
158
159 protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, IOException, FeedException {
160
161 syndFeedInfo.setUrl(connection.getURL());
162
163
164
165 syndFeedInfo.setId(orignalUrl.toString());
166
167
168 syndFeedInfo.setLastModified(new Long(connection.getLastModified()));
169
170
171 syndFeedInfo.setETag(connection.getHeaderField("ETag"));
172
173
174 InputStream inputStream = null;
175 try {
176 inputStream = connection.getInputStream();
177 SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection);
178
179 String imHeader = connection.getHeaderField("IM");
180 if (isUsingDeltaEncoding() && (imHeader!= null && imHeader.indexOf("feed") >= 0) && (feedInfoCache != null) && connection.getResponseCode() == 226) {
181
182
183 SyndFeedInfo cachedInfo = feedInfoCache.getFeedInfo(orignalUrl);
184 if (cachedInfo != null) {
185 SyndFeed cachedFeed = cachedInfo.getSyndFeed();
186
187
188 syndFeed = combineFeeds(cachedFeed, syndFeed);
189 }
190 }
191
192 syndFeedInfo.setSyndFeed(syndFeed);
193 } finally {
194 if (inputStream != null) {
195 inputStream.close();
196 }
197 }
198 }
199
200 /***
201 * <p>Set appropriate HTTP headers, including conditional get and gzip encoding headers</p>
202 *
203 * @param connection A URLConnection
204 * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null
205 */
206 protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) {
207 if (syndFeedInfo != null) {
208
209
210 if (syndFeedInfo.getLastModified() != null) {
211 Object lastModified = syndFeedInfo.getLastModified();
212 if (lastModified instanceof Long) {
213 connection.setIfModifiedSince(((Long)syndFeedInfo.getLastModified()).longValue());
214 }
215 }
216 if (syndFeedInfo.getETag() != null) {
217 connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag());
218 }
219
220 }
221
222 connection.setRequestProperty("Accept-Encoding", "gzip");
223
224
225 connection.addRequestProperty("User-Agent", getUserAgent());
226
227 if (isUsingDeltaEncoding()) {
228 connection.addRequestProperty("A-IM", "feed");
229 }
230 }
231
232 private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
233 BufferedInputStream is;
234 if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
235
236 is = new BufferedInputStream(new GZIPInputStream(inputStream));
237 } else {
238 is = new BufferedInputStream(inputStream);
239 }
240
241
242
243
244
245 XmlReader reader = null;
246 if (connection.getHeaderField("Content-Type") != null) {
247 reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true);
248 } else {
249 reader = new XmlReader(is, true);
250 }
251
252 SyndFeed feed = new SyndFeedInput().build(reader);
253 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed);
254 return feed;
255 }
256
257 /***
258 * @return The FeedFetcherCache used by this fetcher (Could be null)
259 */
260 public FeedFetcherCache getFeedInfoCache() {
261 return feedInfoCache;
262 }
263
264 /***
265 * @param cache The cache to be used by this fetcher (pass null to stop using a cache)
266 */
267 public void setFeedInfoCache(FeedFetcherCache cache) {
268 feedInfoCache = cache;
269 }
270 }