1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.fetcher.impl;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.net.HttpURLConnection;
23 import java.net.URL;
24 import java.net.URLConnection;
25 import java.util.zip.GZIPInputStream;
26
27 import com.sun.syndication.feed.synd.SyndFeed;
28 import com.sun.syndication.fetcher.FetcherEvent;
29 import com.sun.syndication.fetcher.FetcherException;
30 import com.sun.syndication.io.FeedException;
31 import com.sun.syndication.io.SyndFeedInput;
32 import com.sun.syndication.io.XmlReader;
33
34 /***
35 * <p>Class to retrieve syndication files via HTTP.</p>
36 *
37 * <p>If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCache} in the
38 * constructor it will use conditional gets to only retrieve modified content.</p>
39 *
40 * <p>The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where
41 * supported by the server.</p>
42 *
43 * <p>Simple usage:
44 * <pre>
45 * // create the cache
46 * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
47 * // retrieve the feed the first time
48 * // any subsequent request will use conditional gets and only
49 * // retrieve the resource if it has changed
50 * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
51 *</pre>
52 *
53 * </p>
54 *
55 * @see <a href="http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers">http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers</a>
56 * @see <a href="http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level">http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level</a>
57 * @see <a href="http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html">http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html</a>
58 * @author Nick Lothian
59 */
60 public class HttpURLFeedFetcher extends AbstractFeedFetcher {
61 static final int POLL_EVENT = 1;
62 static final int RETRIEVE_EVENT = 2;
63 static final int UNCHANGED_EVENT = 3;
64
65 private FeedFetcherCache feedInfoCache;
66
67
68 /***
69 * Constructor to use HttpURLFeedFetcher without caching of feeds
70 *
71 */
72 public HttpURLFeedFetcher() {
73 super();
74 }
75
76 /***
77 * Constructor to enable HttpURLFeedFetcher to cache feeds
78 *
79 * @param feedCache - an instance of the FeedFetcherCache interface
80 */
81 public HttpURLFeedFetcher(FeedFetcherCache feedInfoCache) {
82 this();
83 setFeedInfoCache(feedInfoCache);
84 }
85
86 /***
87 * Retrieve a feed over HTTP
88 *
89 * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
90 * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
91 * @throws IllegalArgumentException if the URL is null;
92 * @throws IOException if a TCP error occurs
93 * @throws FeedException if the feed is not valid
94 * @throws FetcherException if a HTTP error occurred
95 */
96 public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
97 if (feedUrl == null) {
98 throw new IllegalArgumentException("null is not a valid URL");
99 }
100
101 URLConnection connection = feedUrl.openConnection();
102 if (!(connection instanceof HttpURLConnection)) {
103 throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url");
104 }
105 HttpURLConnection httpConnection = (HttpURLConnection)connection;
106
107
108 FeedFetcherCache cache = getFeedInfoCache();
109 if (cache != null) {
110 SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
111 setRequestHeaders(connection, syndFeedInfo);
112 httpConnection.connect();
113 try {
114 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
115
116 if (syndFeedInfo == null) {
117
118 syndFeedInfo = new SyndFeedInfo();
119 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
120 } else {
121
122 int responseCode = httpConnection.getResponseCode();
123 if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
124
125
126
127
128 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
129 } else {
130
131 fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
132 }
133 }
134
135 return syndFeedInfo.getSyndFeed();
136 } finally {
137 httpConnection.disconnect();
138 }
139 } else {
140 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
141 InputStream inputStream = null;
142 setRequestHeaders(connection, null);
143 httpConnection.connect();
144 try {
145 inputStream = httpConnection.getInputStream();
146 return getSyndFeedFromStream(inputStream, connection);
147 } catch (java.io.IOException e) {
148 handleErrorCodes(((HttpURLConnection)connection).getResponseCode());
149 } finally {
150 if (inputStream != null) {
151 inputStream.close();
152 }
153 httpConnection.disconnect();
154 }
155
156 return null;
157 }
158 }
159
160 protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException {
161 handleErrorCodes(connection.getResponseCode());
162
163 resetFeedInfo(feedUrl, syndFeedInfo, connection);
164 FeedFetcherCache cache = getFeedInfoCache();
165
166
167
168 if (cache != null) {
169 cache.setFeedInfo(feedUrl, syndFeedInfo);
170 }
171 }
172
173 protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, IOException, FeedException {
174
175 syndFeedInfo.setUrl(connection.getURL());
176
177
178
179 syndFeedInfo.setId(orignalUrl.toString());
180
181
182 syndFeedInfo.setLastModified(new Long(connection.getLastModified()));
183
184
185 syndFeedInfo.setETag(connection.getHeaderField("ETag"));
186
187
188 InputStream inputStream = null;
189 try {
190 inputStream = connection.getInputStream();
191 SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection);
192
193 String imHeader = connection.getHeaderField("IM");
194 if (isUsingDeltaEncoding() && (imHeader!= null && imHeader.indexOf("feed") >= 0)) {
195 FeedFetcherCache cache = getFeedInfoCache();
196 if (cache != null && connection.getResponseCode() == 226) {
197
198
199 SyndFeedInfo cachedInfo = cache.getFeedInfo(orignalUrl);
200 if (cachedInfo != null) {
201 SyndFeed cachedFeed = cachedInfo.getSyndFeed();
202
203
204 syndFeed = combineFeeds(cachedFeed, syndFeed);
205 }
206 }
207 }
208
209 syndFeedInfo.setSyndFeed(syndFeed);
210 } finally {
211 if (inputStream != null) {
212 inputStream.close();
213 }
214 }
215 }
216
217 /***
218 * <p>Set appropriate HTTP headers, including conditional get and gzip encoding headers</p>
219 *
220 * @param connection A URLConnection
221 * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null
222 */
223 protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) {
224 if (syndFeedInfo != null) {
225
226
227 if (syndFeedInfo.getLastModified() != null) {
228 Object lastModified = syndFeedInfo.getLastModified();
229 if (lastModified instanceof Long) {
230 connection.setIfModifiedSince(((Long)syndFeedInfo.getLastModified()).longValue());
231 }
232 }
233 if (syndFeedInfo.getETag() != null) {
234 connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag());
235 }
236
237 }
238
239 connection.setRequestProperty("Accept-Encoding", "gzip");
240
241
242 connection.addRequestProperty("User-Agent", getUserAgent());
243
244 if (isUsingDeltaEncoding()) {
245 connection.addRequestProperty("A-IM", "feed");
246 }
247 }
248
249 private static SyndFeed readSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
250 BufferedInputStream is;
251 if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
252
253 is = new BufferedInputStream(new GZIPInputStream(inputStream));
254 } else {
255 is = new BufferedInputStream(inputStream);
256 }
257
258
259
260
261
262 XmlReader reader = null;
263 if (connection.getHeaderField("Content-Type") != null) {
264 reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true);
265 } else {
266 reader = new XmlReader(is, true);
267 }
268
269 return new SyndFeedInput().build(reader);
270 }
271
272 private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
273 SyndFeed feed = readSyndFeedFromStream(inputStream, connection);
274 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed);
275 return feed;
276 }
277
278 /***
279 * @return The FeedFetcherCache used by this fetcher (Could be null)
280 */
281 public synchronized FeedFetcherCache getFeedInfoCache() {
282 return feedInfoCache;
283 }
284
285 /***
286 * @param cache The cache to be used by this fetcher (pass null to stop using a cache)
287 */
288 public synchronized void setFeedInfoCache(FeedFetcherCache cache) {
289 feedInfoCache = cache;
290 }
291 }