1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.fetcher.impl;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.InputStreamReader;
23 import java.net.HttpURLConnection;
24 import java.net.URL;
25 import java.net.URLConnection;
26 import java.util.zip.GZIPInputStream;
27
28 import com.sun.syndication.feed.synd.SyndFeedI;
29 import com.sun.syndication.fetcher.FetcherEvent;
30 import com.sun.syndication.fetcher.FetcherException;
31 import com.sun.syndication.io.FeedException;
32 import com.sun.syndication.io.SyndFeedInput;
33
34 /***
35 * <p>Class to retrieve syndication files via HTTP.</p>
36 *
37 * <p>If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCacheI} in the
38 * constructor it will use conditional gets to only retrieve modified content.</p>
39 *
40 * <p>The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where
41 * supported by the server.</p>
42 *
43 * <p>Simple usage:
44 * <pre>
45 * // create the cache
46 * FeedFetcherCacheI feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
47 * // retrieve the feed the first time
48 * // any subsequent request will use conditional gets and only
49 * // retrieve the resource if it has changed
50 * SyndFeedI feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
51 *</pre>
52 *
53 * </p>
54 *
55 * @see <a href="http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers">http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers</a>
56 * @see <a href="http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level">http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level</a>
57 * @author Nick Lothian
58 */
59 public class HttpURLFeedFetcher extends AbstractFeedFetcher {
60 static final int POLL_EVENT = 1;
61 static final int RETRIEVE_EVENT = 2;
62 static final int UNCHANGED_EVENT = 3;
63
64 private FeedFetcherCacheI feedInfoCache;
65
66
67 /***
68 * Constructor to use HttpURLFeedFetcher without caching of feeds
69 *
70 */
71 public HttpURLFeedFetcher() {
72 super();
73 }
74
75 /***
76 * Constructor to enable HttpURLFeedFetcher to cache feeds
77 *
78 * @param feedCache - an instance of the FeedFetcherCacheI interface
79 */
80 public HttpURLFeedFetcher(FeedFetcherCacheI feedCache) {
81 this();
82 feedInfoCache = feedCache;
83 }
84
85 /***
86 * Retrieve a feed over HTTP
87 *
88 * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
89 * @return A {@link com.sun.syndication.feed.synd.SyndFeedI} object
90 * @throws IllegalArgumentException if the URL is null;
91 * @throws IOException if a TCP error occurs
92 * @throws FeedException if the feed is not valid
93 * @throws FetcherException if a HTTP error occurred
94 */
95 public SyndFeedI retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
96 if (feedUrl == null) {
97 throw new IllegalArgumentException("null is not a valid URL");
98 }
99
100 URLConnection connection = feedUrl.openConnection();
101 if (feedInfoCache != null) {
102 SyndFeedInfo syndFeedInfo = feedInfoCache.getFeedInfo(feedUrl);
103 setRequestHeaders(connection, syndFeedInfo);
104 connection.connect();
105 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
106 if (connection instanceof HttpURLConnection) {
107 HttpURLConnection httpConnection = (HttpURLConnection)connection;
108
109 if (syndFeedInfo == null) {
110
111 syndFeedInfo = new SyndFeedInfo();
112 retrieveAndCacheFeed(feedUrl, syndFeedInfo, connection);
113 } else {
114
115 int responseCode = httpConnection.getResponseCode();
116 if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
117
118
119
120
121 retrieveAndCacheFeed(feedUrl, syndFeedInfo, connection);
122 } else {
123
124 fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
125 }
126 }
127 } else {
128 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection);
129 }
130
131 return syndFeedInfo.getSyndFeed();
132 } else {
133 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
134 try {
135 InputStream inputStream = feedUrl.openStream();
136 return getSyndFeedFromStream(inputStream, connection);
137 } catch (java.io.IOException e) {
138 handleErrorCodes(((HttpURLConnection)connection).getResponseCode());
139 }
140
141 return null;
142 }
143 }
144
145 /***
146 * <p>Handles HTTP error codes.</p>
147 *
148 * @param responseCode the HTTP response code
149 * @throws FetcherException if response code is in the range 400 to 599 inclusive
150 */
151 protected void handleErrorCodes(int responseCode) throws FetcherException {
152
153
154 if (responseCode >= 400 && responseCode < 500) {
155 throw4XXError(responseCode);
156 } else if (responseCode >= 500 && responseCode < 600) {
157 throw new FetcherException(responseCode, "The server encounted an error");
158 }
159 }
160
161 private void throw4XXError(int responseCode) throws FetcherException {
162 throw new FetcherException(responseCode, "The requested resource could not be found");
163 }
164
165 protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, URLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException {
166 if (connection instanceof HttpURLConnection) {
167 HttpURLConnection httpConnection = (HttpURLConnection)connection;
168 handleErrorCodes(httpConnection.getResponseCode());
169 }
170
171 resetFeedInfo(feedUrl, syndFeedInfo, connection);
172
173
174
175 if (feedInfoCache != null) {
176 feedInfoCache.setFeedInfo(feedUrl, syndFeedInfo);
177 }
178 }
179
180 protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, URLConnection connection) throws IllegalArgumentException, IOException, FeedException {
181
182 syndFeedInfo.setUrl(connection.getURL());
183
184
185
186 syndFeedInfo.setId(orignalUrl.toString());
187
188
189 syndFeedInfo.setLastModified(connection.getLastModified());
190
191
192 syndFeedInfo.setETag(connection.getHeaderField("ETag"));
193
194
195 InputStream inputStream = null;
196 try {
197 inputStream = connection.getInputStream();
198 SyndFeedI syndFeed = getSyndFeedFromStream(inputStream, connection);
199 syndFeedInfo.setSyndFeed(syndFeed);
200 } finally {
201 if (inputStream != null) {
202 inputStream.close();
203 }
204 }
205 }
206
207 /***
208 * <p>Set appropriate HTTP headers, including conditional get and gzip encoding headers</p>
209 *
210 * @param connection A URLConnection
211 * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null
212 */
213 protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) {
214 if (syndFeedInfo != null) {
215
216
217 if (syndFeedInfo.getLastModified() != 0) {
218 connection.setIfModifiedSince(syndFeedInfo.getLastModified());
219 }
220 if (syndFeedInfo.getETag() != null) {
221 connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag());
222 }
223
224 }
225
226 connection.setRequestProperty("Accept-Encoding", "gzip");
227
228
229 connection.addRequestProperty("User-Agent", getUserAgent());
230 }
231
232 private SyndFeedI getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
233 BufferedInputStream is;
234 if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
235
236 is = new BufferedInputStream(new GZIPInputStream(inputStream));
237 } else {
238 is = new BufferedInputStream(inputStream);
239 }
240
241 InputStreamReader reader = new InputStreamReader(is, ResponseHandler.getCharacterEncoding(connection));
242
243 SyndFeedInput input = new SyndFeedInput();
244
245 SyndFeedI feed = input.build(reader);
246 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection);
247 return feed;
248 }
249
250 /***
251 * @return The FeedFetcherCacheI used by this fetcher (Could be null)
252 */
253 public FeedFetcherCacheI getFeedInfoCache() {
254 return feedInfoCache;
255 }
256
257 /***
258 * @param cache The cache to be used by this fetcher (pass null to stop using a cache)
259 */
260 public void setFeedInfoCache(FeedFetcherCacheI cache) {
261 feedInfoCache = cache;
262 }
263 }