1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.fetcher.impl;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.net.HttpURLConnection;
23 import java.net.URL;
24 import java.net.URLConnection;
25 import java.util.zip.GZIPInputStream;
26
27 import com.sun.syndication.feed.synd.SyndFeed;
28 import com.sun.syndication.fetcher.FetcherEvent;
29 import com.sun.syndication.fetcher.FetcherException;
30 import com.sun.syndication.io.FeedException;
31 import com.sun.syndication.io.SyndFeedInput;
32 import com.sun.syndication.io.XmlReader;
33
34 /***
35 * <p>Class to retrieve syndication files via HTTP.</p>
36 *
37 * <p>If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCache} in the
38 * constructor it will use conditional gets to only retrieve modified content.</p>
39 *
40 * <p>The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where
41 * supported by the server.</p>
42 *
43 * <p>Simple usage:
44 * <pre>
45 * // create the cache
46 * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
47 * // retrieve the feed the first time
48 * // any subsequent request will use conditional gets and only
49 * // retrieve the resource if it has changed
50 * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
51 *</pre>
52 *
53 * </p>
54 *
55 * @see <a href="http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers">http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers</a>
56 * @see <a href="http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level">http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level</a>
57 * @see <a href="http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html">http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html</a>
58 * @author Nick Lothian
59 */
60 public class HttpURLFeedFetcher extends AbstractFeedFetcher {
61 static final int POLL_EVENT = 1;
62 static final int RETRIEVE_EVENT = 2;
63 static final int UNCHANGED_EVENT = 3;
64
65 private FeedFetcherCache feedInfoCache;
66
67
68 /***
69 * Constructor to use HttpURLFeedFetcher without caching of feeds
70 *
71 */
72 public HttpURLFeedFetcher() {
73 super();
74 }
75
76 /***
77 * Constructor to enable HttpURLFeedFetcher to cache feeds
78 *
79 * @param feedCache - an instance of the FeedFetcherCache interface
80 */
81 public HttpURLFeedFetcher(FeedFetcherCache feedCache) {
82 this();
83 feedInfoCache = feedCache;
84 }
85
86 /***
87 * Retrieve a feed over HTTP
88 *
89 * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
90 * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
91 * @throws IllegalArgumentException if the URL is null;
92 * @throws IOException if a TCP error occurs
93 * @throws FeedException if the feed is not valid
94 * @throws FetcherException if a HTTP error occurred
95 */
96 public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
97 if (feedUrl == null) {
98 throw new IllegalArgumentException("null is not a valid URL");
99 }
100
101 URLConnection connection = feedUrl.openConnection();
102 if (!(connection instanceof HttpURLConnection)) {
103 throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url");
104 }
105 HttpURLConnection httpConnection = (HttpURLConnection)connection;
106
107
108 if (feedInfoCache != null) {
109 SyndFeedInfo syndFeedInfo = feedInfoCache.getFeedInfo(feedUrl);
110 setRequestHeaders(connection, syndFeedInfo);
111 httpConnection.connect();
112 try {
113 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
114
115 if (syndFeedInfo == null) {
116
117 syndFeedInfo = new SyndFeedInfo();
118 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
119 } else {
120
121 int responseCode = httpConnection.getResponseCode();
122 if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
123
124
125
126
127 retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
128 } else {
129
130 fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
131 }
132 }
133
134 return syndFeedInfo.getSyndFeed();
135 } finally {
136 httpConnection.disconnect();
137 }
138 } else {
139 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
140 InputStream inputStream = null;
141 try {
142 inputStream = feedUrl.openStream();
143 return getSyndFeedFromStream(inputStream, connection);
144 } catch (java.io.IOException e) {
145 handleErrorCodes(((HttpURLConnection)connection).getResponseCode());
146 } finally {
147 if (inputStream != null) {
148 inputStream.close();
149 }
150 }
151
152 return null;
153 }
154 }
155
156 protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException {
157 handleErrorCodes(connection.getResponseCode());
158
159 resetFeedInfo(feedUrl, syndFeedInfo, connection);
160
161
162
163 if (feedInfoCache != null) {
164 feedInfoCache.setFeedInfo(feedUrl, syndFeedInfo);
165 }
166 }
167
168 protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, IOException, FeedException {
169
170 syndFeedInfo.setUrl(connection.getURL());
171
172
173
174 syndFeedInfo.setId(orignalUrl.toString());
175
176
177 syndFeedInfo.setLastModified(new Long(connection.getLastModified()));
178
179
180 syndFeedInfo.setETag(connection.getHeaderField("ETag"));
181
182
183 InputStream inputStream = null;
184 try {
185 inputStream = connection.getInputStream();
186 SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection);
187
188 String imHeader = connection.getHeaderField("IM");
189 if (isUsingDeltaEncoding() && (imHeader!= null && imHeader.indexOf("feed") >= 0) && (feedInfoCache != null) && connection.getResponseCode() == 226) {
190
191
192 SyndFeedInfo cachedInfo = feedInfoCache.getFeedInfo(orignalUrl);
193 if (cachedInfo != null) {
194 SyndFeed cachedFeed = cachedInfo.getSyndFeed();
195
196
197 syndFeed = combineFeeds(cachedFeed, syndFeed);
198 }
199 }
200
201 syndFeedInfo.setSyndFeed(syndFeed);
202 } finally {
203 if (inputStream != null) {
204 inputStream.close();
205 }
206 }
207 }
208
209 /***
210 * <p>Set appropriate HTTP headers, including conditional get and gzip encoding headers</p>
211 *
212 * @param connection A URLConnection
213 * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null
214 */
215 protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) {
216 if (syndFeedInfo != null) {
217
218
219 if (syndFeedInfo.getLastModified() != null) {
220 Object lastModified = syndFeedInfo.getLastModified();
221 if (lastModified instanceof Long) {
222 connection.setIfModifiedSince(((Long)syndFeedInfo.getLastModified()).longValue());
223 }
224 }
225 if (syndFeedInfo.getETag() != null) {
226 connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag());
227 }
228
229 }
230
231 connection.setRequestProperty("Accept-Encoding", "gzip");
232
233
234 connection.addRequestProperty("User-Agent", getUserAgent());
235
236 if (isUsingDeltaEncoding()) {
237 connection.addRequestProperty("A-IM", "feed");
238 }
239 }
240
241 private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
242 BufferedInputStream is;
243 if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
244
245 is = new BufferedInputStream(new GZIPInputStream(inputStream));
246 } else {
247 is = new BufferedInputStream(inputStream);
248 }
249
250
251
252
253
254 XmlReader reader = null;
255 if (connection.getHeaderField("Content-Type") != null) {
256 reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true);
257 } else {
258 reader = new XmlReader(is, true);
259 }
260
261 SyndFeed feed = new SyndFeedInput().build(reader);
262 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed);
263 return feed;
264 }
265
266 /***
267 * @return The FeedFetcherCache used by this fetcher (Could be null)
268 */
269 public FeedFetcherCache getFeedInfoCache() {
270 return feedInfoCache;
271 }
272
273 /***
274 * @param cache The cache to be used by this fetcher (pass null to stop using a cache)
275 */
276 public void setFeedInfoCache(FeedFetcherCache cache) {
277 feedInfoCache = cache;
278 }
279 }