1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.fetcher.impl;
18
19 import java.io.BufferedInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.InputStreamReader;
23 import java.net.HttpURLConnection;
24 import java.net.URL;
25 import java.net.URLConnection;
26 import java.util.zip.GZIPInputStream;
27
28 import com.sun.syndication.feed.synd.SyndFeed;
29 import com.sun.syndication.fetcher.FetcherEvent;
30 import com.sun.syndication.fetcher.FetcherException;
31 import com.sun.syndication.io.FeedException;
32 import com.sun.syndication.io.SyndFeedInput;
33
34 /***
35 * <p>Class to retrieve syndication files via HTTP.</p>
36 *
37 * <p>If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCache} in the
38 * constructor it will use conditional gets to only retrieve modified content.</p>
39 *
40 * <p>The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where
41 * supported by the server.</p>
42 *
43 * <p>Simple usage:
44 * <pre>
45 * // create the cache
46 * FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
47 * // retrieve the feed the first time
48 * // any subsequent request will use conditional gets and only
49 * // retrieve the resource if it has changed
50 * SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
51 *</pre>
52 *
53 * </p>
54 *
55 * @see <a href="http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers">http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers</a>
56 * @see <a href="http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level">http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level</a>
57 * @author Nick Lothian
58 */
59 public class HttpURLFeedFetcher extends AbstractFeedFetcher {
60 static final int POLL_EVENT = 1;
61 static final int RETRIEVE_EVENT = 2;
62 static final int UNCHANGED_EVENT = 3;
63
64 private FeedFetcherCache feedInfoCache;
65
66
67 /***
68 * Constructor to use HttpURLFeedFetcher without caching of feeds
69 *
70 */
71 public HttpURLFeedFetcher() {
72 super();
73 }
74
75 /***
76 * Constructor to enable HttpURLFeedFetcher to cache feeds
77 *
78 * @param feedCache - an instance of the FeedFetcherCache interface
79 */
80 public HttpURLFeedFetcher(FeedFetcherCache feedCache) {
81 this();
82 feedInfoCache = feedCache;
83 }
84
85 /***
86 * Retrieve a feed over HTTP
87 *
88 * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve
89 * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object
90 * @throws IllegalArgumentException if the URL is null;
91 * @throws IOException if a TCP error occurs
92 * @throws FeedException if the feed is not valid
93 * @throws FetcherException if a HTTP error occurred
94 */
95 public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
96 if (feedUrl == null) {
97 throw new IllegalArgumentException("null is not a valid URL");
98 }
99
100 URLConnection connection = feedUrl.openConnection();
101 if (feedInfoCache != null) {
102 SyndFeedInfo syndFeedInfo = feedInfoCache.getFeedInfo(feedUrl);
103 setRequestHeaders(connection, syndFeedInfo);
104 connection.connect();
105 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
106 if (connection instanceof HttpURLConnection) {
107 HttpURLConnection httpConnection = (HttpURLConnection)connection;
108
109 if (syndFeedInfo == null) {
110
111 syndFeedInfo = new SyndFeedInfo();
112 retrieveAndCacheFeed(feedUrl, syndFeedInfo, connection);
113 } else {
114
115 int responseCode = httpConnection.getResponseCode();
116 if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
117
118
119
120
121 retrieveAndCacheFeed(feedUrl, syndFeedInfo, connection);
122 } else {
123
124 fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
125 }
126 }
127 } else {
128 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection);
129 }
130
131 return syndFeedInfo.getSyndFeed();
132 } else {
133 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
134 try {
135 InputStream inputStream = feedUrl.openStream();
136 return getSyndFeedFromStream(inputStream, connection);
137 } catch (java.io.IOException e) {
138 handleErrorCodes(((HttpURLConnection)connection).getResponseCode());
139 }
140
141 return null;
142 }
143 }
144
145 protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, URLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException {
146 if (connection instanceof HttpURLConnection) {
147 HttpURLConnection httpConnection = (HttpURLConnection)connection;
148 handleErrorCodes(httpConnection.getResponseCode());
149 }
150
151 resetFeedInfo(feedUrl, syndFeedInfo, connection);
152
153
154
155 if (feedInfoCache != null) {
156 feedInfoCache.setFeedInfo(feedUrl, syndFeedInfo);
157 }
158 }
159
160 protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, URLConnection connection) throws IllegalArgumentException, IOException, FeedException {
161
162 syndFeedInfo.setUrl(connection.getURL());
163
164
165
166 syndFeedInfo.setId(orignalUrl.toString());
167
168
169 syndFeedInfo.setLastModified(new Long(connection.getLastModified()));
170
171
172 syndFeedInfo.setETag(connection.getHeaderField("ETag"));
173
174
175 InputStream inputStream = null;
176 try {
177 inputStream = connection.getInputStream();
178 SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection);
179 syndFeedInfo.setSyndFeed(syndFeed);
180 } finally {
181 if (inputStream != null) {
182 inputStream.close();
183 }
184 }
185 }
186
187 /***
188 * <p>Set appropriate HTTP headers, including conditional get and gzip encoding headers</p>
189 *
190 * @param connection A URLConnection
191 * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null
192 */
193 protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) {
194 if (syndFeedInfo != null) {
195
196
197 if (syndFeedInfo.getLastModified() != null) {
198 Object lastModified = syndFeedInfo.getLastModified();
199 if (lastModified instanceof Long) {
200 connection.setIfModifiedSince(((Long)syndFeedInfo.getLastModified()).longValue());
201 }
202 }
203 if (syndFeedInfo.getETag() != null) {
204 connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag());
205 }
206
207 }
208
209 connection.setRequestProperty("Accept-Encoding", "gzip");
210
211
212 connection.addRequestProperty("User-Agent", getUserAgent());
213 }
214
215 private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
216 BufferedInputStream is;
217 if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
218
219 is = new BufferedInputStream(new GZIPInputStream(inputStream));
220 } else {
221 is = new BufferedInputStream(inputStream);
222 }
223
224 InputStreamReader reader = new InputStreamReader(is, ResponseHandler.getCharacterEncoding(connection));
225
226 SyndFeedInput input = new SyndFeedInput();
227
228 SyndFeed feed = input.build(reader);
229 fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection);
230 return feed;
231 }
232
233 /***
234 * @return The FeedFetcherCache used by this fetcher (Could be null)
235 */
236 public FeedFetcherCache getFeedInfoCache() {
237 return feedInfoCache;
238 }
239
240 /***
241 * @param cache The cache to be used by this fetcher (pass null to stop using a cache)
242 */
243 public void setFeedInfoCache(FeedFetcherCache cache) {
244 feedInfoCache = cache;
245 }
246 }