View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  
18  package com.sun.syndication.fetcher.impl;
19  
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.net.HttpURLConnection;
23  import java.net.MalformedURLException;
24  import java.net.URL;
25  import java.util.zip.GZIPInputStream;
26  
27  import org.apache.commons.httpclient.Credentials;
28  import org.apache.commons.httpclient.Header;
29  import org.apache.commons.httpclient.HttpClient;
30  import org.apache.commons.httpclient.HttpException;
31  import org.apache.commons.httpclient.HttpMethod;
32  import org.apache.commons.httpclient.methods.GetMethod;
33  
34  import com.sun.syndication.feed.synd.SyndFeed;
35  import com.sun.syndication.fetcher.FetcherEvent;
36  import com.sun.syndication.fetcher.FetcherException;
37  import com.sun.syndication.io.FeedException;
38  import com.sun.syndication.io.SyndFeedInput;
39  import com.sun.syndication.io.XmlReader;
40  
41  /***
42   * @author Nick Lothian
43   */
44  public class HttpClientFeedFetcher extends AbstractFeedFetcher {
45  
46  	private FeedFetcherCache feedInfoCache;
47      private CredentialSupplier credentialSupplier;
48  		
49  	public HttpClientFeedFetcher() {
50  		super();
51  	}
52  	
53  	/***
54  	 * @param cache
55  	 */
56  	public HttpClientFeedFetcher(FeedFetcherCache cache) {
57  		this();
58  		setFeedInfoCache(cache);
59  	}
60  
61  	
62  	public HttpClientFeedFetcher(FeedFetcherCache cache, CredentialSupplier credentialSupplier) {
63  	    this(cache);
64  	    setCredentialSupplier(credentialSupplier);
65  	}
66  
67  	/***
68  	 * @return the feedInfoCache.
69  	 */
70  	public synchronized FeedFetcherCache getFeedInfoCache() {
71  		return feedInfoCache;
72  	}
73  	
74      /***
75  	 * @param feedInfoCache the feedInfoCache to set
76  	 */
77  	public synchronized void setFeedInfoCache(FeedFetcherCache feedInfoCache) {
78  		this.feedInfoCache = feedInfoCache;
79  	}
80  
81  	/***
82       * @return Returns the credentialSupplier.
83       */
84      public synchronized CredentialSupplier getCredentialSupplier() {
85          return credentialSupplier;
86      }
87      /***
88       * @param credentialSupplier The credentialSupplier to set.
89       */
90      public synchronized void setCredentialSupplier(CredentialSupplier credentialSupplier) {
91          this.credentialSupplier = credentialSupplier;
92      }	
93  	
94  	/***
95  	 * @see com.sun.syndication.fetcher.FeedFetcher#retrieveFeed(java.net.URL)
96  	 */
97  	public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
98  		if (feedUrl == null) {
99  			throw new IllegalArgumentException("null is not a valid URL");
100 		}
101 		// TODO Fix this
102 		//System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog");
103 		HttpClient client = new HttpClient();
104 		
105 		if (getCredentialSupplier() != null) {
106 			client.getState().setAuthenticationPreemptive(true);
107 			// TODO what should realm be here?
108 			Credentials credentials = getCredentialSupplier().getCredentials(null, feedUrl.getHost()); 
109 			if (credentials != null) {
110 			    client.getState().setCredentials(null, feedUrl.getHost(), credentials);
111 			}			
112 		}
113 		
114 		
115 		System.setProperty("httpclient.useragent", getUserAgent());
116 		String urlStr = feedUrl.toString();
117 		FeedFetcherCache cache = getFeedInfoCache();
118 		if (cache != null) {
119 			// retrieve feed
120 			HttpMethod method = new GetMethod(urlStr);
121 			method.addRequestHeader("Accept-Encoding", "gzip");
122 			try {
123 				if (isUsingDeltaEncoding()) {
124 				    method.setRequestHeader("A-IM", "feed");
125 				}	    
126 
127 				// get the feed info from the cache
128 			    // Note that syndFeedInfo will be null if it is not in the cache
129 				SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);			
130 			    if (syndFeedInfo != null) {
131 				    method.setRequestHeader("If-None-Match", syndFeedInfo.getETag());
132 				    
133 				    if (syndFeedInfo.getLastModified() instanceof String) {
134 				        method.setRequestHeader("If-Modified-Since", (String)syndFeedInfo.getLastModified());
135 				    }
136 			    }
137 			    
138 			    method.setFollowRedirects(true);			
139 				
140 				int statusCode = client.executeMethod(method);
141 				fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);				
142 				handleErrorCodes(statusCode);			    
143 			    			    
144 			    SyndFeed feed = getFeed(syndFeedInfo, urlStr, method, statusCode);
145 			    				    
146 				syndFeedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);
147 				
148 				cache.setFeedInfo(new URL(urlStr), syndFeedInfo);	
149 				
150 				// the feed may have been modified to pick up cached values
151 				// (eg - for delta encoding)
152 				feed = syndFeedInfo.getSyndFeed();
153 	
154 				return feed;
155 			} finally {
156 				method.releaseConnection();
157 			}
158 				
159 		} else {
160 		    // cache is not in use		    
161 			HttpMethod method = new GetMethod(urlStr);
162 			try {
163 			    method.setFollowRedirects(true);			
164 				
165 				int statusCode = client.executeMethod(method);
166 				fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
167 				handleErrorCodes(statusCode);		
168 			    
169 				return getFeed(null, urlStr, method, statusCode);
170 			} finally {
171 				method.releaseConnection();
172 			}
173 		}
174 	}
175 
176 
177 	/***
178      * @param feedUrl
179      * @param urlStr
180      * @param method
181      * @param feed
182      * @return
183      * @throws MalformedURLException
184      */
185     private SyndFeedInfo buildSyndFeedInfo(URL feedUrl, String urlStr, HttpMethod method, SyndFeed feed, int statusCode) throws MalformedURLException {
186         SyndFeedInfo syndFeedInfo;
187         syndFeedInfo = new SyndFeedInfo();
188         
189         // this may be different to feedURL because of 3XX redirects
190         syndFeedInfo.setUrl(new URL(urlStr));
191         syndFeedInfo.setId(feedUrl.toString());                					
192                 
193         Header imHeader = method.getResponseHeader("IM");
194         if (imHeader != null && imHeader.getValue().indexOf("feed") >= 0 && isUsingDeltaEncoding()) {
195 			FeedFetcherCache cache = getFeedInfoCache();
196 			if (cache != null && statusCode == 226) {
197 			    // client is setup to use http delta encoding and the server supports it and has returned a delta encoded response
198 			    // This response only includes new items
199 			    SyndFeedInfo cachedInfo = cache.getFeedInfo(feedUrl);
200 			    if (cachedInfo != null) {
201 				    SyndFeed cachedFeed = cachedInfo.getSyndFeed();
202 				    
203 				    // set the new feed to be the orginal feed plus the new items
204 				    feed = combineFeeds(cachedFeed, feed);			        
205 			    }            
206 			}
207 		}
208         
209         Header lastModifiedHeader = method.getResponseHeader("Last-Modified");
210         if (lastModifiedHeader != null) {
211             syndFeedInfo.setLastModified(lastModifiedHeader.getValue());
212         }
213         
214         Header eTagHeader = method.getResponseHeader("ETag");
215         if (eTagHeader != null) {
216             syndFeedInfo.setETag(eTagHeader.getValue());
217         }
218         
219         syndFeedInfo.setSyndFeed(feed);
220         
221         return syndFeedInfo;
222     }
223 
224     /***
225 	 * @param client
226 	 * @param urlStr
227 	 * @param method
228 	 * @return
229 	 * @throws IOException
230 	 * @throws HttpException
231 	 * @throws FetcherException
232 	 * @throws FeedException
233 	 */
234 	private static SyndFeed retrieveFeed(String urlStr, HttpMethod method) throws IOException, HttpException, FetcherException, FeedException {
235 		
236 		InputStream stream = null;
237 		if ((method.getResponseHeader("Content-Encoding") != null) && ("gzip".equalsIgnoreCase(method.getResponseHeader("Content-Encoding").getValue()))) {		
238 		    stream = new GZIPInputStream(method.getResponseBodyAsStream());
239 		} else {
240 		    stream = method.getResponseBodyAsStream();
241 		}		
242 		try {		
243 		    XmlReader reader = null;
244 		    if (method.getResponseHeader("Content-Type") != null) {
245 		        reader = new XmlReader(stream, method.getResponseHeader("Content-Type").getValue(), true);
246 		    } else {
247 		        reader = new XmlReader(stream, true);
248 		    }
249 			return new SyndFeedInput().build(reader);
250 		} finally {
251 		    if (stream != null) {
252 		        stream.close();
253 		    }
254 		}
255 	}
256 
257 	private SyndFeed getFeed(SyndFeedInfo syndFeedInfo, String urlStr, HttpMethod method, int statusCode) throws IOException, HttpException, FetcherException, FeedException {
258 
259 		if (statusCode == HttpURLConnection.HTTP_NOT_MODIFIED && syndFeedInfo != null) {
260 		    fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
261 		    return syndFeedInfo.getSyndFeed();
262 		}
263 		
264 		SyndFeed feed = retrieveFeed(urlStr, method);
265 		fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, feed);			
266 		return feed;
267 	}
268 	
269     public interface CredentialSupplier {
270         public Credentials getCredentials(String realm, String host);
271     }
272 	
273 
274 }