1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.File;
21 import java.io.FileNotFoundException;
22 import java.io.FileReader;
23 import java.io.IOException;
24 import java.io.Reader;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.WeakHashMap;
28
29 import org.jdom.Document;
30 import org.jdom.JDOMException;
31 import org.jdom.input.DOMBuilder;
32 import org.jdom.input.JDOMParseException;
33 import org.xml.sax.EntityResolver;
34 import org.xml.sax.InputSource;
35 import org.xml.sax.SAXNotRecognizedException;
36 import org.xml.sax.SAXNotSupportedException;
37 import org.xml.sax.XMLReader;
38
39 import com.sun.syndication.feed.WireFeed;
40 import com.sun.syndication.io.impl.FeedParsers;
41 import com.sun.syndication.io.impl.XmlFixerReader;
42
43 /***
44 * Parses an XML document (File, InputStream, Reader, W3C SAX InputSource, W3C DOM Document or JDom DOcument)
45 * into an WireFeed (RSS/Atom).
46 * <p>
47 * It accepts all flavors of RSS (0.90, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0) and
48 * Atom 0.3 feeds. Parsers are plugable (they must implement the WireFeedParser interface).
49 * <p>
50 * The WireFeedInput useds liberal parsers.
51 * <p>
52 * @author Alejandro Abdelnur
53 *
54 */
55 public class WireFeedInput {
56
57 private static Map clMap = new WeakHashMap();
58
59 private static FeedParsers getFeedParsers() {
60 synchronized(WireFeedInput.class) {
61 FeedParsers parsers = (FeedParsers)
62 clMap.get(Thread.currentThread().getContextClassLoader());
63 if (parsers == null) {
64 parsers = new FeedParsers();
65 clMap.put(Thread.currentThread().getContextClassLoader(), parsers);
66 }
67 return parsers;
68 }
69 }
70
71 private static final InputSource EMPTY_INPUTSOURCE = new InputSource(new ByteArrayInputStream(new byte[0]));
72 private static final EntityResolver RESOLVER = new EmptyEntityResolver();
73
74 private static class EmptyEntityResolver implements EntityResolver {
75 public InputSource resolveEntity(String publicId, String systemId) {
76 if(systemId != null && systemId.endsWith(".dtd")) return EMPTY_INPUTSOURCE;
77 return null;
78 }
79 }
80
81 private boolean _validate;
82
83 private boolean _xmlHealerOn;
84
85 /***
86 * Returns the list of supported input feed types.
87 * <p>
88 * @see WireFeed for details on the format of these strings.
89 * <p>
90 * @return a list of String elements with the supported input feed types.
91 *
92 */
93 public static List getSupportedFeedTypes() {
94 return getFeedParsers().getSupportedFeedTypes();
95 }
96
97 /***
98 * Creates a WireFeedInput instance with input validation turned off.
99 * <p>
100 *
101 */
102 public WireFeedInput() {
103 this (false);
104 }
105
106 /***
107 * Creates a WireFeedInput instance.
108 * <p>
109 * @param validate indicates if the input should be validated. NOT IMPLEMENTED YET (validation does not happen)
110 *
111 */
112 public WireFeedInput(boolean validate) {
113 _validate = false;
114 _xmlHealerOn = true;
115 }
116
117 /***
118 * Enables XML healing in the WiredFeedInput instance.
119 * <p>
120 * Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog.
121 * <p>
122 * Healing resolves HTML entities (from literal to code number) in the reader.
123 * <p>
124 * The healing is done only with the build(File) and build(Reader) signatures.
125 * <p>
126 * By default is TRUE.
127 * <p>
128 * @param heals TRUE enables stream healing, FALSE disables it.
129 *
130 */
131 public void setXmlHealerOn(boolean heals) {
132 _xmlHealerOn = heals;
133 }
134
135 /***
136 * Indicates if the WiredFeedInput instance will XML heal (if necessary) the character stream.
137 * <p>
138 * Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog.
139 * <p>
140 * Healing resolves HTML entities (from literal to code number) in the reader.
141 * <p>
142 * The healing is done only with the build(File) and build(Reader) signatures.
143 * <p>
144 * By default is TRUE.
145 * <p>
146 * @return TRUE if healing is enabled, FALSE if not.
147 *
148 */
149 public boolean getXmlHealerOn() {
150 return _xmlHealerOn;
151 }
152
153 /***
154 * Builds an WireFeed (RSS or Atom) from a file.
155 * <p>
156 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
157 * <p>
158 * @param file file to read to create the WireFeed.
159 * @return the WireFeed read from the file.
160 * @throws FileNotFoundException thrown if the file could not be found.
161 * @throws IOException thrown if there is problem reading the file.
162 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
163 * @throws FeedException if the feed could not be parsed
164 *
165 */
166 public WireFeed build(File file) throws FileNotFoundException,IOException,IllegalArgumentException,FeedException {
167 WireFeed feed;
168 Reader reader = new FileReader(file);
169 if (_xmlHealerOn) {
170 reader = new XmlFixerReader(reader);
171 }
172 feed = build(reader);
173 reader.close();
174 return feed;
175 }
176
177 /***
178 * Builds an WireFeed (RSS or Atom) from an Reader.
179 * <p>
180 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
181 * <p>
182 * @param reader Reader to read to create the WireFeed.
183 * @return the WireFeed read from the Reader.
184 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
185 * @throws FeedException if the feed could not be parsed
186 *
187 */
188 public WireFeed build(Reader reader) throws IllegalArgumentException,FeedException {
189 SAXBuilder saxBuilder = createSAXBuilder();
190 try {
191 if (_xmlHealerOn) {
192 reader = new XmlFixerReader(reader);
193 }
194 Document document = saxBuilder.build(reader);
195 return build(document);
196 }
197 catch (JDOMParseException ex) {
198 throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex);
199 }
200 catch (IllegalArgumentException ex) {
201 throw ex;
202 }
203 catch (Exception ex) {
204 throw new ParsingFeedException("Invalid XML",ex);
205 }
206 }
207
208 /***
209 * Builds an WireFeed (RSS or Atom) from an W3C SAX InputSource.
210 * <p>
211 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
212 * <p>
213 * @param is W3C SAX InputSource to read to create the WireFeed.
214 * @return the WireFeed read from the W3C SAX InputSource.
215 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
216 * @throws FeedException if the feed could not be parsed
217 *
218 */
219 public WireFeed build(InputSource is) throws IllegalArgumentException,FeedException {
220 SAXBuilder saxBuilder = createSAXBuilder();
221 try {
222 Document document = saxBuilder.build(is);
223 return build(document);
224 }
225 catch (JDOMParseException ex) {
226 throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex);
227 }
228 catch (IllegalArgumentException ex) {
229 throw ex;
230 }
231 catch (Exception ex) {
232 throw new ParsingFeedException("Invalid XML",ex);
233 }
234 }
235
236 /***
237 * Builds an WireFeed (RSS or Atom) from an W3C DOM document.
238 * <p>
239 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
240 * <p>
241 * @param document W3C DOM document to read to create the WireFeed.
242 * @return the WireFeed read from the W3C DOM document.
243 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
244 * @throws FeedException if the feed could not be parsed
245 *
246 */
247 public WireFeed build(org.w3c.dom.Document document) throws IllegalArgumentException,FeedException {
248 DOMBuilder domBuilder = new DOMBuilder();
249 try {
250 Document jdomDoc = domBuilder.build(document);
251 return build(jdomDoc);
252 }
253 catch (IllegalArgumentException ex) {
254 throw ex;
255 }
256 catch (Exception ex) {
257 throw new ParsingFeedException("Invalid XML",ex);
258 }
259 }
260
261 /***
262 * Builds an WireFeed (RSS or Atom) from an JDOM document.
263 * <p>
264 * NOTE: All other build methods delegate to this method.
265 * <p>
266 * @param document JDOM document to read to create the WireFeed.
267 * @return the WireFeed read from the JDOM document.
268 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
269 * @throws FeedException if the feed could not be parsed
270 *
271 */
272 public WireFeed build(Document document) throws IllegalArgumentException,FeedException {
273 WireFeedParser parser = getFeedParsers().getParserFor(document);
274 if (parser==null) {
275 throw new IllegalArgumentException("Invalid document");
276 }
277 return parser.parse(document, _validate);
278 }
279
280 /***
281 * Creates and sets up a org.jdom.input.SAXBuilder for parsing.
282 *
283 * @return a new org.jdom.input.SAXBuilder object
284 */
285 protected SAXBuilder createSAXBuilder() {
286 SAXBuilder saxBuilder = new SAXBuilder(_validate);
287 saxBuilder.setEntityResolver(RESOLVER);
288
289
290
291
292
293
294
295
296
297
298 try {
299 XMLReader parser = saxBuilder.createParser();
300 try {
301 parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
302 saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
303 } catch (SAXNotRecognizedException e) {
304
305 } catch (SAXNotSupportedException e) {
306
307 }
308
309 try {
310 parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
311 saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
312 } catch (SAXNotRecognizedException e) {
313
314 } catch (SAXNotSupportedException e) {
315
316 }
317
318 try {
319 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
320 saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
321 } catch (SAXNotRecognizedException e) {
322
323 } catch (SAXNotSupportedException e) {
324
325 }
326
327 } catch (JDOMException e) {
328 throw new IllegalStateException("JDOM could not create a SAX parser");
329 }
330
331 saxBuilder.setExpandEntities(false);
332 return saxBuilder;
333 }
334 }