1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.File;
21 import java.io.FileNotFoundException;
22 import java.io.FileReader;
23 import java.io.IOException;
24 import java.io.Reader;
25 import java.util.List;
26
27 import org.jdom.Document;
28 import org.jdom.JDOMException;
29 import org.jdom.input.DOMBuilder;
30 import org.jdom.input.JDOMParseException;
31 import org.xml.sax.EntityResolver;
32 import org.xml.sax.InputSource;
33 import org.xml.sax.SAXNotRecognizedException;
34 import org.xml.sax.SAXNotSupportedException;
35 import org.xml.sax.XMLReader;
36
37 import com.sun.syndication.feed.WireFeed;
38 import com.sun.syndication.io.impl.FeedParsers;
39 import com.sun.syndication.io.impl.XmlFixerReader;
40
41 /***
42 * Parses an XML document (File, InputStream, Reader, W3C SAX InputSource, W3C DOM Document or JDom DOcument)
43 * into an WireFeed (RSS/Atom).
44 * <p>
45 * It accepts all flavors of RSS (0.90, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0) and
46 * Atom 0.3 feeds. Parsers are plugable (they must implement the WireFeedParser interface).
47 * <p>
48 * The WireFeedInput useds liberal parsers.
49 * <p>
50 * @author Alejandro Abdelnur
51 *
52 */
53 public class WireFeedInput {
54 private static FeedParsers FEED_PARSERS = new FeedParsers();
55 private static final InputSource EMPTY_INPUTSOURCE = new InputSource(new ByteArrayInputStream(new byte[0]));
56 private static final EntityResolver RESOLVER = new EmptyEntityResolver();
57
58 private static class EmptyEntityResolver implements EntityResolver {
59 public InputSource resolveEntity(String publicId, String systemId) {
60 if(systemId != null && systemId.endsWith(".dtd")) return EMPTY_INPUTSOURCE;
61 return null;
62 }
63 }
64
65 private boolean _validate;
66
67 private boolean _xmlHealerOn;
68
69 /***
70 * Returns the list of supported input feed types.
71 * <p>
72 * @see WireFeed for details on the format of these strings.
73 * <p>
74 * @return a list of String elements with the supported input feed types.
75 *
76 */
77 public static List getSupportedFeedTypes() {
78 return FEED_PARSERS.getSupportedFeedTypes();
79 }
80
81 /***
82 * Creates a WireFeedInput instance with input validation turned off.
83 * <p>
84 *
85 */
86 public WireFeedInput() {
87 this (false);
88 }
89
90 /***
91 * Creates a WireFeedInput instance.
92 * <p>
93 * @param validate indicates if the input should be validated. NOT IMPLEMENTED YET (validation does not happen)
94 *
95 */
96 public WireFeedInput(boolean validate) {
97 _validate = false;
98 _xmlHealerOn = true;
99 }
100
101 /***
102 * Enables XML healing in the WiredFeedInput instance.
103 * <p>
104 * Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog.
105 * <p>
106 * Healing resolves HTML entities (from literal to code number) in the reader.
107 * <p>
108 * The healing is done only with the build(File) and build(Reader) signatures.
109 * <p>
110 * By default is TRUE.
111 * <p>
112 * @param heals TRUE enables stream healing, FALSE disables it.
113 *
114 */
115 public void setXmlHealerOn(boolean heals) {
116 _xmlHealerOn = heals;
117 }
118
119 /***
120 * Indicates if the WiredFeedInput instance will XML heal (if necessary) the character stream.
121 * <p>
122 * Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog.
123 * <p>
124 * Healing resolves HTML entities (from literal to code number) in the reader.
125 * <p>
126 * The healing is done only with the build(File) and build(Reader) signatures.
127 * <p>
128 * By default is TRUE.
129 * <p>
130 * @return TRUE if healing is enabled, FALSE if not.
131 *
132 */
133 public boolean getXmlHealerOn() {
134 return _xmlHealerOn;
135 }
136
137 /***
138 * Builds an WireFeed (RSS or Atom) from a file.
139 * <p>
140 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
141 * <p>
142 * @param file file to read to create the WireFeed.
143 * @return the WireFeed read from the file.
144 * @throws FileNotFoundException thrown if the file could not be found.
145 * @throws IOException thrown if there is problem reading the file.
146 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
147 * @throws FeedException if the feed could not be parsed
148 *
149 */
150 public WireFeed build(File file) throws FileNotFoundException,IOException,IllegalArgumentException,FeedException {
151 WireFeed feed;
152 Reader reader = new FileReader(file);
153 if (_xmlHealerOn) {
154 reader = new XmlFixerReader(reader);
155 }
156 feed = build(reader);
157 reader.close();
158 return feed;
159 }
160
161 /***
162 * Builds an WireFeed (RSS or Atom) from an Reader.
163 * <p>
164 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
165 * <p>
166 * @param reader Reader to read to create the WireFeed.
167 * @return the WireFeed read from the Reader.
168 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
169 * @throws FeedException if the feed could not be parsed
170 *
171 */
172 public WireFeed build(Reader reader) throws IllegalArgumentException,FeedException {
173 SAXBuilder saxBuilder = createSAXBuilder();
174 try {
175 if (_xmlHealerOn) {
176 reader = new XmlFixerReader(reader);
177 }
178 Document document = saxBuilder.build(reader);
179 return build(document);
180 }
181 catch (JDOMParseException ex) {
182 throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex);
183 }
184 catch (Exception ex) {
185 throw new ParsingFeedException("Invalid XML",ex);
186 }
187 }
188
189 /***
190 * Builds an WireFeed (RSS or Atom) from an W3C SAX InputSource.
191 * <p>
192 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
193 * <p>
194 * @param is W3C SAX InputSource to read to create the WireFeed.
195 * @return the WireFeed read from the W3C SAX InputSource.
196 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
197 * @throws FeedException if the feed could not be parsed
198 *
199 */
200 public WireFeed build(InputSource is) throws IllegalArgumentException,FeedException {
201 SAXBuilder saxBuilder = createSAXBuilder();
202 try {
203 Document document = saxBuilder.build(is);
204 return build(document);
205 }
206 catch (JDOMParseException ex) {
207 throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex);
208 }
209 catch (Exception ex) {
210 throw new ParsingFeedException("Invalid XML",ex);
211 }
212 }
213
214 /***
215 * Builds an WireFeed (RSS or Atom) from an W3C DOM document.
216 * <p>
217 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
218 * <p>
219 * @param document W3C DOM document to read to create the WireFeed.
220 * @return the WireFeed read from the W3C DOM document.
221 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
222 * @throws FeedException if the feed could not be parsed
223 *
224 */
225 public WireFeed build(org.w3c.dom.Document document) throws IllegalArgumentException,FeedException {
226 DOMBuilder domBuilder = new DOMBuilder();
227 try {
228 Document jdomDoc = domBuilder.build(document);
229 return build(jdomDoc);
230 }
231 catch (Exception ex) {
232 throw new ParsingFeedException("Invalid XML",ex);
233 }
234 }
235
236 /***
237 * Builds an WireFeed (RSS or Atom) from an JDOM document.
238 * <p>
239 * NOTE: All other build methods delegate to this method.
240 * <p>
241 * @param document JDOM document to read to create the WireFeed.
242 * @return the WireFeed read from the JDOM document.
243 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
244 * @throws FeedException if the feed could not be parsed
245 *
246 */
247 public WireFeed build(Document document) throws IllegalArgumentException,FeedException {
248 WireFeedParser parser = FEED_PARSERS.getParserFor(document);
249 if (parser==null) {
250 throw new IllegalArgumentException("Invalid document");
251 }
252 return parser.parse(document, _validate);
253 }
254
255 /***
256 * Creates and sets up a org.jdom.input.SAXBuilder for parsing.
257 *
258 * @return a new org.jdom.input.SAXBuilder object
259 */
260 protected SAXBuilder createSAXBuilder() {
261 SAXBuilder saxBuilder = new SAXBuilder(_validate);
262 saxBuilder.setEntityResolver(RESOLVER);
263
264
265
266
267
268
269
270
271
272
273 try {
274 XMLReader parser = saxBuilder.createParser();
275 try {
276 parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
277 saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
278 } catch (SAXNotRecognizedException e) {
279
280 } catch (SAXNotSupportedException e) {
281
282 }
283
284 try {
285 parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
286 saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
287 } catch (SAXNotRecognizedException e) {
288
289 } catch (SAXNotSupportedException e) {
290
291 }
292
293 try {
294 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
295 saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
296 } catch (SAXNotRecognizedException e) {
297
298 } catch (SAXNotSupportedException e) {
299
300 }
301
302 } catch (JDOMException e) {
303 throw new IllegalStateException("JDOM could not create a SAX parser");
304 }
305
306 saxBuilder.setExpandEntities(false);
307 return saxBuilder;
308 }
309 }