1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io;
18
19 import com.sun.syndication.feed.WireFeed;
20 import com.sun.syndication.io.impl.FeedParsers;
21 import com.sun.syndication.io.impl.XmlFixerReader;
22 import org.jdom.Document;
23 import org.jdom.input.DOMBuilder;
24 import org.jdom.input.JDOMParseException;
25 import org.jdom.input.SAXBuilder;
26 import org.xml.sax.EntityResolver;
27 import org.xml.sax.InputSource;
28
29 import java.io.*;
30 import java.util.List;
31
32 /***
33 * Parses an XML document (File, InputStream, Reader, W3C SAX InputSource, W3C DOM Document or JDom DOcument)
34 * into an WireFeed (RSS/Atom).
35 * <p>
36 * It accepts all flavors of RSS (0.90, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0) and
37 * Atom 0.3 feeds. Parsers are plugable (they must implement the WireFeedParser interface).
38 * <p>
39 * The WireFeedInput useds liberal parsers.
40 * <p>
41 * @author Alejandro Abdelnur
42 *
43 */
44 public class WireFeedInput {
45 private static FeedParsers FEED_PARSERS = new FeedParsers();
46 private static final InputSource EMPTY_INPUTSOURCE = new InputSource(new ByteArrayInputStream(new byte[0]));
47 private static final EntityResolver RESOLVER = new EmptyEntityResolver();
48
49 private static class EmptyEntityResolver implements EntityResolver {
50 public InputSource resolveEntity(String publicId, String systemId) {
51 if(systemId != null && systemId.endsWith(".dtd")) return EMPTY_INPUTSOURCE;
52 return null;
53 }
54 }
55
56 private boolean _validate;
57
58 private boolean _xmlHealerOn;
59
60 /***
61 * Returns the list of supported input feed types.
62 * <p>
63 * @see WireFeed for details on the format of these strings.
64 * <p>
65 * @return a list of String elements with the supported input feed types.
66 *
67 */
68 public static List getSupportedFeedTypes() {
69 return FEED_PARSERS.getSupportedFeedTypes();
70 }
71
72 /***
73 * Creates a WireFeedInput instance with input validation turned off.
74 * <p>
75 *
76 */
77 public WireFeedInput() {
78 this (false);
79 }
80
81 /***
82 * Creates a WireFeedInput instance.
83 * <p>
84 * @param validate indicates if the input should be validated. NOT IMPLEMENTED YET (validation does not happen)
85 *
86 */
87 public WireFeedInput(boolean validate) {
88 _validate = false;
89 _xmlHealerOn = true;
90 }
91
92 /***
93 * Enables XML healing in the WiredFeedInput instance.
94 * <p>
95 * Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog.
96 * <p>
97 * Healing resolves HTML entities (from literal to code number) in the reader.
98 * <p>
99 * The healing is done only with the build(File) and build(Reader) signatures.
100 * <p>
101 * By default is TRUE.
102 * <p>
103 * @param heals TRUE enables stream healing, FALSE disables it.
104 *
105 */
106 public void setXmlHealerOn(boolean heals) {
107 _xmlHealerOn = heals;
108 }
109
110 /***
111 * Indicates if the WiredFeedInput instance will XML heal (if necessary) the character stream.
112 * <p>
113 * Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog.
114 * <p>
115 * Healing resolves HTML entities (from literal to code number) in the reader.
116 * <p>
117 * The healing is done only with the build(File) and build(Reader) signatures.
118 * <p>
119 * By default is TRUE.
120 * <p>
121 * @return TRUE if healing is enabled, FALSE if not.
122 *
123 */
124 public boolean getXmlHealerOn() {
125 return _xmlHealerOn;
126 }
127
128 /***
129 * Builds an WireFeed (RSS or Atom) from a file.
130 * <p>
131 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
132 * <p>
133 * @param file file to read to create the WireFeed.
134 * @return the WireFeed read from the file.
135 * @throws FileNotFoundException thrown if the file could not be found.
136 * @throws IOException thrown if there is problem reading the file.
137 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
138 * @throws FeedException if the feed could not be parsed
139 *
140 */
141 public WireFeed build(File file) throws FileNotFoundException,IOException,IllegalArgumentException,FeedException {
142 WireFeed feed;
143 Reader reader = new FileReader(file);
144 if (_xmlHealerOn) {
145 reader = new XmlFixerReader(reader);
146 }
147 feed = build(reader);
148 reader.close();
149 return feed;
150 }
151
152 /***
153 * Builds an WireFeed (RSS or Atom) from an Reader.
154 * <p>
155 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
156 * <p>
157 * @param reader Reader to read to create the WireFeed.
158 * @return the WireFeed read from the Reader.
159 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
160 * @throws FeedException if the feed could not be parsed
161 *
162 */
163 public WireFeed build(Reader reader) throws IllegalArgumentException,FeedException {
164 SAXBuilder saxBuilder = new SAXBuilder(_validate);
165 saxBuilder.setEntityResolver(RESOLVER);
166 try {
167 if (_xmlHealerOn) {
168 reader = new XmlFixerReader(reader);
169 }
170 Document document = saxBuilder.build(reader);
171 return build(document);
172 }
173 catch (JDOMParseException ex) {
174 throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex);
175 }
176 catch (Exception ex) {
177 throw new ParsingFeedException("Invalid XML",ex);
178 }
179 }
180
181 /***
182 * Builds an WireFeed (RSS or Atom) from an W3C SAX InputSource.
183 * <p>
184 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
185 * <p>
186 * @param is W3C SAX InputSource to read to create the WireFeed.
187 * @return the WireFeed read from the W3C SAX InputSource.
188 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
189 * @throws FeedException if the feed could not be parsed
190 *
191 */
192 public WireFeed build(InputSource is) throws IllegalArgumentException,FeedException {
193 SAXBuilder saxBuilder = new SAXBuilder(_validate);
194 saxBuilder.setEntityResolver(RESOLVER);
195 try {
196 Document document = saxBuilder.build(is);
197 return build(document);
198 }
199 catch (JDOMParseException ex) {
200 throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex);
201 }
202 catch (Exception ex) {
203 throw new ParsingFeedException("Invalid XML",ex);
204 }
205 }
206
207 /***
208 * Builds an WireFeed (RSS or Atom) from an W3C DOM document.
209 * <p>
210 * NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom.Document)'.
211 * <p>
212 * @param document W3C DOM document to read to create the WireFeed.
213 * @return the WireFeed read from the W3C DOM document.
214 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
215 * @throws FeedException if the feed could not be parsed
216 *
217 */
218 public WireFeed build(org.w3c.dom.Document document) throws IllegalArgumentException,FeedException {
219 DOMBuilder domBuilder = new DOMBuilder();
220 try {
221 Document jdomDoc = domBuilder.build(document);
222 return build(jdomDoc);
223 }
224 catch (Exception ex) {
225 throw new ParsingFeedException("Invalid XML",ex);
226 }
227 }
228
229 /***
230 * Builds an WireFeed (RSS or Atom) from an JDOM document.
231 * <p>
232 * NOTE: All other build methods delegate to this method.
233 * <p>
234 * @param document JDOM document to read to create the WireFeed.
235 * @return the WireFeed read from the JDOM document.
236 * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers.
237 * @throws FeedException if the feed could not be parsed
238 *
239 */
240 public WireFeed build(Document document) throws IllegalArgumentException,FeedException {
241 WireFeedParser parser = FEED_PARSERS.getParserFor(document);
242 if (parser==null) {
243 throw new IllegalArgumentException("Invalid document");
244 }
245 return parser.parse(document, _validate);
246 }
247
248 }