1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io.impl;
18
19 import com.sun.syndication.feed.WireFeed;
20 import com.sun.syndication.feed.rss.Channel;
21 import com.sun.syndication.feed.rss.Image;
22 import com.sun.syndication.feed.rss.Item;
23 import com.sun.syndication.feed.rss.TextInput;
24 import com.sun.syndication.io.FeedException;
25 import org.jdom.Document;
26 import org.jdom.Element;
27 import org.jdom.Namespace;
28
29 import java.util.ArrayList;
30 import java.util.Collection;
31 import java.util.Iterator;
32 import java.util.List;
33
34 /***
35 */
36 public class RSS090Parser extends BaseWireFeedParser {
37
38 private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
39 private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
40 private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/";
41
42 private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI);
43 private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI);
44 private static final Namespace CONTENT_NS = Namespace.getNamespace(CONTENT_URI);
45
46
47 public RSS090Parser() {
48 this("rss_0.9", RSS_NS);
49 }
50
51 protected RSS090Parser(String type, Namespace ns) {
52 super(type, ns);
53 }
54
55 public boolean isMyType(Document document) {
56 boolean ok = false;
57
58 Element rssRoot = document.getRootElement();
59 Namespace defaultNS = rssRoot.getNamespace();
60 List additionalNSs = rssRoot.getAdditionalNamespaces();
61
62 ok = defaultNS!=null && defaultNS.equals(getRDFNamespace());
63 if (ok) {
64 if (additionalNSs==null) {
65 ok = false;
66 }
67 else {
68 ok = false;
69 for (int i=0;!ok && i<additionalNSs.size();i++) {
70 ok = getRSSNamespace().equals(additionalNSs.get(i));
71 }
72 }
73 }
74 return ok;
75 }
76
77 public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException,FeedException {
78 if (validate) {
79 validateFeed(document);
80 }
81 Element rssRoot = document.getRootElement();
82 return parseChannel(rssRoot);
83 }
84
85 protected void validateFeed(Document document) throws FeedException {
86
87
88
89
90
91
92 }
93
94 /***
95 * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
96 * <P>
97 * This implementation returns the EMTPY namespace.
98 * <p>
99 *
100 * @return returns the EMPTY namespace.
101 */
102 protected Namespace getRSSNamespace() {
103 return RSS_NS;
104 }
105
106 /***
107 * Returns the namespace used by RDF elements in document of the RSS version the parser supports.
108 * <P>
109 * This implementation returns the EMTPY namespace.
110 * <p>
111 *
112 * @return returns the EMPTY namespace.
113 */
114 protected Namespace getRDFNamespace() {
115 return RDF_NS;
116 }
117
118 /***
119 * Returns the namespace used by Content Module elements in document.
120 * <P>
121 * This implementation returns the EMTPY namespace.
122 * <p>
123 *
124 * @return returns the EMPTY namespace.
125 */
126 protected Namespace getContentNamespace() {
127 return CONTENT_NS;
128 }
129
130 /***
131 * Parses the root element of an RSS document into a Channel bean.
132 * <p/>
133 * It reads title, link and description and delegates to parseImage, parseItems
134 * and parseTextInput. This delegation always passes the root element of the RSS
135 * document as different RSS version may have this information in different parts
136 * of the XML tree (no assumptions made thanks to the specs variaty)
137 * <p/>
138 *
139 * @param rssRoot the root element of the RSS document to parse.
140 * @return the parsed Channel bean.
141 */
142 protected WireFeed parseChannel(Element rssRoot) {
143 Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
144
145 Channel channel = new Channel(getType());
146
147 Element e = eChannel.getChild("title",getRSSNamespace());
148 if (e!=null) {
149 channel.setTitle(e.getText());
150 }
151 e = eChannel.getChild("link",getRSSNamespace());
152 if (e!=null) {
153 channel.setLink(e.getText());
154 }
155 e = eChannel.getChild("description",getRSSNamespace());
156 if (e!=null) {
157 channel.setDescription(e.getText());
158 }
159
160 channel.setImage(parseImage(rssRoot));
161
162 channel.setTextInput(parseTextInput(rssRoot));
163
164
165
166
167
168 List allFeedModules = new ArrayList();
169 List rootModules = parseFeedModules(rssRoot);
170 List channelModules = parseFeedModules(eChannel);
171 if (rootModules != null) {
172 allFeedModules.addAll(rootModules);
173 }
174 if (channelModules != null) {
175 allFeedModules.addAll(channelModules);
176 }
177 channel.setModules(allFeedModules);
178 channel.setItems(parseItems(rssRoot));
179
180 List foreignMarkup =
181 extractForeignMarkup(eChannel, channel, getRSSNamespace());
182 if (foreignMarkup.size() > 0) {
183 channel.setForeignMarkup(foreignMarkup);
184 }
185 return channel;
186 }
187
188
189 /***
190 * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
191 * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
192 * <p/>
193 */
194 protected List getItems(Element rssRoot) {
195 return rssRoot.getChildren("item",getRSSNamespace());
196 }
197
198 /***
199 * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
200 * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
201 * <p/>
202 */
203 protected Element getImage(Element rssRoot) {
204 return rssRoot.getChild("image",getRSSNamespace());
205 }
206
207 /***
208 * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
209 * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
210 * <p/>
211 */
212 protected Element getTextInput(Element rssRoot) {
213 return rssRoot.getChild("textinput",getRSSNamespace());
214 }
215
216 /***
217 * Parses the root element of an RSS document looking for image information.
218 * <p/>
219 * It reads title and url out of the 'image' element.
220 * <p/>
221 *
222 * @param rssRoot the root element of the RSS document to parse for image information.
223 * @return the parsed image bean.
224 */
225 protected Image parseImage(Element rssRoot) {
226 Image image = null;
227 Element eImage = getImage(rssRoot);
228 if (eImage!=null) {
229 image = new Image();
230
231 Element e = eImage.getChild("title",getRSSNamespace());
232 if (e!=null) {
233 image.setTitle(e.getText());
234 }
235 e = eImage.getChild("url",getRSSNamespace());
236 if (e!=null) {
237 image.setUrl(e.getText());
238 }
239 e = eImage.getChild("link",getRSSNamespace());
240 if (e!=null) {
241 image.setLink(e.getText());
242 }
243 }
244 return image;
245 }
246
247 /***
248 * Parses the root element of an RSS document looking for all items information.
249 * <p/>
250 * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
251 * for each item element. The resulting RSSItem of each item element is stored in a list.
252 * <p/>
253 *
254 * @param rssRoot the root element of the RSS document to parse for all items information.
255 * @return a list with all the parsed RSSItem beans.
256 */
257 protected List parseItems(Element rssRoot) {
258 Collection eItems = getItems(rssRoot);
259
260 List items = new ArrayList();
261 for (Iterator i=eItems.iterator();i.hasNext();) {
262 Element eItem = (Element) i.next();
263 items.add(parseItem(rssRoot,eItem));
264 }
265 return items;
266 }
267
268 /***
269 * Parses an item element of an RSS document looking for item information.
270 * <p/>
271 * It reads title and link out of the 'item' element.
272 * <p/>
273 *
274 * @param rssRoot the root element of the RSS document in case it's needed for context.
275 * @param eItem the item element to parse.
276 * @return the parsed RSSItem bean.
277 */
278 protected Item parseItem(Element rssRoot,Element eItem) {
279 Item item = new Item();
280 Element e = eItem.getChild("title",getRSSNamespace());
281 if (e!=null) {
282 item.setTitle(e.getText());
283 }
284 e = eItem.getChild("link",getRSSNamespace());
285 if (e!=null) {
286 item.setLink(e.getText());
287 item.setUri(e.getText());
288 }
289
290 item.setModules(parseItemModules(eItem));
291
292 List foreignMarkup =
293 extractForeignMarkup(eItem, item, getRSSNamespace());
294
295
296
297 Iterator iterator = foreignMarkup.iterator();
298 while (iterator.hasNext()) {
299 Element ie = (Element)iterator.next();
300 if (getContentNamespace().equals(ie.getNamespace()) && ie.getName().equals("encoded")) {
301 iterator.remove();
302 }
303 }
304 if (foreignMarkup.size() > 0) {
305 item.setForeignMarkup(foreignMarkup);
306 }
307 return item;
308 }
309
310
311 /***
312 * Parses the root element of an RSS document looking for text-input information.
313 * <p/>
314 * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
315 * <p/>
316 *
317 * @param rssRoot the root element of the RSS document to parse for text-input information.
318 * @return the parsed RSSTextInput bean.
319 */
320 protected TextInput parseTextInput(Element rssRoot) {
321 TextInput textInput = null;
322 Element eTextInput = getTextInput(rssRoot);
323 if (eTextInput!=null) {
324 textInput = new TextInput();
325 Element e = eTextInput.getChild("title",getRSSNamespace());
326 if (e!=null) {
327 textInput.setTitle(e.getText());
328 }
329 e = eTextInput.getChild("description",getRSSNamespace());
330 if (e!=null) {
331 textInput.setDescription(e.getText());
332 }
333 e = eTextInput.getChild("name",getRSSNamespace());
334 if (e!=null) {
335 textInput.setName(e.getText());
336 }
337 e = eTextInput.getChild("link",getRSSNamespace());
338 if (e!=null) {
339 textInput.setLink(e.getText());
340 }
341 }
342 return textInput;
343 }
344
345
346 }