1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io.impl;
18
19 import com.sun.syndication.feed.AbstractFeed;
20 import com.sun.syndication.io.FeedException;
21 import com.sun.syndication.io.FeedParser;
22 import com.sun.syndication.feed.rss.Channel;
23 import com.sun.syndication.feed.rss.Image;
24 import com.sun.syndication.feed.rss.Item;
25 import com.sun.syndication.feed.rss.TextInput;
26 import org.jdom.Document;
27 import org.jdom.Element;
28 import org.jdom.Namespace;
29
30 import java.util.ArrayList;
31 import java.util.Collection;
32 import java.util.Iterator;
33 import java.util.List;
34
35 /***
36 */
37 public class RSS090Parser implements FeedParser {
38
39 private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
40 private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
41
42 private String _type;
43
44 public RSS090Parser() {
45 this("rss_0.9");
46 }
47
48 protected RSS090Parser(String type) {
49 _type = type;
50 }
51
52 public boolean isMyType(Document document) {
53 boolean ok = false;
54 Namespace rdfNS = Namespace.getNamespace(RDF_URI);
55 Namespace rssNS = Namespace.getNamespace(RSS_URI);
56
57 Element rssRoot = document.getRootElement();
58 Namespace defaultNS = rssRoot.getNamespace();
59 List additionalNSs = rssRoot.getAdditionalNamespaces();
60
61 ok = defaultNS!=null && defaultNS.equals(rdfNS);
62 if (ok) {
63 if (additionalNSs==null) {
64 ok = false;
65 }
66 else {
67 ok = false;
68 for (int i=0;!ok && i<additionalNSs.size();i++) {
69 ok = rssNS.equals(additionalNSs.get(i));
70 }
71 }
72 }
73 return ok;
74 }
75
76 public AbstractFeed parse(Document document, boolean validate) throws IllegalArgumentException,FeedException {
77 if (validate) {
78 validateFeed(document);
79 }
80 Element rssRoot = document.getRootElement();
81 return parseChannel(rssRoot);
82 }
83
84 protected void validateFeed(Document document) throws FeedException {
85
86
87
88
89
90
91 }
92
93 /***
94 * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
95 * <P>
96 * This implementation returns the EMTPY namespace.
97 * <p>
98 *
99 * @return returns the EMPTY namespace.
100 */
101 protected Namespace getRSSNamespace() {
102 return Namespace.getNamespace("http://my.netscape.com/rdf/simple/0.9/");
103 }
104
105 public String getType() {
106 return _type;
107 }
108
109 /***
110 * Parses the root element of an RSS document into a Channel bean.
111 * <p/>
112 * It reads title, link and description and delegates to parseImage, parseItems
113 * and parseTextInput. This delegation always passes the root element of the RSS
114 * document as different RSS version may have this information in different parts
115 * of the XML tree (no assumptions made thanks to the specs variaty)
116 * <p/>
117 *
118 * @param rssRoot the root element of the RSS document to parse.
119 * @return the parsed Channel bean.
120 */
121 protected AbstractFeed parseChannel(Element rssRoot) {
122 Element eChannel = rssRoot.getChild("channel",getRSSNamespace());
123
124 Channel channel = new Channel(getType());
125
126 Element e = eChannel.getChild("title",getRSSNamespace());
127 if (e!=null) {
128 channel.setTitle(e.getText());
129 }
130 e = eChannel.getChild("link",getRSSNamespace());
131 if (e!=null) {
132 channel.setLink(e.getText());
133 }
134 e = eChannel.getChild("description",getRSSNamespace());
135 if (e!=null) {
136 channel.setDescription(e.getText());
137 }
138
139 channel.setImage(parseImage(rssRoot));
140
141 channel.setTextInput(parseTextInput(rssRoot));
142
143 channel.setItems(parseItems(rssRoot));
144
145 return channel;
146 }
147
148
149 /***
150 * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
151 * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
152 * <p/>
153 */
154 protected List getItems(Element rssRoot) {
155 return rssRoot.getChildren("item",getRSSNamespace());
156 }
157
158 /***
159 * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
160 * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
161 * <p/>
162 */
163 protected Element getImage(Element rssRoot) {
164 return rssRoot.getChild("image",getRSSNamespace());
165 }
166
167 /***
168 * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
169 * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
170 * <p/>
171 */
172 protected Element getTextInput(Element rssRoot) {
173 return rssRoot.getChild("textinput",getRSSNamespace());
174 }
175
176 /***
177 * Parses the root element of an RSS document looking for image information.
178 * <p/>
179 * It reads title and url out of the 'image' element.
180 * <p/>
181 *
182 * @param rssRoot the root element of the RSS document to parse for image information.
183 * @return the parsed image bean.
184 */
185 protected Image parseImage(Element rssRoot) {
186 Image image = null;
187 Element eImage = getImage(rssRoot);
188 if (eImage!=null) {
189 image = new Image();
190
191 Element e = eImage.getChild("title",getRSSNamespace());
192 if (e!=null) {
193 image.setTitle(e.getText());
194 }
195 e = eImage.getChild("url",getRSSNamespace());
196 if (e!=null) {
197 image.setUrl(e.getText());
198 }
199 e = eImage.getChild("link",getRSSNamespace());
200 if (e!=null) {
201 image.setLink(e.getText());
202 }
203 }
204 return image;
205 }
206
207 /***
208 * Parses the root element of an RSS document looking for all items information.
209 * <p/>
210 * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
211 * for each item element. The resulting RSSItem of each item element is stored in a list.
212 * <p/>
213 *
214 * @param rssRoot the root element of the RSS document to parse for all items information.
215 * @return a list with all the parsed RSSItem beans.
216 */
217 protected List parseItems(Element rssRoot) {
218 Collection eItems = getItems(rssRoot);
219
220 List items = new ArrayList();
221 for (Iterator i=eItems.iterator();i.hasNext();) {
222 Element eItem = (Element) i.next();
223 items.add(parseItem(rssRoot,eItem));
224 }
225 return items;
226 }
227
228 /***
229 * Parses an item element of an RSS document looking for item information.
230 * <p/>
231 * It reads title and link out of the 'item' element.
232 * <p/>
233 *
234 * @param rssRoot the root element of the RSS document in case it's needed for context.
235 * @param eItem the item element to parse.
236 * @return the parsed RSSItem bean.
237 */
238 protected Item parseItem(Element rssRoot,Element eItem) {
239 Item item = new Item();
240 Element e = eItem.getChild("title",getRSSNamespace());
241 if (e!=null) {
242 item.setTitle(e.getText());
243 }
244 e = eItem.getChild("link",getRSSNamespace());
245 if (e!=null) {
246 item.setLink(e.getText());
247 }
248 return item;
249 }
250
251
252 /***
253 * Parses the root element of an RSS document looking for text-input information.
254 * <p/>
255 * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
256 * <p/>
257 *
258 * @param rssRoot the root element of the RSS document to parse for text-input information.
259 * @return the parsed RSSTextInput bean.
260 */
261 protected TextInput parseTextInput(Element rssRoot) {
262 TextInput textInput = null;
263 Element eTextInput = getTextInput(rssRoot);
264 if (eTextInput!=null) {
265 textInput = new TextInput();
266 Element e = eTextInput.getChild("title",getRSSNamespace());
267 if (e!=null) {
268 textInput.setTitle(e.getText());
269 }
270 e = eTextInput.getChild("description",getRSSNamespace());
271 if (e!=null) {
272 textInput.setDescription(e.getText());
273 }
274 e = eTextInput.getChild("name",getRSSNamespace());
275 if (e!=null) {
276 textInput.setName(e.getText());
277 }
278 e = eTextInput.getChild("link",getRSSNamespace());
279 if (e!=null) {
280 textInput.setLink(e.getText());
281 }
282 }
283 return textInput;
284 }
285
286
287 }