View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io.impl;
18  
19  import com.sun.syndication.feed.WireFeed;
20  import com.sun.syndication.feed.rss.Channel;
21  import com.sun.syndication.feed.rss.Image;
22  import com.sun.syndication.feed.rss.Item;
23  import com.sun.syndication.feed.rss.TextInput;
24  import com.sun.syndication.io.FeedException;
25  import org.jdom.Document;
26  import org.jdom.Element;
27  import org.jdom.Namespace;
28  
29  import java.util.ArrayList;
30  import java.util.Collection;
31  import java.util.Iterator;
32  import java.util.List;
33  
34  /***
35   */
36  public class RSS090Parser extends BaseWireFeedParser {
37  
38      private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
39      private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
40      private static final String CONTENT_URI = "http://purl.org/rss/1.0/modules/content/";
41      
42      private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI);
43      private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI);
44      private static final Namespace CONTENT_NS = Namespace.getNamespace(CONTENT_URI);
45  
46  
47      public RSS090Parser() {
48          this("rss_0.9");
49      }
50  
51      protected RSS090Parser(String type) {
52          super(type);
53      }
54  
55      public boolean isMyType(Document document) {
56          boolean ok = false;
57  
58          Element rssRoot = document.getRootElement();
59          Namespace defaultNS = rssRoot.getNamespace();
60          List additionalNSs = rssRoot.getAdditionalNamespaces();
61  
62          ok = defaultNS!=null && defaultNS.equals(getRDFNamespace());
63          if (ok) {
64              if (additionalNSs==null) {
65                  ok = false;
66              }
67              else {
68                  ok = false;
69                  for (int i=0;!ok && i<additionalNSs.size();i++) {
70                      ok = getRSSNamespace().equals(additionalNSs.get(i));
71                  }
72              }
73          }
74          return ok;
75      }
76  
77      public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException,FeedException {
78          if (validate) {
79              validateFeed(document);
80          }
81          Element rssRoot = document.getRootElement();
82          return parseChannel(rssRoot);
83      }
84  
85      protected void validateFeed(Document document) throws FeedException {
86          // TBD
87          // here we have to validate the Feed against a schema or whatever
88          // not sure how to do it
89          // one posibility would be to inject our own schema for the feed (they don't exist out there)
90          // to the document, produce an ouput and attempt to parse it again with validation turned on.
91          // otherwise will have to check the document elements by hand.
92      }
93  
94      /***
95       * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
96       * <P>
97       * This implementation returns the EMTPY namespace.
98       * <p>
99       *
100      * @return returns the EMPTY namespace.
101      */
102     protected Namespace getRSSNamespace() {
103         return RSS_NS;
104     }
105 
106     /***
107      * Returns the namespace used by RDF elements in document of the RSS version the parser supports.
108      * <P>
109      * This implementation returns the EMTPY namespace.
110      * <p>
111      *
112      * @return returns the EMPTY namespace.
113      */
114     protected Namespace getRDFNamespace() {
115         return RDF_NS;
116     }
117 
118     /***
119      * Returns the namespace used by Content Module elements in document.
120      * <P>
121      * This implementation returns the EMTPY namespace.
122      * <p>
123      *
124      * @return returns the EMPTY namespace.
125      */
126     protected Namespace getContentNamespace() {
127         return CONTENT_NS;
128     }
129 
130     /***
131      * Parses the root element of an RSS document into a Channel bean.
132      * <p/>
133      * It reads title, link and description and delegates to parseImage, parseItems
134      * and parseTextInput. This delegation always passes the root element of the RSS
135      * document as different RSS version may have this information in different parts
136      * of the XML tree (no assumptions made thanks to the specs variaty)
137      * <p/>
138      *
139      * @param rssRoot the root element of the RSS document to parse.
140      * @return the parsed Channel bean.
141      */
142     protected WireFeed parseChannel(Element rssRoot) {
143         Element eChannel = rssRoot.getChild("channel", getRSSNamespace());
144 
145         Channel channel = new Channel(getType());
146 
147         Element e = eChannel.getChild("title",getRSSNamespace());
148         if (e!=null) {
149             channel.setTitle(e.getText());
150         }
151         e = eChannel.getChild("link",getRSSNamespace());
152         if (e!=null) {
153             channel.setLink(e.getText());
154         }
155         e = eChannel.getChild("description",getRSSNamespace());
156         if (e!=null) {
157             channel.setDescription(e.getText());
158         }
159 
160         channel.setImage(parseImage(rssRoot));
161 
162         channel.setTextInput(parseTextInput(rssRoot));
163 
164         // Unfortunately Microsoft's SSE extension has a special case of 
165         // effectively putting the sharing channel module inside the RSS tag 
166         // and not inside the channel itself. So we also need to look for 
167         // channel modules from the root RSS element.
168         List allFeedModules = new ArrayList();
169         List rootModules = parseFeedModules(rssRoot);
170         List channelModules = parseFeedModules(eChannel); 
171         if (rootModules != null) {
172             allFeedModules.addAll(rootModules);
173         }
174         if (channelModules != null) {
175             allFeedModules.addAll(channelModules);
176         }
177         channel.setModules(allFeedModules);
178         channel.setItems(parseItems(rssRoot));
179 
180         List foreignMarkup = 
181             extractForeignMarkup(eChannel, channel, getRSSNamespace());
182         if (foreignMarkup.size() > 0) {
183             channel.setForeignMarkup(foreignMarkup);
184         }          
185         return channel;
186     }
187 
188 
189     /***
190      * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
191      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
192      * <p/>
193      */
194     protected List getItems(Element rssRoot) {
195         return rssRoot.getChildren("item",getRSSNamespace());
196     }
197 
198     /***
199      * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
200      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
201      * <p/>
202      */
203     protected Element getImage(Element rssRoot) {
204         return rssRoot.getChild("image",getRSSNamespace());
205     }
206 
207     /***
208      * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
209      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
210      * <p/>
211      */
212     protected Element getTextInput(Element rssRoot) {
213         return rssRoot.getChild("textinput",getRSSNamespace());
214     }
215 
216     /***
217      * Parses the root element of an RSS document looking for  image information.
218      * <p/>
219      * It reads title and url out of the 'image' element.
220      * <p/>
221      *
222      * @param rssRoot the root element of the RSS document to parse for image information.
223      * @return the parsed image bean.
224      */
225     protected Image parseImage(Element rssRoot) {
226         Image image = null;
227         Element eImage = getImage(rssRoot);
228         if (eImage!=null) {
229             image = new Image();
230 
231             Element e = eImage.getChild("title",getRSSNamespace());
232             if (e!=null) {
233                 image.setTitle(e.getText());
234             }
235             e = eImage.getChild("url",getRSSNamespace());
236             if (e!=null) {
237                 image.setUrl(e.getText());
238             }
239             e = eImage.getChild("link",getRSSNamespace());
240             if (e!=null) {
241                 image.setLink(e.getText());
242             }
243         }
244         return image;
245     }
246 
247     /***
248      * Parses the root element of an RSS document looking for all items information.
249      * <p/>
250      * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
251      * for each item element. The resulting RSSItem of each item element is stored in a list.
252      * <p/>
253      *
254      * @param rssRoot the root element of the RSS document to parse for all items information.
255      * @return a list with all the parsed RSSItem beans.
256      */
257     protected List parseItems(Element rssRoot)  {
258         Collection eItems = getItems(rssRoot);
259 
260         List items = new ArrayList();
261         for (Iterator i=eItems.iterator();i.hasNext();) {
262             Element eItem = (Element) i.next();
263             items.add(parseItem(rssRoot,eItem));
264         }
265         return items;
266     }
267 
268     /***
269      * Parses an item element of an RSS document looking for item information.
270      * <p/>
271      * It reads title and link out of the 'item' element.
272      * <p/>
273      *
274      * @param rssRoot the root element of the RSS document in case it's needed for context.
275      * @param eItem the item element to parse.
276      * @return the parsed RSSItem bean.
277      */
278     protected Item parseItem(Element rssRoot,Element eItem) {
279         Item item = new Item();
280         Element e = eItem.getChild("title",getRSSNamespace());
281         if (e!=null) {
282             item.setTitle(e.getText());
283         }
284         e = eItem.getChild("link",getRSSNamespace());
285         if (e!=null) {
286             item.setLink(e.getText());
287         }
288         
289         item.setModules(parseItemModules(eItem));
290                 
291         List foreignMarkup = 
292             extractForeignMarkup(eItem, item, getRSSNamespace());
293         if (foreignMarkup.size() > 0) {
294             item.setForeignMarkup(foreignMarkup);
295         }
296         return item;
297     }
298 
299 
300     /***
301      * Parses the root element of an RSS document looking for  text-input information.
302      * <p/>
303      * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
304      * <p/>
305      *
306      * @param rssRoot the root element of the RSS document to parse for text-input information.
307      * @return the parsed RSSTextInput bean.
308      */
309     protected TextInput parseTextInput(Element rssRoot) {
310         TextInput textInput = null;
311         Element eTextInput = getTextInput(rssRoot);
312         if (eTextInput!=null) {
313             textInput = new TextInput();
314             Element e = eTextInput.getChild("title",getRSSNamespace());
315             if (e!=null) {
316                 textInput.setTitle(e.getText());
317             }
318             e = eTextInput.getChild("description",getRSSNamespace());
319             if (e!=null) {
320                 textInput.setDescription(e.getText());
321             }
322             e = eTextInput.getChild("name",getRSSNamespace());
323             if (e!=null) {
324                 textInput.setName(e.getText());
325             }
326             e = eTextInput.getChild("link",getRSSNamespace());
327             if (e!=null) {
328                 textInput.setLink(e.getText());
329             }
330         }
331         return textInput;
332     }
333 
334 
335 }