View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io.impl;
18  
19  import com.sun.syndication.feed.WireFeed;
20  import com.sun.syndication.feed.rss.Channel;
21  import com.sun.syndication.feed.rss.Image;
22  import com.sun.syndication.feed.rss.Item;
23  import com.sun.syndication.feed.rss.TextInput;
24  import com.sun.syndication.io.FeedException;
25  import org.jdom.Document;
26  import org.jdom.Element;
27  import org.jdom.Namespace;
28  
29  import java.util.ArrayList;
30  import java.util.Collection;
31  import java.util.Iterator;
32  import java.util.List;
33  
34  /***
35   */
36  public class RSS090Parser extends BaseWireFeedParser {
37  
38      private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
39      private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
40  
41      public RSS090Parser() {
42          this("rss_0.9");
43      }
44  
45      protected RSS090Parser(String type) {
46          super(type);
47      }
48  
49      public boolean isMyType(Document document) {
50          boolean ok = false;
51          Namespace rdfNS = Namespace.getNamespace(RDF_URI);
52          Namespace rssNS = Namespace.getNamespace(RSS_URI);
53  
54          Element rssRoot = document.getRootElement();
55          Namespace defaultNS = rssRoot.getNamespace();
56          List additionalNSs = rssRoot.getAdditionalNamespaces();
57  
58          ok = defaultNS!=null && defaultNS.equals(rdfNS);
59          if (ok) {
60              if (additionalNSs==null) {
61                  ok = false;
62              }
63              else {
64                  ok = false;
65                  for (int i=0;!ok && i<additionalNSs.size();i++) {
66                      ok = rssNS.equals(additionalNSs.get(i));
67                  }
68              }
69          }
70          return ok;
71      }
72  
73      public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException,FeedException {
74          if (validate) {
75              validateFeed(document);
76          }
77          Element rssRoot = document.getRootElement();
78          return parseChannel(rssRoot);
79      }
80  
81      protected void validateFeed(Document document) throws FeedException {
82          // TBD
83          // here we have to validate the Feed against a schema or whatever
84          // not sure how to do it
85          // one posibility would be to inject our own schema for the feed (they don't exist out there)
86          // to the document, produce an ouput and attempt to parse it again with validation turned on.
87          // otherwise will have to check the document elements by hand.
88      }
89  
90      /***
91       * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
92       * <P>
93       * This implementation returns the EMTPY namespace.
94       * <p>
95       *
96       * @return returns the EMPTY namespace.
97       */
98      protected Namespace getRSSNamespace() {
99          return Namespace.getNamespace("http://my.netscape.com/rdf/simple/0.9/");
100     }
101 
102     /***
103      * Parses the root element of an RSS document into a Channel bean.
104      * <p/>
105      * It reads title, link and description and delegates to parseImage, parseItems
106      * and parseTextInput. This delegation always passes the root element of the RSS
107      * document as different RSS version may have this information in different parts
108      * of the XML tree (no assumptions made thanks to the specs variaty)
109      * <p/>
110      *
111      * @param rssRoot the root element of the RSS document to parse.
112      * @return the parsed Channel bean.
113      */
114     protected WireFeed parseChannel(Element rssRoot) {
115         Element eChannel = rssRoot.getChild("channel",getRSSNamespace());
116 
117         Channel channel = new Channel(getType());
118 
119         Element e = eChannel.getChild("title",getRSSNamespace());
120         if (e!=null) {
121             channel.setTitle(e.getText());
122         }
123         e = eChannel.getChild("link",getRSSNamespace());
124         if (e!=null) {
125             channel.setLink(e.getText());
126         }
127         e = eChannel.getChild("description",getRSSNamespace());
128         if (e!=null) {
129             channel.setDescription(e.getText());
130         }
131 
132         channel.setImage(parseImage(rssRoot));
133 
134         channel.setTextInput(parseTextInput(rssRoot));
135 
136         channel.setItems(parseItems(rssRoot));
137 
138         channel.setModules(parseFeedModules(eChannel));
139 
140         return channel;
141     }
142 
143 
144     /***
145      * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
146      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
147      * <p/>
148      */
149     protected List getItems(Element rssRoot) {
150         return rssRoot.getChildren("item",getRSSNamespace());
151     }
152 
153     /***
154      * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
155      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
156      * <p/>
157      */
158     protected Element getImage(Element rssRoot) {
159         return rssRoot.getChild("image",getRSSNamespace());
160     }
161 
162     /***
163      * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
164      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
165      * <p/>
166      */
167     protected Element getTextInput(Element rssRoot) {
168         return rssRoot.getChild("textinput",getRSSNamespace());
169     }
170 
171     /***
172      * Parses the root element of an RSS document looking for  image information.
173      * <p/>
174      * It reads title and url out of the 'image' element.
175      * <p/>
176      *
177      * @param rssRoot the root element of the RSS document to parse for image information.
178      * @return the parsed image bean.
179      */
180     protected Image parseImage(Element rssRoot) {
181         Image image = null;
182         Element eImage = getImage(rssRoot);
183         if (eImage!=null) {
184             image = new Image();
185 
186             Element e = eImage.getChild("title",getRSSNamespace());
187             if (e!=null) {
188                 image.setTitle(e.getText());
189             }
190             e = eImage.getChild("url",getRSSNamespace());
191             if (e!=null) {
192                 image.setUrl(e.getText());
193             }
194             e = eImage.getChild("link",getRSSNamespace());
195             if (e!=null) {
196                 image.setLink(e.getText());
197             }
198         }
199         return image;
200     }
201 
202     /***
203      * Parses the root element of an RSS document looking for all items information.
204      * <p/>
205      * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
206      * for each item element. The resulting RSSItem of each item element is stored in a list.
207      * <p/>
208      *
209      * @param rssRoot the root element of the RSS document to parse for all items information.
210      * @return a list with all the parsed RSSItem beans.
211      */
212     protected List parseItems(Element rssRoot)  {
213         Collection eItems = getItems(rssRoot);
214 
215         List items = new ArrayList();
216         for (Iterator i=eItems.iterator();i.hasNext();) {
217             Element eItem = (Element) i.next();
218             items.add(parseItem(rssRoot,eItem));
219         }
220         return items;
221     }
222 
223     /***
224      * Parses an item element of an RSS document looking for item information.
225      * <p/>
226      * It reads title and link out of the 'item' element.
227      * <p/>
228      *
229      * @param rssRoot the root element of the RSS document in case it's needed for context.
230      * @param eItem the item element to parse.
231      * @return the parsed RSSItem bean.
232      */
233     protected Item parseItem(Element rssRoot,Element eItem) {
234         Item item = new Item();
235         Element e = eItem.getChild("title",getRSSNamespace());
236         if (e!=null) {
237             item.setTitle(e.getText());
238         }
239         e = eItem.getChild("link",getRSSNamespace());
240         if (e!=null) {
241             item.setLink(e.getText());
242         }
243         
244         item.setModules(parseItemModules(eItem));
245 
246         return item;
247     }
248 
249 
250     /***
251      * Parses the root element of an RSS document looking for  text-input information.
252      * <p/>
253      * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
254      * <p/>
255      *
256      * @param rssRoot the root element of the RSS document to parse for text-input information.
257      * @return the parsed RSSTextInput bean.
258      */
259     protected TextInput parseTextInput(Element rssRoot) {
260         TextInput textInput = null;
261         Element eTextInput = getTextInput(rssRoot);
262         if (eTextInput!=null) {
263             textInput = new TextInput();
264             Element e = eTextInput.getChild("title",getRSSNamespace());
265             if (e!=null) {
266                 textInput.setTitle(e.getText());
267             }
268             e = eTextInput.getChild("description",getRSSNamespace());
269             if (e!=null) {
270                 textInput.setDescription(e.getText());
271             }
272             e = eTextInput.getChild("name",getRSSNamespace());
273             if (e!=null) {
274                 textInput.setName(e.getText());
275             }
276             e = eTextInput.getChild("link",getRSSNamespace());
277             if (e!=null) {
278                 textInput.setLink(e.getText());
279             }
280         }
281         return textInput;
282     }
283 
284 
285 }