View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io.impl;
18  
19  import com.sun.syndication.feed.WireFeed;
20  import com.sun.syndication.feed.rss.Channel;
21  import com.sun.syndication.feed.rss.Image;
22  import com.sun.syndication.feed.rss.Item;
23  import com.sun.syndication.feed.rss.TextInput;
24  import com.sun.syndication.io.FeedException;
25  import org.jdom.Document;
26  import org.jdom.Element;
27  import org.jdom.Namespace;
28  
29  import java.util.ArrayList;
30  import java.util.Collection;
31  import java.util.Iterator;
32  import java.util.List;
33  
34  /***
35   */
36  public class RSS090Parser extends BaseWireFeedParser {
37  
38      private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
39      private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
40      
41      private static final Namespace RDF_NS = Namespace.getNamespace(RDF_URI);
42      private static final Namespace RSS_NS = Namespace.getNamespace(RSS_URI);
43  
44  
45      public RSS090Parser() {
46          this("rss_0.9");
47      }
48  
49      protected RSS090Parser(String type) {
50          super(type);
51      }
52  
53      public boolean isMyType(Document document) {
54          boolean ok = false;
55  
56          Element rssRoot = document.getRootElement();
57          Namespace defaultNS = rssRoot.getNamespace();
58          List additionalNSs = rssRoot.getAdditionalNamespaces();
59  
60          ok = defaultNS!=null && defaultNS.equals(getRDFNamespace());
61          if (ok) {
62              if (additionalNSs==null) {
63                  ok = false;
64              }
65              else {
66                  ok = false;
67                  for (int i=0;!ok && i<additionalNSs.size();i++) {
68                      ok = getRSSNamespace().equals(additionalNSs.get(i));
69                  }
70              }
71          }
72          return ok;
73      }
74  
75      public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException,FeedException {
76          if (validate) {
77              validateFeed(document);
78          }
79          Element rssRoot = document.getRootElement();
80          return parseChannel(rssRoot);
81      }
82  
83      protected void validateFeed(Document document) throws FeedException {
84          // TBD
85          // here we have to validate the Feed against a schema or whatever
86          // not sure how to do it
87          // one posibility would be to inject our own schema for the feed (they don't exist out there)
88          // to the document, produce an ouput and attempt to parse it again with validation turned on.
89          // otherwise will have to check the document elements by hand.
90      }
91  
92      /***
93       * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
94       * <P>
95       * This implementation returns the EMTPY namespace.
96       * <p>
97       *
98       * @return returns the EMPTY namespace.
99       */
100     protected Namespace getRSSNamespace() {
101         return RSS_NS;
102     }
103 
104     /***
105      * Returns the namespace used by RDF elements in document of the RSS version the parser supports.
106      * <P>
107      * This implementation returns the EMTPY namespace.
108      * <p>
109      *
110      * @return returns the EMPTY namespace.
111      */
112     protected Namespace getRDFNamespace() {
113         return RDF_NS;
114     }
115 
116     /***
117      * Parses the root element of an RSS document into a Channel bean.
118      * <p/>
119      * It reads title, link and description and delegates to parseImage, parseItems
120      * and parseTextInput. This delegation always passes the root element of the RSS
121      * document as different RSS version may have this information in different parts
122      * of the XML tree (no assumptions made thanks to the specs variaty)
123      * <p/>
124      *
125      * @param rssRoot the root element of the RSS document to parse.
126      * @return the parsed Channel bean.
127      */
128     protected WireFeed parseChannel(Element rssRoot) {
129         Element eChannel = rssRoot.getChild("channel",getRSSNamespace());
130 
131         Channel channel = new Channel(getType());
132 
133         Element e = eChannel.getChild("title",getRSSNamespace());
134         if (e!=null) {
135             channel.setTitle(e.getText());
136         }
137         e = eChannel.getChild("link",getRSSNamespace());
138         if (e!=null) {
139             channel.setLink(e.getText());
140         }
141         e = eChannel.getChild("description",getRSSNamespace());
142         if (e!=null) {
143             channel.setDescription(e.getText());
144         }
145 
146         channel.setImage(parseImage(rssRoot));
147 
148         channel.setTextInput(parseTextInput(rssRoot));
149 
150         channel.setItems(parseItems(rssRoot));
151 
152         channel.setModules(parseFeedModules(eChannel));
153 
154         return channel;
155     }
156 
157 
158     /***
159      * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
160      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
161      * <p/>
162      */
163     protected List getItems(Element rssRoot) {
164         return rssRoot.getChildren("item",getRSSNamespace());
165     }
166 
167     /***
168      * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
169      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
170      * <p/>
171      */
172     protected Element getImage(Element rssRoot) {
173         return rssRoot.getChild("image",getRSSNamespace());
174     }
175 
176     /***
177      * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
178      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
179      * <p/>
180      */
181     protected Element getTextInput(Element rssRoot) {
182         return rssRoot.getChild("textinput",getRSSNamespace());
183     }
184 
185     /***
186      * Parses the root element of an RSS document looking for  image information.
187      * <p/>
188      * It reads title and url out of the 'image' element.
189      * <p/>
190      *
191      * @param rssRoot the root element of the RSS document to parse for image information.
192      * @return the parsed image bean.
193      */
194     protected Image parseImage(Element rssRoot) {
195         Image image = null;
196         Element eImage = getImage(rssRoot);
197         if (eImage!=null) {
198             image = new Image();
199 
200             Element e = eImage.getChild("title",getRSSNamespace());
201             if (e!=null) {
202                 image.setTitle(e.getText());
203             }
204             e = eImage.getChild("url",getRSSNamespace());
205             if (e!=null) {
206                 image.setUrl(e.getText());
207             }
208             e = eImage.getChild("link",getRSSNamespace());
209             if (e!=null) {
210                 image.setLink(e.getText());
211             }
212         }
213         return image;
214     }
215 
216     /***
217      * Parses the root element of an RSS document looking for all items information.
218      * <p/>
219      * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
220      * for each item element. The resulting RSSItem of each item element is stored in a list.
221      * <p/>
222      *
223      * @param rssRoot the root element of the RSS document to parse for all items information.
224      * @return a list with all the parsed RSSItem beans.
225      */
226     protected List parseItems(Element rssRoot)  {
227         Collection eItems = getItems(rssRoot);
228 
229         List items = new ArrayList();
230         for (Iterator i=eItems.iterator();i.hasNext();) {
231             Element eItem = (Element) i.next();
232             items.add(parseItem(rssRoot,eItem));
233         }
234         return items;
235     }
236 
237     /***
238      * Parses an item element of an RSS document looking for item information.
239      * <p/>
240      * It reads title and link out of the 'item' element.
241      * <p/>
242      *
243      * @param rssRoot the root element of the RSS document in case it's needed for context.
244      * @param eItem the item element to parse.
245      * @return the parsed RSSItem bean.
246      */
247     protected Item parseItem(Element rssRoot,Element eItem) {
248         Item item = new Item();
249         Element e = eItem.getChild("title",getRSSNamespace());
250         if (e!=null) {
251             item.setTitle(e.getText());
252         }
253         e = eItem.getChild("link",getRSSNamespace());
254         if (e!=null) {
255             item.setLink(e.getText());
256         }
257         
258         item.setModules(parseItemModules(eItem));
259 
260         return item;
261     }
262 
263 
264     /***
265      * Parses the root element of an RSS document looking for  text-input information.
266      * <p/>
267      * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
268      * <p/>
269      *
270      * @param rssRoot the root element of the RSS document to parse for text-input information.
271      * @return the parsed RSSTextInput bean.
272      */
273     protected TextInput parseTextInput(Element rssRoot) {
274         TextInput textInput = null;
275         Element eTextInput = getTextInput(rssRoot);
276         if (eTextInput!=null) {
277             textInput = new TextInput();
278             Element e = eTextInput.getChild("title",getRSSNamespace());
279             if (e!=null) {
280                 textInput.setTitle(e.getText());
281             }
282             e = eTextInput.getChild("description",getRSSNamespace());
283             if (e!=null) {
284                 textInput.setDescription(e.getText());
285             }
286             e = eTextInput.getChild("name",getRSSNamespace());
287             if (e!=null) {
288                 textInput.setName(e.getText());
289             }
290             e = eTextInput.getChild("link",getRSSNamespace());
291             if (e!=null) {
292                 textInput.setLink(e.getText());
293             }
294         }
295         return textInput;
296     }
297 
298 
299 }