View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io.impl;
18  
19  import com.sun.syndication.feed.WireFeed;
20  import com.sun.syndication.io.FeedException;
21  import com.sun.syndication.io.WireFeedParser;
22  import com.sun.syndication.feed.rss.Channel;
23  import com.sun.syndication.feed.rss.Image;
24  import com.sun.syndication.feed.rss.Item;
25  import com.sun.syndication.feed.rss.TextInput;
26  import org.jdom.Document;
27  import org.jdom.Element;
28  import org.jdom.Namespace;
29  
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Iterator;
33  import java.util.List;
34  
35  /***
36   */
37  public class RSS090Parser implements WireFeedParser {
38  
39      private static final String RDF_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
40      private static final String RSS_URI = "http://my.netscape.com/rdf/simple/0.9/";
41  
42      private String _type;
43  
44      public RSS090Parser() {
45          this("rss_0.9");
46      }
47  
48      protected RSS090Parser(String type) {
49          _type = type;
50      }
51  
52      public boolean isMyType(Document document) {
53          boolean ok = false;
54          Namespace rdfNS = Namespace.getNamespace(RDF_URI);
55          Namespace rssNS = Namespace.getNamespace(RSS_URI);
56  
57          Element rssRoot = document.getRootElement();
58          Namespace defaultNS = rssRoot.getNamespace();
59          List additionalNSs = rssRoot.getAdditionalNamespaces();
60  
61          ok = defaultNS!=null && defaultNS.equals(rdfNS);
62          if (ok) {
63              if (additionalNSs==null) {
64                  ok = false;
65              }
66              else {
67                  ok = false;
68                  for (int i=0;!ok && i<additionalNSs.size();i++) {
69                      ok = rssNS.equals(additionalNSs.get(i));
70                  }
71              }
72          }
73          return ok;
74      }
75  
76      public WireFeed parse(Document document, boolean validate) throws IllegalArgumentException,FeedException {
77          if (validate) {
78              validateFeed(document);
79          }
80          Element rssRoot = document.getRootElement();
81          return parseChannel(rssRoot);
82      }
83  
84      protected void validateFeed(Document document) throws FeedException {
85          // TBD
86          // here we have to validate the Feed against a schema or whatever
87          // not sure how to do it
88          // one posibility would be to inject our own schema for the feed (they don't exist out there)
89          // to the document, produce an ouput and attempt to parse it again with validation turned on.
90          // otherwise will have to check the document elements by hand.
91      }
92  
93      /***
94       * Returns the namespace used by RSS elements in document of the RSS version the parser supports.
95       * <P>
96       * This implementation returns the EMTPY namespace.
97       * <p>
98       *
99       * @return returns the EMPTY namespace.
100      */
101     protected Namespace getRSSNamespace() {
102         return Namespace.getNamespace("http://my.netscape.com/rdf/simple/0.9/");
103     }
104 
105     public String getType() {
106         return _type;
107     }
108 
109     /***
110      * Parses the root element of an RSS document into a Channel bean.
111      * <p/>
112      * It reads title, link and description and delegates to parseImage, parseItems
113      * and parseTextInput. This delegation always passes the root element of the RSS
114      * document as different RSS version may have this information in different parts
115      * of the XML tree (no assumptions made thanks to the specs variaty)
116      * <p/>
117      *
118      * @param rssRoot the root element of the RSS document to parse.
119      * @return the parsed Channel bean.
120      */
121     protected WireFeed parseChannel(Element rssRoot) {
122         Element eChannel = rssRoot.getChild("channel",getRSSNamespace());
123 
124         Channel channel = new Channel(getType());
125 
126         Element e = eChannel.getChild("title",getRSSNamespace());
127         if (e!=null) {
128             channel.setTitle(e.getText());
129         }
130         e = eChannel.getChild("link",getRSSNamespace());
131         if (e!=null) {
132             channel.setLink(e.getText());
133         }
134         e = eChannel.getChild("description",getRSSNamespace());
135         if (e!=null) {
136             channel.setDescription(e.getText());
137         }
138 
139         channel.setImage(parseImage(rssRoot));
140 
141         channel.setTextInput(parseTextInput(rssRoot));
142 
143         channel.setItems(parseItems(rssRoot));
144 
145         return channel;
146     }
147 
148 
149     /***
150      * This method exists because RSS0.90 and RSS1.0 have the 'item' elements under the root elemment.
151      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have the item elements under the 'channel' element.
152      * <p/>
153      */
154     protected List getItems(Element rssRoot) {
155         return rssRoot.getChildren("item",getRSSNamespace());
156     }
157 
158     /***
159      * This method exists because RSS0.90 and RSS1.0 have the 'image' element under the root elemment.
160      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
161      * <p/>
162      */
163     protected Element getImage(Element rssRoot) {
164         return rssRoot.getChild("image",getRSSNamespace());
165     }
166 
167     /***
168      * This method exists because RSS0.90 and RSS1.0 have the 'textinput' element under the root elemment.
169      * And RSS0.91, RSS0.02, RSS0.93, RSS0.94 and RSS2.0 have it under the 'channel' element.
170      * <p/>
171      */
172     protected Element getTextInput(Element rssRoot) {
173         return rssRoot.getChild("textinput",getRSSNamespace());
174     }
175 
176     /***
177      * Parses the root element of an RSS document looking for  image information.
178      * <p/>
179      * It reads title and url out of the 'image' element.
180      * <p/>
181      *
182      * @param rssRoot the root element of the RSS document to parse for image information.
183      * @return the parsed image bean.
184      */
185     protected Image parseImage(Element rssRoot) {
186         Image image = null;
187         Element eImage = getImage(rssRoot);
188         if (eImage!=null) {
189             image = new Image();
190 
191             Element e = eImage.getChild("title",getRSSNamespace());
192             if (e!=null) {
193                 image.setTitle(e.getText());
194             }
195             e = eImage.getChild("url",getRSSNamespace());
196             if (e!=null) {
197                 image.setUrl(e.getText());
198             }
199             e = eImage.getChild("link",getRSSNamespace());
200             if (e!=null) {
201                 image.setLink(e.getText());
202             }
203         }
204         return image;
205     }
206 
207     /***
208      * Parses the root element of an RSS document looking for all items information.
209      * <p/>
210      * It iterates through the item elements list, obtained from the getItems() method, and invoke parseItem()
211      * for each item element. The resulting RSSItem of each item element is stored in a list.
212      * <p/>
213      *
214      * @param rssRoot the root element of the RSS document to parse for all items information.
215      * @return a list with all the parsed RSSItem beans.
216      */
217     protected List parseItems(Element rssRoot)  {
218         Collection eItems = getItems(rssRoot);
219 
220         List items = new ArrayList();
221         for (Iterator i=eItems.iterator();i.hasNext();) {
222             Element eItem = (Element) i.next();
223             items.add(parseItem(rssRoot,eItem));
224         }
225         return items;
226     }
227 
228     /***
229      * Parses an item element of an RSS document looking for item information.
230      * <p/>
231      * It reads title and link out of the 'item' element.
232      * <p/>
233      *
234      * @param rssRoot the root element of the RSS document in case it's needed for context.
235      * @param eItem the item element to parse.
236      * @return the parsed RSSItem bean.
237      */
238     protected Item parseItem(Element rssRoot,Element eItem) {
239         Item item = new Item();
240         Element e = eItem.getChild("title",getRSSNamespace());
241         if (e!=null) {
242             item.setTitle(e.getText());
243         }
244         e = eItem.getChild("link",getRSSNamespace());
245         if (e!=null) {
246             item.setLink(e.getText());
247         }
248         return item;
249     }
250 
251 
252     /***
253      * Parses the root element of an RSS document looking for  text-input information.
254      * <p/>
255      * It reads title, description, name and link out of the 'textinput' or 'textInput' element.
256      * <p/>
257      *
258      * @param rssRoot the root element of the RSS document to parse for text-input information.
259      * @return the parsed RSSTextInput bean.
260      */
261     protected TextInput parseTextInput(Element rssRoot) {
262         TextInput textInput = null;
263         Element eTextInput = getTextInput(rssRoot);
264         if (eTextInput!=null) {
265             textInput = new TextInput();
266             Element e = eTextInput.getChild("title",getRSSNamespace());
267             if (e!=null) {
268                 textInput.setTitle(e.getText());
269             }
270             e = eTextInput.getChild("description",getRSSNamespace());
271             if (e!=null) {
272                 textInput.setDescription(e.getText());
273             }
274             e = eTextInput.getChild("name",getRSSNamespace());
275             if (e!=null) {
276                 textInput.setName(e.getText());
277             }
278             e = eTextInput.getChild("link",getRSSNamespace());
279             if (e!=null) {
280                 textInput.setLink(e.getText());
281             }
282         }
283         return textInput;
284     }
285 
286 
287 }