View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io.impl;
18  
19  import java.util.ArrayList;
20  import java.util.Iterator;
21  import java.util.List;
22  
23  import org.jdom.Document;
24  import org.jdom.Element;
25  import org.jdom.Namespace;
26  import org.jdom.output.XMLOutputter;
27  
28  import com.sun.syndication.feed.WireFeed;
29  import com.sun.syndication.feed.atom.Category;
30  import com.sun.syndication.feed.atom.Content;
31  import com.sun.syndication.feed.atom.Entry;
32  import com.sun.syndication.feed.atom.Feed;
33  import com.sun.syndication.feed.atom.Generator;
34  import com.sun.syndication.feed.atom.Link;
35  import com.sun.syndication.feed.atom.Person;
36  import com.sun.syndication.io.FeedException;
37  import java.net.MalformedURLException;
38  import java.net.URL;
39  import org.jdom.Attribute;
40  import org.jdom.Parent;
41  
42  /***
43   * @author Dave Johnson (updated for Atom 1.0)
44   */
45  public class Atom10Parser extends BaseWireFeedParser {
46      private static final String ATOM_10_URI = "http://www.w3.org/2005/Atom";
47      Namespace ns = Namespace.getNamespace("http://www.w3.org/2005/Atom");
48  
49      public Atom10Parser() {
50          this("atom_1.0");
51      }
52  
53      protected Atom10Parser(String type) {
54          super(type);
55      }
56  
57      protected Namespace getAtomNamespace() {
58          return Namespace.getNamespace(ATOM_10_URI);
59      }
60  
61      public boolean isMyType(Document document) {
62          Element rssRoot = document.getRootElement();
63          Namespace defaultNS = rssRoot.getNamespace();
64          return (defaultNS!=null) && defaultNS.equals(getAtomNamespace());
65      }
66  
67      public WireFeed parse(Document document, boolean validate) 
68          throws IllegalArgumentException,FeedException {
69          if (validate) {
70              validateFeed(document);
71          }
72          Element rssRoot = document.getRootElement();
73          return parseFeed(rssRoot);
74      }
75  
76      protected void validateFeed(Document document) throws FeedException {
77          // TBD
78          // here we have to validate the Feed against a schema or whatever
79          // not sure how to do it
80          // one posibility would be to produce an ouput and attempt to parse it again
81          // with validation turned on.
82          // otherwise will have to check the document elements by hand.
83      }
84  
85      protected WireFeed parseFeed(Element eFeed) {
86  
87          com.sun.syndication.feed.atom.Feed feed = 
88              new com.sun.syndication.feed.atom.Feed(getType());
89          
90          URL baseURI = findBaseURI(eFeed);
91  
92          String xmlBase = eFeed.getAttributeValue("base", Namespace.XML_NAMESPACE);
93          if (xmlBase != null) {
94              feed.setXmlBase(xmlBase);
95          }
96          
97          Element e = eFeed.getChild("title",getAtomNamespace());
98          if (e!=null) {
99              feed.setTitle(e.getText());
100         }
101 
102         List eList = eFeed.getChildren("link",getAtomNamespace());
103         feed.setAlternateLinks(parseAlternateLinks(feed, null, baseURI, eList));
104         feed.setAlternateLinks(parseOtherLinks(feed, null, baseURI, eList));
105 
106         List cList = eFeed.getChildren("category",getAtomNamespace());
107         feed.setCategories(parseCategories(baseURI, cList));
108 
109         eList = eFeed.getChildren("author", getAtomNamespace());
110         if (eList.size()>0) {
111             feed.setAuthors(parsePersons(baseURI, eList));
112         }
113 
114         eList = eFeed.getChildren("contributor",getAtomNamespace());
115         if (eList.size()>0) {
116             feed.setContributors(parsePersons(baseURI, eList));
117         }
118 
119         e = eFeed.getChild("subtitle",getAtomNamespace());
120         if (e!=null) {
121             Content subtitle = new Content();
122             subtitle.setType(Content.TEXT); // TODO: need content type of SyndFeed level
123             subtitle.setValue(e.getText());
124             feed.setSubtitle(subtitle);
125         }
126 
127         e = eFeed.getChild("id",getAtomNamespace());
128         if (e!=null) {
129             feed.setId(e.getText());
130         }
131 
132         e = eFeed.getChild("generator",getAtomNamespace());
133         if (e!=null) {
134             Generator gen = new Generator();
135             gen.setValue(e.getText());
136             String att = e.getAttributeValue("url");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
137             if (att!=null) {
138                 gen.setUrl(att);
139             }
140             att = e.getAttributeValue("version");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
141             if (att!=null) {
142                 gen.setVersion(att);
143             }
144             feed.setGenerator(gen);
145         }
146 
147         e = eFeed.getChild("rights",getAtomNamespace());
148         if (e!=null) {
149             feed.setRights(e.getText());
150         }
151 
152         e = eFeed.getChild("icon",getAtomNamespace());
153         if (e!=null) {
154             feed.setIcon(e.getText());
155         }
156 
157         e = eFeed.getChild("logo",getAtomNamespace());
158         if (e!=null) {
159             feed.setLogo(e.getText());
160         }
161 
162         e = eFeed.getChild("updated",getAtomNamespace());
163         if (e!=null) {
164             feed.setUpdated(DateParser.parseDate(e.getText()));
165         }
166 
167         eList = eFeed.getChildren("entry",getAtomNamespace());
168         if (eList.size()>0) {
169             feed.setEntries(parseEntries(feed, baseURI, eList));
170         }
171 
172         feed.setModules(parseFeedModules(eFeed));
173 
174         return feed;
175     }
176 
177     private Link parseLink(Feed feed , Entry entry, URL baseURI, Element eLink) {
178         Link link = new Link();
179         String att = eLink.getAttributeValue("rel");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
180         if (att!=null) {
181             link.setRel(att);
182         }
183         att = eLink.getAttributeValue("type");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
184         if (att!=null) {
185             link.setType(att);
186         }
187         att = eLink.getAttributeValue("href");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
188         if (att!=null) {
189             if (isRelativeURI(att)) { //
190                 link.setHref(resolveURI(baseURI, eLink, ""));
191             } else {
192                 link.setHref(att);
193             }
194         }
195         att = eLink.getAttributeValue("hreflang");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
196         if (att!=null) {
197             link.setHreflang(att);
198         }
199         att = eLink.getAttributeValue("length");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
200         if (att!=null) {
201             link.setLength(Long.parseLong(att));
202         }
203         return link;
204     }
205 
206     // List(Elements) -> List(Link)
207     private List parseAlternateLinks(Feed feed, Entry entry, URL baseURI, List eLinks) {
208         List links = new ArrayList();
209         for (int i=0;i<eLinks.size();i++) {
210             Element eLink = (Element) eLinks.get(i);
211             Link link = parseLink(feed, entry, baseURI, eLink);
212             if (link.getRel() == null 
213                     || "".equals(link.getRel().trim()) 
214                     || "alternate".equals(link.getRel())) {
215                 links.add(link);
216             }
217         }
218         return (links.size()>0) ? links : null;
219     }
220 
221     private List parseOtherLinks(Feed feed, Entry entry, URL baseURI, List eLinks) {
222         List links = new ArrayList();
223         for (int i=0;i<eLinks.size();i++) {
224             Element eLink = (Element) eLinks.get(i);
225             Link link = parseLink(feed, entry, baseURI, eLink);
226             if (!"alternate".equals(link.getRel())) {
227                 links.add(link);
228             }
229         }
230         return (links.size()>0) ? links : null;
231     }
232 
233     private Person parsePerson(URL baseURI, Element ePerson) {
234         Person person = new Person();
235         Element e = ePerson.getChild("name",getAtomNamespace());
236         if (e!=null) {
237             person.setName(e.getText());
238         }
239         e = ePerson.getChild("uri",getAtomNamespace());
240         if (e!=null) {
241             person.setUri(resolveURI(baseURI, ePerson, e.getText()));
242         }
243         e = ePerson.getChild("email",getAtomNamespace());
244         if (e!=null) {
245             person.setEmail(e.getText());
246         }
247         return person;
248     }
249 
250     // List(Elements) -> List(Persons)
251     private List parsePersons(URL baseURI, List ePersons) {
252         List persons = new ArrayList();
253         for (int i=0;i<ePersons.size();i++) {
254             persons.add(parsePerson(baseURI, (Element)ePersons.get(i)));
255         }
256         return (persons.size()>0) ? persons : null;
257     }
258 
259     private Content parseContent(Element e) {
260         String value = null;
261         String src = e.getAttributeValue("src");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
262         String type = e.getAttributeValue("type");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
263         type = (type!=null) ? type : Content.TEXT;
264         if (type.equals(Content.TEXT)) {
265             // do nothing XML Parser took care of this
266             value = e.getText();
267         }
268         else if (type.equals(Content.HTML)) {
269             value = e.getText();
270         }
271         else if (type.equals(Content.XHTML)) {
272             XMLOutputter outputter = new XMLOutputter();
273             List eContent = e.getContent();
274             Iterator i = eContent.iterator();
275             while (i.hasNext()) {
276                 org.jdom.Content c = (org.jdom.Content) i.next();
277                 if (c instanceof Element) {
278                     Element eC = (Element) c;
279                     if (eC.getNamespace().equals(getAtomNamespace())) {
280                         ((Element)c).setNamespace(Namespace.NO_NAMESPACE);
281                     }
282                 }
283             }
284             value = outputter.outputString(eContent);
285         }
286                
287         Content content = new Content();
288         content.setSrc(src);
289         content.setType(type);
290         content.setValue(value);
291         return content;
292     }
293 
294     // List(Elements) -> List(Entries)
295     private List parseEntries(Feed feed, URL baseURI, List eEntries) {
296         List entries = new ArrayList();
297         for (int i=0;i<eEntries.size();i++) {
298             entries.add(parseEntry(feed, (Element)eEntries.get(i), baseURI));
299         }
300         return (entries.size()>0) ? entries : null;
301     }
302 
303     private Entry parseEntry(Feed feed, Element eEntry, URL baseURI) {
304         Entry entry = new Entry();
305 
306         String xmlBase = eEntry.getAttributeValue("base", Namespace.XML_NAMESPACE);
307         if (xmlBase != null) {
308             entry.setXmlBase(xmlBase);
309         }
310         
311         Element e = eEntry.getChild("title",getAtomNamespace());
312         if (e!=null) {
313             entry.setTitle(e.getText());
314         }
315 
316         List eList = eEntry.getChildren("link",getAtomNamespace());
317         entry.setAlternateLinks(parseAlternateLinks(feed, entry, baseURI, eList));
318         entry.setOtherLinks(parseOtherLinks(feed, entry, baseURI, eList));
319 
320         eList = eEntry.getChildren("author", getAtomNamespace());
321         if (eList.size()>0) {
322             entry.setAuthors(parsePersons(baseURI, eList));
323         }
324 
325         eList = eEntry.getChildren("contributor",getAtomNamespace());
326         if (eList.size()>0) {
327             entry.setContributors(parsePersons(baseURI, eList));
328         }
329 
330         e = eEntry.getChild("id",getAtomNamespace());
331         if (e!=null) {
332             entry.setId(e.getText());
333         }
334 
335         e = eEntry.getChild("updated",getAtomNamespace());
336         if (e!=null) {
337             entry.setUpdated(DateParser.parseDate(e.getText()));
338         }
339 
340         e = eEntry.getChild("published",getAtomNamespace());
341         if (e!=null) {
342             entry.setPublished(DateParser.parseDate(e.getText()));
343         }
344 
345         e = eEntry.getChild("summary",getAtomNamespace());
346         if (e!=null) {
347             entry.setSummary(parseContent(e));
348         }
349 
350         e = eEntry.getChild("content",getAtomNamespace());
351         if (e!=null) {
352             List contents = new ArrayList();
353             contents.add(parseContent(e));
354             entry.setContents(contents);
355         }
356 
357         e = eEntry.getChild("rights",getAtomNamespace());
358         if (e!=null) {
359             entry.setRights(e.getText());
360         }
361 
362         List cList = eEntry.getChildren("category",getAtomNamespace());
363         entry.setCategories(parseCategories(baseURI, cList));
364 
365         // TODO: SHOULD handle Atom entry source element
366         
367         entry.setModules(parseItemModules(eEntry));
368 
369         return entry;
370     }
371 
372     private List parseCategories(URL baseURI, List eCategories) {
373         List cats = new ArrayList();
374         for (int i=0;i<eCategories.size();i++) {
375             Element eCategory = (Element) eCategories.get(i);
376             cats.add(parseCategory(baseURI, eCategory));
377         }
378         return (cats.size()>0) ? cats : null;
379     }
380     
381     private Category parseCategory(URL baseURI, Element eCategory) {
382         Category category = new Category();
383         String att = eCategory.getAttributeValue("term");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
384         if (att!=null) {
385             category.setTerm(att);
386         }
387         att = eCategory.getAttributeValue("scheme");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
388         if (att!=null) {
389             category.setScheme(resolveURI(baseURI, eCategory, att));
390         }
391         att = eCategory.getAttributeValue("label");//getAtomNamespace()); DONT KNOW WHY DOESN'T WORK
392         if (att!=null) {
393             category.setLabel(att);
394         }
395         return category;
396 
397     }
398 
399     /*** Use xml:base attributes at feed and entry level to resolve relative links */
400     private String resolveURI(URL baseURI, Parent parent, String url) {
401         url = (url.equals(".") || url.equals("./")) ? "" : url;
402         if (isRelativeURI(url) && parent != null && parent instanceof Element) {
403             Attribute baseAtt = ((Element)parent).getAttribute("base", Namespace.XML_NAMESPACE);
404             String xmlBase = (baseAtt == null) ? "" : baseAtt.getValue();
405             if (!isRelativeURI(xmlBase) && !xmlBase.endsWith("/")) {
406                 xmlBase = xmlBase.substring(0, xmlBase.lastIndexOf("/")+1);
407             }
408             return resolveURI(baseURI, parent.getParent(), xmlBase + url);
409         } else if (isRelativeURI(url) && parent == null) {
410             return baseURI + url;
411         } else if (baseURI != null && url.startsWith("/")) {
412             String hostURI = baseURI.getProtocol() + "://" + baseURI.getHost();
413             if (baseURI.getPort() != baseURI.getDefaultPort()) {
414                 hostURI = hostURI + ":" + baseURI.getPort();
415             }
416             return hostURI + url;
417         }
418         return url;
419     }
420     private boolean isRelativeURI(String uri) {
421         if (  uri.startsWith("http://")
422            || uri.startsWith("https://")
423            || uri.startsWith("/")) {
424             return false;
425         }
426         return true;
427     }
428     /*** Use feed links and/or xml:base attribute to determine baseURI of feed */
429     private URL findBaseURI(Element root) {
430         URL baseURI = null;
431         List linksList = root.getChildren("link", ns);
432         if (linksList != null) {
433             for (Iterator links = linksList.iterator(); links.hasNext(); ) {
434                 Element link = (Element)links.next();
435                 if (!root.equals(link.getParent())) break;
436                 String href = link.getAttribute("href").getValue();
437                 if (   link.getAttribute("rel", ns) == null
438                     || link.getAttribute("rel", ns).getValue().equals("alternate")) {
439                     href = resolveURI(null, link, href);
440                     try {
441                         baseURI = new URL(href);
442                         break;
443                     } catch (MalformedURLException e) {
444                         System.err.println("Base URI is malformed: " + href);
445                     }
446                 }
447             }
448         }
449         return baseURI;
450     } 
451 }