1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io.impl;
18
19 import java.util.ArrayList;
20 import java.util.Iterator;
21 import java.util.List;
22
23 import org.jdom.Document;
24 import org.jdom.Element;
25 import org.jdom.Namespace;
26 import org.jdom.output.XMLOutputter;
27
28 import com.sun.syndication.feed.WireFeed;
29 import com.sun.syndication.feed.atom.Category;
30 import com.sun.syndication.feed.atom.Content;
31 import com.sun.syndication.feed.atom.Entry;
32 import com.sun.syndication.feed.atom.Feed;
33 import com.sun.syndication.feed.atom.Generator;
34 import com.sun.syndication.feed.atom.Link;
35 import com.sun.syndication.feed.atom.Person;
36 import com.sun.syndication.io.FeedException;
37 import java.net.MalformedURLException;
38 import java.net.URL;
39 import org.jdom.Attribute;
40 import org.jdom.Parent;
41
/**
 * Parser for Atom 1.0 feeds (namespace <code>http://www.w3.org/2005/Atom</code>).
 *
 * @author Dave Johnson (updated for Atom 1.0)
 */
45 public class Atom10Parser extends BaseWireFeedParser {
46 private static final String ATOM_10_URI = "http://www.w3.org/2005/Atom";
47 Namespace ns = Namespace.getNamespace("http://www.w3.org/2005/Atom");
48
/**
 * Creates a parser using the type identifier <code>"atom_1.0"</code>.
 */
public Atom10Parser() {
    this("atom_1.0");
}

/**
 * Creates a parser with an explicit type identifier; intended for subclasses.
 *
 * @param type the feed type identifier, handed unchanged to the superclass constructor
 */
protected Atom10Parser(String type) {
    super(type);
}
56
57 protected Namespace getAtomNamespace() {
58 return Namespace.getNamespace(ATOM_10_URI);
59 }
60
61 public boolean isMyType(Document document) {
62 Element rssRoot = document.getRootElement();
63 Namespace defaultNS = rssRoot.getNamespace();
64 return (defaultNS!=null) && defaultNS.equals(getAtomNamespace());
65 }
66
67 public WireFeed parse(Document document, boolean validate)
68 throws IllegalArgumentException,FeedException {
69 if (validate) {
70 validateFeed(document);
71 }
72 Element rssRoot = document.getRootElement();
73 return parseFeed(rssRoot);
74 }
75
76 protected void validateFeed(Document document) throws FeedException {
77
78
79
80
81
82
83 }
84
85 protected WireFeed parseFeed(Element eFeed) {
86
87 com.sun.syndication.feed.atom.Feed feed =
88 new com.sun.syndication.feed.atom.Feed(getType());
89
90 URL baseURI = findBaseURI(eFeed);
91
92 String xmlBase = eFeed.getAttributeValue("base", Namespace.XML_NAMESPACE);
93 if (xmlBase != null) {
94 feed.setXmlBase(xmlBase);
95 }
96
97 Element e = eFeed.getChild("title",getAtomNamespace());
98 if (e!=null) {
99 feed.setTitle(e.getText());
100 }
101
102 List eList = eFeed.getChildren("link",getAtomNamespace());
103 feed.setAlternateLinks(parseAlternateLinks(feed, null, baseURI, eList));
104 feed.setAlternateLinks(parseOtherLinks(feed, null, baseURI, eList));
105
106 List cList = eFeed.getChildren("category",getAtomNamespace());
107 feed.setCategories(parseCategories(baseURI, cList));
108
109 eList = eFeed.getChildren("author", getAtomNamespace());
110 if (eList.size()>0) {
111 feed.setAuthors(parsePersons(baseURI, eList));
112 }
113
114 eList = eFeed.getChildren("contributor",getAtomNamespace());
115 if (eList.size()>0) {
116 feed.setContributors(parsePersons(baseURI, eList));
117 }
118
119 e = eFeed.getChild("subtitle",getAtomNamespace());
120 if (e!=null) {
121 Content subtitle = new Content();
122 subtitle.setType(Content.TEXT);
123 subtitle.setValue(e.getText());
124 feed.setSubtitle(subtitle);
125 }
126
127 e = eFeed.getChild("id",getAtomNamespace());
128 if (e!=null) {
129 feed.setId(e.getText());
130 }
131
132 e = eFeed.getChild("generator",getAtomNamespace());
133 if (e!=null) {
134 Generator gen = new Generator();
135 gen.setValue(e.getText());
136 String att = e.getAttributeValue("url");
137 if (att!=null) {
138 gen.setUrl(att);
139 }
140 att = e.getAttributeValue("version");
141 if (att!=null) {
142 gen.setVersion(att);
143 }
144 feed.setGenerator(gen);
145 }
146
147 e = eFeed.getChild("rights",getAtomNamespace());
148 if (e!=null) {
149 feed.setRights(e.getText());
150 }
151
152 e = eFeed.getChild("icon",getAtomNamespace());
153 if (e!=null) {
154 feed.setIcon(e.getText());
155 }
156
157 e = eFeed.getChild("logo",getAtomNamespace());
158 if (e!=null) {
159 feed.setLogo(e.getText());
160 }
161
162 e = eFeed.getChild("updated",getAtomNamespace());
163 if (e!=null) {
164 feed.setUpdated(DateParser.parseDate(e.getText()));
165 }
166
167 eList = eFeed.getChildren("entry",getAtomNamespace());
168 if (eList.size()>0) {
169 feed.setEntries(parseEntries(feed, baseURI, eList));
170 }
171
172 feed.setModules(parseFeedModules(eFeed));
173
174 return feed;
175 }
176
177 private Link parseLink(Feed feed , Entry entry, URL baseURI, Element eLink) {
178 Link link = new Link();
179 String att = eLink.getAttributeValue("rel");
180 if (att!=null) {
181 link.setRel(att);
182 }
183 att = eLink.getAttributeValue("type");
184 if (att!=null) {
185 link.setType(att);
186 }
187 att = eLink.getAttributeValue("href");
188 if (att!=null) {
189 if (isRelativeURI(att)) {
190 link.setHref(resolveURI(baseURI, eLink, ""));
191 } else {
192 link.setHref(att);
193 }
194 }
195 att = eLink.getAttributeValue("hreflang");
196 if (att!=null) {
197 link.setHreflang(att);
198 }
199 att = eLink.getAttributeValue("length");
200 if (att!=null) {
201 link.setLength(Long.parseLong(att));
202 }
203 return link;
204 }
205
206
207 private List parseAlternateLinks(Feed feed, Entry entry, URL baseURI, List eLinks) {
208 List links = new ArrayList();
209 for (int i=0;i<eLinks.size();i++) {
210 Element eLink = (Element) eLinks.get(i);
211 Link link = parseLink(feed, entry, baseURI, eLink);
212 if (link.getRel() == null
213 || "".equals(link.getRel().trim())
214 || "alternate".equals(link.getRel())) {
215 links.add(link);
216 }
217 }
218 return (links.size()>0) ? links : null;
219 }
220
221 private List parseOtherLinks(Feed feed, Entry entry, URL baseURI, List eLinks) {
222 List links = new ArrayList();
223 for (int i=0;i<eLinks.size();i++) {
224 Element eLink = (Element) eLinks.get(i);
225 Link link = parseLink(feed, entry, baseURI, eLink);
226 if (!"alternate".equals(link.getRel())) {
227 links.add(link);
228 }
229 }
230 return (links.size()>0) ? links : null;
231 }
232
233 private Person parsePerson(URL baseURI, Element ePerson) {
234 Person person = new Person();
235 Element e = ePerson.getChild("name",getAtomNamespace());
236 if (e!=null) {
237 person.setName(e.getText());
238 }
239 e = ePerson.getChild("uri",getAtomNamespace());
240 if (e!=null) {
241 person.setUri(resolveURI(baseURI, ePerson, e.getText()));
242 }
243 e = ePerson.getChild("email",getAtomNamespace());
244 if (e!=null) {
245 person.setEmail(e.getText());
246 }
247 return person;
248 }
249
250
251 private List parsePersons(URL baseURI, List ePersons) {
252 List persons = new ArrayList();
253 for (int i=0;i<ePersons.size();i++) {
254 persons.add(parsePerson(baseURI, (Element)ePersons.get(i)));
255 }
256 return (persons.size()>0) ? persons : null;
257 }
258
259 private Content parseContent(Element e) {
260 String value = null;
261 String src = e.getAttributeValue("src");
262 String type = e.getAttributeValue("type");
263 type = (type!=null) ? type : Content.TEXT;
264 if (type.equals(Content.TEXT)) {
265
266 value = e.getText();
267 }
268 else if (type.equals(Content.HTML)) {
269 value = e.getText();
270 }
271 else if (type.equals(Content.XHTML)) {
272 XMLOutputter outputter = new XMLOutputter();
273 List eContent = e.getContent();
274 Iterator i = eContent.iterator();
275 while (i.hasNext()) {
276 org.jdom.Content c = (org.jdom.Content) i.next();
277 if (c instanceof Element) {
278 Element eC = (Element) c;
279 if (eC.getNamespace().equals(getAtomNamespace())) {
280 ((Element)c).setNamespace(Namespace.NO_NAMESPACE);
281 }
282 }
283 }
284 value = outputter.outputString(eContent);
285 }
286
287 Content content = new Content();
288 content.setSrc(src);
289 content.setType(type);
290 content.setValue(value);
291 return content;
292 }
293
294
295 private List parseEntries(Feed feed, URL baseURI, List eEntries) {
296 List entries = new ArrayList();
297 for (int i=0;i<eEntries.size();i++) {
298 entries.add(parseEntry(feed, (Element)eEntries.get(i), baseURI));
299 }
300 return (entries.size()>0) ? entries : null;
301 }
302
303 private Entry parseEntry(Feed feed, Element eEntry, URL baseURI) {
304 Entry entry = new Entry();
305
306 String xmlBase = eEntry.getAttributeValue("base", Namespace.XML_NAMESPACE);
307 if (xmlBase != null) {
308 entry.setXmlBase(xmlBase);
309 }
310
311 Element e = eEntry.getChild("title",getAtomNamespace());
312 if (e!=null) {
313 entry.setTitle(e.getText());
314 }
315
316 List eList = eEntry.getChildren("link",getAtomNamespace());
317 entry.setAlternateLinks(parseAlternateLinks(feed, entry, baseURI, eList));
318 entry.setOtherLinks(parseOtherLinks(feed, entry, baseURI, eList));
319
320 eList = eEntry.getChildren("author", getAtomNamespace());
321 if (eList.size()>0) {
322 entry.setAuthors(parsePersons(baseURI, eList));
323 }
324
325 eList = eEntry.getChildren("contributor",getAtomNamespace());
326 if (eList.size()>0) {
327 entry.setContributors(parsePersons(baseURI, eList));
328 }
329
330 e = eEntry.getChild("id",getAtomNamespace());
331 if (e!=null) {
332 entry.setId(e.getText());
333 }
334
335 e = eEntry.getChild("updated",getAtomNamespace());
336 if (e!=null) {
337 entry.setUpdated(DateParser.parseDate(e.getText()));
338 }
339
340 e = eEntry.getChild("published",getAtomNamespace());
341 if (e!=null) {
342 entry.setPublished(DateParser.parseDate(e.getText()));
343 }
344
345 e = eEntry.getChild("summary",getAtomNamespace());
346 if (e!=null) {
347 entry.setSummary(parseContent(e));
348 }
349
350 e = eEntry.getChild("content",getAtomNamespace());
351 if (e!=null) {
352 List contents = new ArrayList();
353 contents.add(parseContent(e));
354 entry.setContents(contents);
355 }
356
357 e = eEntry.getChild("rights",getAtomNamespace());
358 if (e!=null) {
359 entry.setRights(e.getText());
360 }
361
362 List cList = eEntry.getChildren("category",getAtomNamespace());
363 entry.setCategories(parseCategories(baseURI, cList));
364
365
366
367 entry.setModules(parseItemModules(eEntry));
368
369 return entry;
370 }
371
372 private List parseCategories(URL baseURI, List eCategories) {
373 List cats = new ArrayList();
374 for (int i=0;i<eCategories.size();i++) {
375 Element eCategory = (Element) eCategories.get(i);
376 cats.add(parseCategory(baseURI, eCategory));
377 }
378 return (cats.size()>0) ? cats : null;
379 }
380
381 private Category parseCategory(URL baseURI, Element eCategory) {
382 Category category = new Category();
383 String att = eCategory.getAttributeValue("term");
384 if (att!=null) {
385 category.setTerm(att);
386 }
387 att = eCategory.getAttributeValue("scheme");
388 if (att!=null) {
389 category.setScheme(resolveURI(baseURI, eCategory, att));
390 }
391 att = eCategory.getAttributeValue("label");
392 if (att!=null) {
393 category.setLabel(att);
394 }
395 return category;
396
397 }
398
399 /*** Use xml:base attributes at feed and entry level to resolve relative links */
400 private String resolveURI(URL baseURI, Parent parent, String url) {
401 url = (url.equals(".") || url.equals("./")) ? "" : url;
402 if (isRelativeURI(url) && parent != null && parent instanceof Element) {
403 Attribute baseAtt = ((Element)parent).getAttribute("base", Namespace.XML_NAMESPACE);
404 String xmlBase = (baseAtt == null) ? "" : baseAtt.getValue();
405 if (!isRelativeURI(xmlBase) && !xmlBase.endsWith("/")) {
406 xmlBase = xmlBase.substring(0, xmlBase.lastIndexOf("/")+1);
407 }
408 return resolveURI(baseURI, parent.getParent(), xmlBase + url);
409 } else if (isRelativeURI(url) && parent == null) {
410 return baseURI + url;
411 } else if (baseURI != null && url.startsWith("/")) {
412 String hostURI = baseURI.getProtocol() + "://" + baseURI.getHost();
413 if (baseURI.getPort() != baseURI.getDefaultPort()) {
414 hostURI = hostURI + ":" + baseURI.getPort();
415 }
416 return hostURI + url;
417 }
418 return url;
419 }
420 private boolean isRelativeURI(String uri) {
421 if ( uri.startsWith("http://")
422 || uri.startsWith("https://")
423 || uri.startsWith("/")) {
424 return false;
425 }
426 return true;
427 }
428 /*** Use feed links and/or xml:base attribute to determine baseURI of feed */
429 private URL findBaseURI(Element root) {
430 URL baseURI = null;
431 List linksList = root.getChildren("link", ns);
432 if (linksList != null) {
433 for (Iterator links = linksList.iterator(); links.hasNext(); ) {
434 Element link = (Element)links.next();
435 if (!root.equals(link.getParent())) break;
436 String href = link.getAttribute("href").getValue();
437 if ( link.getAttribute("rel", ns) == null
438 || link.getAttribute("rel", ns).getValue().equals("alternate")) {
439 href = resolveURI(null, link, href);
440 try {
441 baseURI = new URL(href);
442 break;
443 } catch (MalformedURLException e) {
444 System.err.println("Base URI is malformed: " + href);
445 }
446 }
447 }
448 }
449 return baseURI;
450 }
451 }