1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.io.impl;
18
19 import java.util.ArrayList;
20 import java.util.Iterator;
21 import java.util.List;
22
23 import org.jdom.Document;
24 import org.jdom.Element;
25 import org.jdom.Namespace;
26 import org.jdom.output.XMLOutputter;
27
28 import com.sun.syndication.feed.WireFeed;
29 import com.sun.syndication.feed.atom.Category;
30 import com.sun.syndication.feed.atom.Content;
31 import com.sun.syndication.feed.atom.Entry;
32 import com.sun.syndication.feed.atom.Feed;
33 import com.sun.syndication.feed.atom.Generator;
34 import com.sun.syndication.feed.atom.Link;
35 import com.sun.syndication.feed.atom.Person;
36 import com.sun.syndication.io.FeedException;
37 import com.sun.syndication.io.WireFeedInput;
38 import com.sun.syndication.io.WireFeedOutput;
39 import java.io.IOException;
40 import java.io.Reader;
41 import java.net.MalformedURLException;
42 import java.util.regex.Pattern;
43 import org.jdom.Attribute;
44 import org.jdom.JDOMException;
45 import org.jdom.Parent;
46 import org.jdom.input.SAXBuilder;
47
48 /***
49 * Parser for Atom 1.0
50 * @author Dave Johnson
51 */
52 public class Atom10Parser extends BaseWireFeedParser {
/** Namespace URI used to recognise Atom 1.0 documents. */
private static final String ATOM_10_URI = "http://www.w3.org/2005/Atom";
/** JDOM namespace object built from {@link #ATOM_10_URI}. */
private static final Namespace ATOM_10_NS = Namespace.getNamespace(ATOM_10_URI);
55
/**
 * Class-wide switch consulted by resolveURI: while it is false (the
 * initial value) resolveURI hands back its url argument unchanged.
 */
private static boolean resolveURIs = false;

/**
 * Sets the class-wide flag that decides whether resolveURI attempts to
 * resolve relative URIs. The flag is static, so it applies to every
 * parser instance.
 * @param resolveURIs new value of the flag
 */
public static void setResolveURIs(boolean resolveURIs) {
    Atom10Parser.resolveURIs = resolveURIs;
}

/**
 * Returns the current value of the class-wide URI resolution flag.
 * @return true if relative URI resolution is switched on
 */
public static boolean getResolveURIs() {
    return resolveURIs;
}
65
/**
 * Creates a parser using the feed type string "atom_1.0".
 */
public Atom10Parser() {
    this("atom_1.0");
}

/**
 * Creates a parser for the given feed type, handing the type and the
 * Atom 1.0 namespace to the superclass constructor.
 * @param type feed type identifier passed on to the superclass
 */
protected Atom10Parser(String type) {
    super(type, ATOM_10_NS);
}
73
/**
 * Returns the namespace used when looking up Atom elements; this
 * implementation returns the Atom 1.0 namespace constant. The method is
 * protected and non-final, so a subclass may override it.
 * @return namespace of the Atom elements this parser reads
 */
protected Namespace getAtomNamespace() {
    return ATOM_10_NS;
}
77
78 public boolean isMyType(Document document) {
79 Element rssRoot = document.getRootElement();
80 Namespace defaultNS = rssRoot.getNamespace();
81 return (defaultNS!=null) && defaultNS.equals(getAtomNamespace());
82 }
83
84 public WireFeed parse(Document document, boolean validate)
85 throws IllegalArgumentException,FeedException {
86 if (validate) {
87 validateFeed(document);
88 }
89 Element rssRoot = document.getRootElement();
90 return parseFeed(rssRoot);
91 }
92
/**
 * Validation hook called by parse(Document, boolean) when validation is
 * requested. The body is empty, so no checks are performed here.
 * @param document document that would be validated
 * @throws FeedException declared in the signature; the empty body never throws it
 */
protected void validateFeed(Document document) throws FeedException {
    // no validation is implemented here
}
101
102 protected WireFeed parseFeed(Element eFeed) throws FeedException {
103
104 String baseURI = null;
105 try {
106 baseURI = findBaseURI(eFeed);
107 } catch (Exception e) {
108 throw new FeedException("ERROR while finding base URI of feed", e);
109 }
110
111 Feed feed = parseFeedMetadata(baseURI, eFeed);
112
113 String xmlBase = eFeed.getAttributeValue("base", Namespace.XML_NAMESPACE);
114 if (xmlBase != null) {
115 feed.setXmlBase(xmlBase);
116 }
117
118 feed.setModules(parseFeedModules(eFeed));
119
120 List eList = eFeed.getChildren("entry",getAtomNamespace());
121 if (eList.size()>0) {
122 feed.setEntries(parseEntries(feed, baseURI, eList));
123 }
124
125 List foreignMarkup =
126 extractForeignMarkup(eFeed, feed, getAtomNamespace());
127 if (foreignMarkup.size() > 0) {
128 feed.setForeignMarkup(foreignMarkup);
129 }
130 return feed;
131 }
132
133 private Feed parseFeedMetadata(String baseURI, Element eFeed) {
134 com.sun.syndication.feed.atom.Feed feed =
135 new com.sun.syndication.feed.atom.Feed(getType());
136
137 Element e = eFeed.getChild("title",getAtomNamespace());
138 if (e!=null) {
139 Content c = new Content();
140 c.setValue(parseTextConstructToString(e));
141 c.setType(getAttributeValue(e, "type"));
142 feed.setTitleEx(c);
143 }
144
145 List eList = eFeed.getChildren("link",getAtomNamespace());
146 feed.setAlternateLinks(parseAlternateLinks(feed, null, baseURI, eList));
147 feed.setOtherLinks(parseOtherLinks(feed, null, baseURI, eList));
148
149 List cList = eFeed.getChildren("category",getAtomNamespace());
150 feed.setCategories(parseCategories(baseURI, cList));
151
152 eList = eFeed.getChildren("author", getAtomNamespace());
153 if (eList.size()>0) {
154 feed.setAuthors(parsePersons(baseURI, eList));
155 }
156
157 eList = eFeed.getChildren("contributor",getAtomNamespace());
158 if (eList.size()>0) {
159 feed.setContributors(parsePersons(baseURI, eList));
160 }
161
162 e = eFeed.getChild("subtitle",getAtomNamespace());
163 if (e!=null) {
164 Content subtitle = new Content();
165 subtitle.setValue(parseTextConstructToString(e));
166 subtitle.setType(getAttributeValue(e, "type"));
167 feed.setSubtitle(subtitle);
168 }
169
170 e = eFeed.getChild("id",getAtomNamespace());
171 if (e!=null) {
172 feed.setId(e.getText());
173 }
174
175 e = eFeed.getChild("generator",getAtomNamespace());
176 if (e!=null) {
177 Generator gen = new Generator();
178 gen.setValue(e.getText());
179 String att = getAttributeValue(e, "uri");
180 if (att!=null) {
181 gen.setUrl(att);
182 }
183 att = getAttributeValue(e, "version");
184 if (att!=null) {
185 gen.setVersion(att);
186 }
187 feed.setGenerator(gen);
188 }
189
190 e = eFeed.getChild("rights",getAtomNamespace());
191 if (e!=null) {
192 feed.setRights(parseTextConstructToString(e));
193 }
194
195 e = eFeed.getChild("icon",getAtomNamespace());
196 if (e!=null) {
197 feed.setIcon(e.getText());
198 }
199
200 e = eFeed.getChild("logo",getAtomNamespace());
201 if (e!=null) {
202 feed.setLogo(e.getText());
203 }
204
205 e = eFeed.getChild("updated",getAtomNamespace());
206 if (e!=null) {
207 feed.setUpdated(DateParser.parseDate(e.getText()));
208 }
209
210 return feed;
211 }
212
213 private Link parseLink(Feed feed , Entry entry, String baseURI, Element eLink) {
214 Link link = new Link();
215 String att = getAttributeValue(eLink, "rel");
216 if (att!=null) {
217 link.setRel(att);
218 }
219 att = getAttributeValue(eLink, "type");
220 if (att!=null) {
221 link.setType(att);
222 }
223 att = getAttributeValue(eLink, "href");
224 if (att!=null) {
225 link.setHref(att);
226 if (isRelativeURI(att)) {
227 link.setHrefResolved(resolveURI(baseURI, eLink, att));
228 }
229 }
230 att = getAttributeValue(eLink, "title");
231 if (att!=null) {
232 link.setTitle(att);
233 }
234 att = getAttributeValue(eLink, "hreflang");
235 if (att!=null) {
236 link.setHreflang(att);
237 }
238 att = getAttributeValue(eLink, "length");
239 if (att!=null) {
240 link.setLength(Long.parseLong(att));
241 }
242 return link;
243 }
244
245
246 private List parseAlternateLinks(Feed feed, Entry entry, String baseURI, List eLinks) {
247 List links = new ArrayList();
248 for (int i=0;i<eLinks.size();i++) {
249 Element eLink = (Element) eLinks.get(i);
250 Link link = parseLink(feed, entry, baseURI, eLink);
251 if (link.getRel() == null
252 || "".equals(link.getRel().trim())
253 || "alternate".equals(link.getRel())) {
254 links.add(link);
255 }
256 }
257 return (links.size()>0) ? links : null;
258 }
259
260 private List parseOtherLinks(Feed feed, Entry entry, String baseURI, List eLinks) {
261 List links = new ArrayList();
262 for (int i=0;i<eLinks.size();i++) {
263 Element eLink = (Element) eLinks.get(i);
264 Link link = parseLink(feed, entry, baseURI, eLink);
265 if (!"alternate".equals(link.getRel())) {
266 links.add(link);
267 }
268 }
269 return (links.size()>0) ? links : null;
270 }
271
272 private Person parsePerson(String baseURI, Element ePerson) {
273 Person person = new Person();
274 Element e = ePerson.getChild("name",getAtomNamespace());
275 if (e!=null) {
276 person.setName(e.getText());
277 }
278 e = ePerson.getChild("uri",getAtomNamespace());
279 if (e!=null) {
280 person.setUri(e.getText());
281 if (isRelativeURI(e.getText())) {
282 person.setUriResolved(resolveURI(baseURI, ePerson, e.getText()));
283 }
284 }
285 e = ePerson.getChild("email",getAtomNamespace());
286 if (e!=null) {
287 person.setEmail(e.getText());
288 }
289 return person;
290 }
291
292
293 private List parsePersons(String baseURI, List ePersons) {
294 List persons = new ArrayList();
295 for (int i=0;i<ePersons.size();i++) {
296 persons.add(parsePerson(baseURI, (Element)ePersons.get(i)));
297 }
298 return (persons.size()>0) ? persons : null;
299 }
300
301 private Content parseContent(Element e) {
302 String value = parseTextConstructToString(e);
303 String src = getAttributeValue(e, "src");
304 String type = getAttributeValue(e, "type");
305 Content content = new Content();
306 content.setSrc(src);
307 content.setType(type);
308 content.setValue(value);
309 return content;
310 }
311
312 private String parseTextConstructToString(Element e) {
313 String value = null;
314 String type = getAttributeValue(e, "type");
315 type = (type!=null) ? type : Content.TEXT;
316 if (type.equals(Content.XHTML) || (type.indexOf("/xml")) != -1 || (type.indexOf("+xml")) != -1) {
317
318 XMLOutputter outputter = new XMLOutputter();
319 List eContent = e.getContent();
320 Iterator i = eContent.iterator();
321 while (i.hasNext()) {
322 org.jdom.Content c = (org.jdom.Content) i.next();
323 if (c instanceof Element) {
324 Element eC = (Element) c;
325 if (eC.getNamespace().equals(getAtomNamespace())) {
326 ((Element)c).setNamespace(Namespace.NO_NAMESPACE);
327 }
328 }
329 }
330 value = outputter.outputString(eContent);
331 } else {
332
333 value = e.getText();
334 }
335 return value;
336 }
337
338
339 protected List parseEntries(Feed feed, String baseURI, List eEntries) {
340 List entries = new ArrayList();
341 for (int i=0;i<eEntries.size();i++) {
342 entries.add(parseEntry(feed, (Element)eEntries.get(i), baseURI));
343 }
344 return (entries.size()>0) ? entries : null;
345 }
346
347 protected Entry parseEntry(Feed feed, Element eEntry, String baseURI) {
348 Entry entry = new Entry();
349
350 String xmlBase = eEntry.getAttributeValue("base", Namespace.XML_NAMESPACE);
351 if (xmlBase != null) {
352 entry.setXmlBase(xmlBase);
353 }
354
355 Element e = eEntry.getChild("title",getAtomNamespace());
356 if (e!=null) {
357 Content c = new Content();
358 c.setValue(parseTextConstructToString(e));
359 c.setType(getAttributeValue(e, "type"));
360 entry.setTitleEx(c);
361 }
362
363 List eList = eEntry.getChildren("link",getAtomNamespace());
364 entry.setAlternateLinks(parseAlternateLinks(feed, entry, baseURI, eList));
365 entry.setOtherLinks(parseOtherLinks(feed, entry, baseURI, eList));
366
367 eList = eEntry.getChildren("author", getAtomNamespace());
368 if (eList.size()>0) {
369 entry.setAuthors(parsePersons(baseURI, eList));
370 }
371
372 eList = eEntry.getChildren("contributor",getAtomNamespace());
373 if (eList.size()>0) {
374 entry.setContributors(parsePersons(baseURI, eList));
375 }
376
377 e = eEntry.getChild("id",getAtomNamespace());
378 if (e!=null) {
379 entry.setId(e.getText());
380 }
381
382 e = eEntry.getChild("updated",getAtomNamespace());
383 if (e!=null) {
384 entry.setUpdated(DateParser.parseW3CDateTime(e.getText()));
385 }
386
387 e = eEntry.getChild("published",getAtomNamespace());
388 if (e!=null) {
389 entry.setPublished(DateParser.parseW3CDateTime(e.getText()));
390 }
391
392 e = eEntry.getChild("summary",getAtomNamespace());
393 if (e!=null) {
394 entry.setSummary(parseContent(e));
395 }
396
397 e = eEntry.getChild("content",getAtomNamespace());
398 if (e!=null) {
399 List contents = new ArrayList();
400 contents.add(parseContent(e));
401 entry.setContents(contents);
402 }
403
404 e = eEntry.getChild("rights",getAtomNamespace());
405 if (e!=null) {
406 entry.setRights(e.getText());
407 }
408
409 List cList = eEntry.getChildren("category",getAtomNamespace());
410 entry.setCategories(parseCategories(baseURI, cList));
411
412
413 e = eEntry.getChild("source", getAtomNamespace());
414 if (e!=null) {
415 entry.setSource(parseFeedMetadata(baseURI, e));
416 }
417
418 entry.setModules(parseItemModules(eEntry));
419
420 List foreignMarkup =
421 extractForeignMarkup(eEntry, entry, getAtomNamespace());
422 if (foreignMarkup.size() > 0) {
423 entry.setForeignMarkup(foreignMarkup);
424 }
425 return entry;
426 }
427
428 private List parseCategories(String baseURI, List eCategories) {
429 List cats = new ArrayList();
430 for (int i=0;i<eCategories.size();i++) {
431 Element eCategory = (Element) eCategories.get(i);
432 cats.add(parseCategory(baseURI, eCategory));
433 }
434 return (cats.size()>0) ? cats : null;
435 }
436
437 private Category parseCategory(String baseURI, Element eCategory) {
438 Category category = new Category();
439 String att = getAttributeValue(eCategory, "term");
440 if (att!=null) {
441 category.setTerm(att);
442 }
443 att = getAttributeValue(eCategory, "scheme");
444 if (att!=null) {
445 category.setScheme(att);
446 if (isRelativeURI(att)) {
447 category.setSchemeResolved(resolveURI(baseURI, eCategory, att));
448 }
449 }
450 att = getAttributeValue(eCategory, "label");
451 if (att!=null) {
452 category.setLabel(att);
453 }
454 return category;
455
456 }
457
458
459
460
461
462
463
464
465
466 static Pattern absoluteURIPattern = Pattern.compile("^[a-z0-9]*:.*$");
467
468 public static boolean isAbsoluteURI(String uri) {
469 return absoluteURIPattern.matcher(uri).find();
470 }
471
472 /*** Returns true if URI is relative. */
473 public static boolean isRelativeURI(String uri) {
474 return !isAbsoluteURI(uri);
475 }
476
477 /***
478 * Resolve URI via base URL and parent element.
479 * Resolve URI based considering xml:base and baseURI.
480 * @param baseURI Base URI used to fetch the XML document
481 * @param parent Parent element from which to consider xml:base
482 * @param url URL to be resolved
483 */
484 public static String resolveURI(String baseURI, Parent parent, String url) {
485 if (!resolveURIs) {
486 return url;
487 }
488 if (isRelativeURI(url)) {
489 url = (!".".equals(url) && !"./".equals(url)) ? url : "";
490
491 if (url.startsWith("/") && baseURI != null) {
492 String base = null;
493 int slashslash = baseURI.indexOf("//");
494 int nextslash = baseURI.indexOf("/", slashslash + 2);
495 if (nextslash != -1) base = baseURI.substring(0, nextslash);
496 return formURI(base, url);
497 }
498
499
500 if (parent != null && parent instanceof Element) {
501
502
503 String xmlbase = ((Element)parent).getAttributeValue(
504 "base", Namespace.XML_NAMESPACE);
505 if (xmlbase != null && xmlbase.trim().length() > 0) {
506 if (isAbsoluteURI(xmlbase)) {
507
508 if (url.startsWith("/")) {
509
510 int slashslash = xmlbase.indexOf("//");
511 int nextslash = xmlbase.indexOf("/", slashslash + 2);
512 if (nextslash != -1) xmlbase = xmlbase.substring(0, nextslash);
513 return formURI(xmlbase, url);
514 }
515 if (!xmlbase.endsWith("/")) {
516
517 xmlbase = xmlbase.substring(0, xmlbase.lastIndexOf("/"));
518 }
519 return formURI(xmlbase, url);
520 } else {
521
522 return resolveURI(baseURI, parent.getParent(),
523 stripTrailingSlash(xmlbase) + "/"+ stripStartingSlash(url));
524 }
525 }
526
527 return resolveURI(baseURI, parent.getParent(), url);
528
529
530 } else if (parent == null || parent instanceof Document) {
531 return formURI(baseURI, url);
532 }
533 }
534 return url;
535 }
536
537 /***
538 * Find base URI of feed considering relative URIs.
539 * @param root Root element of feed.
540 */
541 private String findBaseURI(Element root) throws MalformedURLException {
542 String ret = null;
543 if (findAtomLink(root, "self") != null) {
544 ret = findAtomLink(root, "self");
545 if (".".equals(ret) || "./".equals(ret)) ret = "";
546 if (ret.indexOf("/") != -1) ret = ret.substring(0, ret.lastIndexOf("/"));
547 ret = resolveURI(null, root, ret);
548 }
549 return ret;
550 }
551
552 /***
553 * Return URL string of Atom link element under parent element.
554 * Link with no rel attribute is considered to be rel="alternate"
555 * @param parent Consider only children of this parent element
556 * @param rel Consider only links with this relationship
557 */
558 private String findAtomLink(Element parent, String rel) {
559 String ret = null;
560 List linksList = parent.getChildren("link", ATOM_10_NS);
561 if (linksList != null) {
562 for (Iterator links = linksList.iterator(); links.hasNext(); ) {
563 Element link = (Element)links.next();
564 Attribute relAtt = getAttribute(link, "rel");
565 Attribute hrefAtt = getAttribute(link, "href");
566 if ( (relAtt == null && "alternate".equals(rel))
567 || (relAtt != null && relAtt.getValue().equals(rel))) {
568 ret = hrefAtt.getValue();
569 break;
570 }
571 }
572 }
573 return ret;
574 }
575
576 /***
577 * Form URI by combining base with append portion and giving
578 * special consideration to append portions that begin with ".."
579 * @param base Base of URI, may end with trailing slash
580 * @param append String to append, may begin with slash or ".."
581 */
582 private static String formURI(String base, String append) {
583 base = stripTrailingSlash(base);
584 append = stripStartingSlash(append);
585 if (append.startsWith("..")) {
586 String ret = null;
587 String[] parts = append.split("/");
588 for (int i=0; i<parts.length; i++) {
589 if ("..".equals(parts[i])) {
590 int last = base.lastIndexOf("/");
591 if (last != -1) {
592 base = base.substring(0, last);
593 append = append.substring(3, append.length());
594 }
595 else break;
596 }
597 }
598 }
599 return base + "/" + append;
600 }
601
602 /***
603 * Strip starting slash from beginning of string.
604 */
605 private static String stripStartingSlash(String s) {
606 if (s != null && s.startsWith("/")) {
607 s = s.substring(1, s.length());
608 }
609 return s;
610 }
611
612 /***
613 * Strip trailing slash from end of string.
614 */
615 private static String stripTrailingSlash(String s) {
616 if (s != null && s.endsWith("/")) {
617 s = s.substring(0, s.length() - 1);
618 }
619 return s;
620 }
621
622
623 /***
624 * Parse entry from reader.
625 */
626 public static Entry parseEntry(Reader rd, String baseURI)
627 throws JDOMException, IOException, IllegalArgumentException, FeedException {
628
629 SAXBuilder builder = new SAXBuilder();
630 Document entryDoc = builder.build(rd);
631 Element fetchedEntryElement = entryDoc.getRootElement();
632 fetchedEntryElement.detach();
633
634
635 Feed feed = new Feed();
636 feed.setFeedType("atom_1.0");
637 WireFeedOutput wireFeedOutput = new WireFeedOutput();
638 Document feedDoc = wireFeedOutput.outputJDom(feed);
639 feedDoc.getRootElement().addContent(fetchedEntryElement);
640
641 if (baseURI != null) {
642 feedDoc.getRootElement().setAttribute("base", baseURI, Namespace.XML_NAMESPACE);
643 }
644
645 WireFeedInput input = new WireFeedInput();
646 Feed parsedFeed = (Feed)input.build(feedDoc);
647 return (Entry)parsedFeed.getEntries().get(0);
648 }
649 }
650
651