Fixed incorrect handling of & in CDATA sections in XmlFixerReader (fixes #134)
This commit is contained in:
Martin Kurz 2013-10-08 19:31:40 +02:00
parent 8948021500
commit 9ebfafee0c
3 changed files with 125 additions and 3 deletions

View file

@ -38,6 +38,7 @@ public class XmlFixerReader extends Reader {
} }
private boolean trimmed; private boolean trimmed;
private boolean cdata = false;
private final StringBuffer buffer; private final StringBuffer buffer;
private int bufferPos; private int bufferPos;
private int state = 0; private int state = 0;
@ -197,7 +198,18 @@ public class XmlFixerReader extends Reader {
buffer.setLength(0); buffer.setLength(0);
bufferPos = 0; bufferPos = 0;
buffer.append((char) c); buffer.append((char) c);
state = 1; loop = true;
} else if (c == '<') {
state = 4;
buffer.setLength(0);
bufferPos = 0;
buffer.append((char) c);
loop = true;
} else if (c == ']' && cdata) {
state = 5;
buffer.setLength(0);
bufferPos = 0;
buffer.append((char) c);
loop = true; loop = true;
} else { } else {
loop = false; loop = false;
@ -219,7 +231,9 @@ public class XmlFixerReader extends Reader {
} else { } else {
// no ';' to match the '&' lets just make the '&' // no ';' to match the '&' lets just make the '&'
// a legal xml character entity '&amp;' // a legal xml character entity '&amp;'
buffer.insert(1, "amp;"); if (!cdata) {
buffer.insert(1, "amp;");
}
buffer.append((char) c); buffer.append((char) c);
state = 3; state = 3;
loop = true; loop = true;
@ -227,7 +241,9 @@ public class XmlFixerReader extends Reader {
} else { } else {
// no ';' to match the '&' lets just make the '&' // no ';' to match the '&' lets just make the '&'
// a legal xml character entity '&amp;' // a legal xml character entity '&amp;'
buffer.insert(1, "amp;"); if (!cdata) {
buffer.insert(1, "amp;");
}
state = 3; state = 3;
loop = true; loop = true;
} }
@ -253,6 +269,58 @@ public class XmlFixerReader extends Reader {
loop = true; loop = true;
} }
break; break;
case 4: // checking for CDATA
c = in.read();
loop = true;
state = 3;
switch (c) {
case -1:
// end of stream
break;
case ' ':
case '>':
case '/':
// tag end or something like this
buffer.append((char) c);
break;
case '[':
buffer.append((char) c);
final String actBufferContent = buffer.toString();
if ("<![CDATA[".equals(actBufferContent)) {
cdata = true;
} else {
state = 4;
}
break;
default:
state = 4;
buffer.append((char) c);
}
break;
case 5: // checking end of CDATA
c = in.read();
loop = true;
state = 3;
switch (c) {
case -1:
// end of stream
break;
case ']':
buffer.append((char) c);
state = 5;
break;
case '>':
buffer.append((char) c);
final String actBufferContent = buffer.toString();
if ("]]>".equals(actBufferContent)) {
cdata = false;
}
break;
default:
buffer.append((char) c);
break;
}
break;
default: default:
throw new IOException("It shouldn't happen"); throw new IOException("It shouldn't happen");
} }

View file

@ -0,0 +1,22 @@
package com.sun.syndication.unittest.issues;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.unittest.FeedTest;
/**
 * Regression test for issue #134: incorrect handling of CDATA sections.
 * <p>
 * Loads the {@code CDATATestFeed.xml} fixture and compares the links of its first two
 * entries. In that fixture the first item carries its link as plain text with
 * {@code &amp;}-escaped ampersands, while the second item carries the same URL inside a
 * CDATA section with raw ampersands.
 *
 * @author Martin Kurz
 */
public class Issue134Test extends FeedTest {

    /**
     * Binds this test case to the {@code CDATATestFeed.xml} fixture.
     */
    public Issue134Test() {
        super("CDATATestFeed.xml");
    }

    /**
     * Asserts that entry 0 and entry 1 of the parsed feed expose equal links.
     *
     * @throws Exception if obtaining the feed fails
     */
    public void testCDataLinks() throws Exception {
        final SyndFeed parsedFeed = getCachedSyndFeed();

        // Entry 0 is passed as the "expected" value, entry 1 as the "actual" value.
        final String escapedLink = parsedFeed.getEntries().get(0).getLink();
        final String cdataLink = parsedFeed.getEntries().get(1).getLink();

        assertEquals("links differ", escapedLink, cdataLink);
    }
}

View file

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
<channel>
<title>test</title>
<link>test</link>
<description>test</description>
<language>de</language>
<copyright>no</copyright>
<pubDate>Tue, 04 Aug 2009 11:25:23 +0200</pubDate>
<ttl>15</ttl>
<image>
<url>https://rome.dev.java.net/branding/images/header_jnet_new.jpg</url>
<title>java.net</title>
<link>https://rome.dev.java.net/</link>
<description>no thanks</description>
</image>
<item>
<title>item1</title>
<description>Some Description</description>
<link>http://rtl-now.rtl.de/awz.php?container_id=26908&amp;paytype=ppv&amp;productdetail=1&amp;na=1</link>
<guid><![CDATA[http://rtl-now.rtl.de/awz.php?container_id=26908&paytype=ppv&productdetail=1&na=1]]></guid>
<pubDate>Wed, 05 Aug 2009 07:30:00 +0200</pubDate>
</item>
<item>
<title>item2</title>
<description>Some Description</description>
<link><![CDATA[http://rtl-now.rtl.de/awz.php?container_id=26908&paytype=ppv&productdetail=1&na=1]]></link>
<guid><![CDATA[http://rtl-now.rtl.de/awz.php?container_id=26908&paytype=ppv&productdetail=1&na=1]]></guid>
<pubDate>Wed, 05 Aug 2009 07:30:00 +0200</pubDate>
</item>
</channel>
</rss>