1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.unittest;
18
19 import com.sun.syndication.io.XmlReader;
20 import junit.framework.TestCase;
21
22 import java.io.*;
23 import java.text.MessageFormat;
24 import java.util.HashMap;
25 import java.util.Map;
26
27 /***
28 * @author pat, tucu
29 *
30 */
31 public class TestXmlReader extends TestCase {
32 private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
33 private static final String XML4 = "xml-prolog-encoding-single-quotes";
34 private static final String XML3 = "xml-prolog-encoding-double-quotes";
35 private static final String XML2 = "xml-prolog";
36 private static final String XML1 = "xml";
37
38 public static void main(String[] args) throws Exception {
39 TestXmlReader test = new TestXmlReader();
40 test.testRawBom();
41 test.testRawNoBom();
42 test.testHttp();
43 }
44
45 protected void _testRawNoBomValid(String encoding) throws Exception {
46 InputStream is = getXmlStream("no-bom",XML1,encoding,encoding);
47 XmlReader xmlReader = new XmlReader(is,false);
48 assertEquals(xmlReader.getEncoding(),"UTF-8");
49
50 is = getXmlStream("no-bom",XML2,encoding,encoding);
51 xmlReader = new XmlReader(is);
52 assertEquals(xmlReader.getEncoding(),"UTF-8");
53
54 is = getXmlStream("no-bom",XML3,encoding,encoding);
55 xmlReader = new XmlReader(is);
56 assertEquals(xmlReader.getEncoding(),encoding);
57
58 is = getXmlStream("no-bom", XML4, encoding, encoding);
59 xmlReader = new XmlReader(is);
60 assertEquals(xmlReader.getEncoding(), encoding);
61
62 is = getXmlStream("no-bom", XML5, encoding, encoding);
63 xmlReader = new XmlReader(is);
64 assertEquals(xmlReader.getEncoding(), encoding);
65 }
66
67 protected void _testRawNoBomInvalid(String encoding) throws Exception {
68 InputStream is = getXmlStream("no-bom",XML3,encoding,encoding);
69 try {
70 XmlReader xmlReader = new XmlReader(is,false);
71 fail("It should have failed");
72 }
73 catch (IOException ex) {
74 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
75 }
76 }
77
78 public void testRawNoBom() throws Exception {
79 _testRawNoBomValid("US-ASCII");
80 _testRawNoBomValid("UTF-8");
81 _testRawNoBomValid("ISO-8859-1");
82 }
83
84 protected void _testRawBomValid(String encoding) throws Exception {
85 InputStream is = getXmlStream(encoding+"-bom",XML3,encoding,encoding);
86 XmlReader xmlReader = new XmlReader(is,false);
87 if (!encoding.equals("UTF-16")) {
88 assertEquals(xmlReader.getEncoding(),encoding);
89 }
90 else {
91 assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
92 }
93 }
94
95 protected void _testRawBomInvalid(String bomEnc,String streamEnc,String prologEnc) throws Exception {
96 InputStream is = getXmlStream(bomEnc,XML3,streamEnc,prologEnc);
97 try {
98 XmlReader xmlReader = new XmlReader(is,false);
99 fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
100 }
101 catch (IOException ex) {
102 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
103 }
104 }
105
106 public void testRawBom() throws Exception {
107 _testRawBomValid("UTF-8");
108 _testRawBomValid("UTF-16BE");
109 _testRawBomValid("UTF-16LE");
110 _testRawBomValid("UTF-16");
111
112 _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
113 _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
114 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
115 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
116 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
117 _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
118 _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
119 _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
120 }
121
122 public void testHttp() throws Exception {
123 _testHttpValid("application/xml","no-bom","US-ASCII",null);
124 _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
125 _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
126 _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
127 _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
128 _testHttpValid("application/xml;charset=\"UTF-8\"","UTF-8-bom","UTF-8",null);
129 _testHttpValid("application/xml;charset='UTF-8'","UTF-8-bom","UTF-8",null);
130 _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
131 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
132 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
133 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
134
135 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
136 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
137 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
138 _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
139 _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
140 _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
141
142 _testHttpValid("text/xml","no-bom","US-ASCII",null);
143 _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
144 _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
145 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
146 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
147 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
148 _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
149
150 _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
151 _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
152 _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
153 _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
154 _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
155 _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
156
157 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
158 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
159 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
160 _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
161 _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
162
163 _testHttpLenient("text/xml","no-bom","US-ASCII",null, "US-ASCII");
164 _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8", "UTF-8");
165 _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null, "UTF-8");
166 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
167 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
168 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
169 _testHttpLenient("text/xml","UTF-8-bom","US-ASCII",null, "US-ASCII");
170
171 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
172 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
173 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
174 _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
175 _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE",null, "UTF-16");
176
177 _testHttpLenient("text/html","no-bom","US-ASCII","US-ASCII", "US-ASCII");
178 _testHttpLenient("text/html","no-bom","US-ASCII",null, "US-ASCII");
179 _testHttpLenient("text/html;charset=UTF-8","no-bom","US-ASCII","UTF-8", "UTF-8");
180 _testHttpLenient("text/html;charset=UTF-16BE","no-bom","US-ASCII","UTF-8", "UTF-8");
181 }
182
183 public void _testAlternateDefaultEncoding(String cT, String bomEnc, String streamEnc, String prologEnc, String alternateEnc) throws Exception {
184 try {
185 InputStream is = getXmlStream(bomEnc, (prologEnc == null) ? XML1 : XML3, streamEnc, prologEnc);
186 XmlReader.setDefaultEncoding(alternateEnc);
187 XmlReader xmlReader = new XmlReader(is, cT, false);
188 if (!streamEnc.equals("UTF-16")) {
189
190 }
191 else {
192 String enc = (alternateEnc != null) ? alternateEnc : streamEnc;
193 assertEquals(xmlReader.getEncoding().substring(0, streamEnc.length()), streamEnc);
194 }
195 }
196 finally {
197 XmlReader.setDefaultEncoding(null);
198 }
199 }
200
201 public void _testHttpValid(String cT, String bomEnc, String streamEnc, String prologEnc) throws Exception {
202 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML1 :XML3,streamEnc,prologEnc);
203 XmlReader xmlReader = new XmlReader(is,cT,false);
204 if (!streamEnc.equals("UTF-16")) {
205
206 }
207 else {
208 assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
209 }
210 }
211
212 protected void _testHttpInvalid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
213 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML2 :XML3,streamEnc,prologEnc);
214 try {
215 new XmlReader(is,cT,false);
216 fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
217 }
218 catch (IOException ex) {
219 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
220 }
221 }
222
223 protected void _testHttpLenient(String cT, String bomEnc, String streamEnc, String prologEnc, String shouldbe) throws Exception {
224 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML2 :XML3,streamEnc,prologEnc);
225 XmlReader xmlReader = new XmlReader(is,cT,true);
226 assertEquals(xmlReader.getEncoding(),shouldbe);
227 }
228
229 private static final String ENCODING_ATTRIBUTE_XML =
230 "<?xml version=\"1.0\" ?> \n" +
231 "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n" +
232 "\n" +
233 " <atom:entry>\n" +
234 " <atom:title encoding=\"base64\"><![CDATA\n" +
235 "aW5nTGluZSIgLz4";
236
237 public void testEncodingAttributeXML() throws Exception {
238 InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML.getBytes());
239 XmlReader xmlReader = new XmlReader(is, "", true);
240 assertEquals(xmlReader.getEncoding(), "UTF-8");
241 }
242
243
244
245 private static final int[] NO_BOM_BYTES = {};
246 private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
247 private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
248 private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
249
250 private static final Map BOMs = new HashMap();
251
252 static {
253 BOMs.put("no-bom",NO_BOM_BYTES);
254 BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
255 BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
256 BOMs.put("UTF-16-bom",NO_BOM_BYTES);
257 BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
258 }
259
260 private static final MessageFormat XML = new MessageFormat(
261 "<root>{2}</root>");
262 private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
263 "<?xml version=\"1.0\"?>\n<root>{2}</root>");
264 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
265 "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
266 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
267 "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
268 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
269 "<?xml version=\"1.0\" encoding = \t \n \r''{1}''?>\n<root>{2}</root>");
270
271 private static final MessageFormat INFO = new MessageFormat(
272 "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
273
274 private static final Map XMLs = new HashMap();
275
276 static {
277 XMLs.put(XML1, XML);
278 XMLs.put(XML2, XML_WITH_PROLOG);
279 XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
280 XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
281 XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
282 }
283
284 /***
285 *
286 * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
287 * @param xmlType xml, xml-prolog, xml-prolog-charset
288 * @return XML stream
289 */
290 protected InputStream getXmlStream(String bomType,String xmlType,String streamEnc,String prologEnc) throws IOException {
291 ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
292 int[] bom = (int[]) BOMs.get(bomType);
293 if (bom==null) {
294 bom = new int[0];
295 }
296 MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
297 for (int i=0;i<bom.length;i++) {
298 baos.write(bom[i]);
299 }
300 Writer writer = new OutputStreamWriter(baos,streamEnc);
301 String info = INFO.format(new Object[]{bomType,xmlType,prologEnc});
302 String xmlDoc = xml.format(new Object[]{streamEnc,prologEnc,info});
303 writer.write(xmlDoc);
304
305
306 writer.write("<da>\n");
307 for (int i=0;i<10000;i++) {
308 writer.write("<do/>\n");
309 }
310 writer.write("</da>\n");
311
312 writer.close();
313 return new ByteArrayInputStream(baos.toByteArray());
314 }
315
316
317 }