1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.sun.syndication.unittest;
18
19 import com.sun.syndication.io.XmlReader;
20 import junit.framework.TestCase;
21
22 import java.io.*;
23 import java.text.MessageFormat;
24 import java.util.HashMap;
25 import java.util.Map;
26
27 /***
28 * @author pat, tucu
29 *
30 */
31 public class TestXmlReader extends TestCase {
32
33 public static void main(String[] args) throws Exception {
34 TestXmlReader test = new TestXmlReader();
35 test.testRawBom();
36 test.testRawNoBom();
37 test.testHttp();
38 }
39
40 protected void _testRawNoBomValid(String encoding) throws Exception {
41 InputStream is = getXmlStream("no-bom","xml",encoding,encoding);
42 XmlReader xmlReader = new XmlReader(is,false);
43 assertEquals(xmlReader.getEncoding(),"UTF-8");
44
45 is = getXmlStream("no-bom","xml-prolog",encoding,encoding);
46 xmlReader = new XmlReader(is);
47 assertEquals(xmlReader.getEncoding(),"UTF-8");
48
49 is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
50 xmlReader = new XmlReader(is);
51 assertEquals(xmlReader.getEncoding(),encoding);
52 }
53
54 protected void _testRawNoBomInvalid(String encoding) throws Exception {
55 InputStream is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
56 try {
57 XmlReader xmlReader = new XmlReader(is,false);
58 fail("It should have failed");
59 }
60 catch (IOException ex) {
61 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
62 }
63 }
64
65 public void testRawNoBom() throws Exception {
66 _testRawNoBomValid("US-ASCII");
67 _testRawNoBomValid("UTF-8");
68 _testRawNoBomValid("ISO-8859-1");
69 }
70
71 protected void _testRawBomValid(String encoding) throws Exception {
72 InputStream is = getXmlStream(encoding+"-bom","xml-prolog-encoding",encoding,encoding);
73 XmlReader xmlReader = new XmlReader(is,false);
74 if (!encoding.equals("UTF-16")) {
75 assertEquals(xmlReader.getEncoding(),encoding);
76 }
77 else {
78 assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
79 }
80 }
81
82 protected void _testRawBomInvalid(String bomEnc,String streamEnc,String prologEnc) throws Exception {
83 InputStream is = getXmlStream(bomEnc,"xml-prolog-encoding",streamEnc,prologEnc);
84 try {
85 XmlReader xmlReader = new XmlReader(is,false);
86 fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
87 }
88 catch (IOException ex) {
89 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
90 }
91 }
92
93 public void testRawBom() throws Exception {
94 _testRawBomValid("UTF-8");
95 _testRawBomValid("UTF-16BE");
96 _testRawBomValid("UTF-16LE");
97 _testRawBomValid("UTF-16");
98
99 _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
100 _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
101 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
102 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
103 _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
104 _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
105 _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
106 _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
107 }
108
109 public void testHttp() throws Exception {
110 _testHttpValid("application/xml","no-bom","US-ASCII",null);
111 _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
112 _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
113 _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
114 _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
115 _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
116 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
117 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
118 _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
119
120 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
121 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
122 _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
123 _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
124 _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
125 _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
126
127 _testHttpValid("text/xml","no-bom","US-ASCII",null);
128 _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
129 _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
130 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
131 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
132 _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
133 _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
134
135 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
136 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
137 _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
138 _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
139 _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
140
141 _testHttpLenient("text/xml","no-bom","US-ASCII",null, "US-ASCII");
142 _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8", "UTF-8");
143 _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null, "UTF-8");
144 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
145 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
146 _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
147 _testHttpLenient("text/xml","UTF-8-bom","US-ASCII",null, "US-ASCII");
148
149 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
150 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
151 _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
152 _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
153 _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE",null, "UTF-16");
154
155 _testHttpLenient("text/html","no-bom","US-ASCII","US-ASCII", "US-ASCII");
156 _testHttpLenient("text/html","no-bom","US-ASCII",null, "US-ASCII");
157 _testHttpLenient("text/html;charset=UTF-8","no-bom","US-ASCII","UTF-8", "UTF-8");
158 _testHttpLenient("text/html;charset=UTF-16BE","no-bom","US-ASCII","UTF-8", "UTF-8");
159 }
160
161 public void _testHttpValid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
162 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml":"xml-prolog-encoding",streamEnc,prologEnc);
163 XmlReader xmlReader = new XmlReader(is,cT,false);
164 if (!streamEnc.equals("UTF-16")) {
165
166 }
167 else {
168 assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
169 }
170 }
171
172 protected void _testHttpInvalid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
173 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
174 try {
175 XmlReader xmlReader = new XmlReader(is,cT,false);
176 fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
177 }
178 catch (IOException ex) {
179 assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
180 }
181 }
182
183 protected void _testHttpLenient(String cT, String bomEnc, String streamEnc, String prologEnc, String shouldbe) throws Exception {
184 InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
185 XmlReader xmlReader = new XmlReader(is,cT,true);
186 assertEquals(xmlReader.getEncoding(),shouldbe);
187 }
188
189
190
191 private static final int[] NO_BOM_BYTES = {};
192 private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
193 private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
194 private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
195
196 private static final Map BOMs = new HashMap();
197
198 static {
199 BOMs.put("no-bom",NO_BOM_BYTES);
200 BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
201 BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
202 BOMs.put("UTF-16-bom",NO_BOM_BYTES);
203 BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
204 }
205
206 private static final MessageFormat XML = new MessageFormat(
207 "<root>{2}</root>");
208 private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
209 "<?xml version=\"1.0\"?>\n<root>{2}</root>");
210 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING = new MessageFormat(
211 "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
212
213 private static final MessageFormat INFO = new MessageFormat(
214 "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
215
216 private static final Map XMLs = new HashMap();
217
218 static {
219 XMLs.put("xml",XML);
220 XMLs.put("xml-prolog",XML_WITH_PROLOG);
221 XMLs.put("xml-prolog-encoding",XML_WITH_PROLOG_AND_ENCODING);
222 }
223
224 /***
225 *
226 * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
227 * @param xmlType xml, xml-prolog, xml-prolog-charset
228 * @return XML stream
229 */
230 protected InputStream getXmlStream(String bomType,String xmlType,String streamEnc,String prologEnc) throws IOException {
231 ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
232 int[] bom = (int[]) BOMs.get(bomType);
233 if (bom==null) {
234 bom = new int[0];
235 }
236 MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
237 for (int i=0;i<bom.length;i++) {
238 baos.write(bom[i]);
239 }
240 Writer writer = new OutputStreamWriter(baos,streamEnc);
241 String info = INFO.format(new Object[]{bomType,xmlType,prologEnc});
242 String xmlDoc = xml.format(new Object[]{streamEnc,prologEnc,info});
243 writer.write(xmlDoc);
244
245
246 writer.write("<da>\n");
247 for (int i=0;i<10000;i++) {
248 writer.write("<do/>\n");
249 }
250 writer.write("</da>\n");
251
252 writer.close();
253 return new ByteArrayInputStream(baos.toByteArray());
254 }
255
256
257 }