1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.unittest;
18  
19  import com.sun.syndication.io.XmlReader;
20  import junit.framework.TestCase;
21  
22  import java.io.*;
23  import java.text.MessageFormat;
24  import java.util.HashMap;
25  import java.util.Map;
26  
27  /***
28   * @author pat, tucu
29   *
30   */
31  public class TestXmlReader extends TestCase {
32  
33      public static void main(String[] args) throws Exception {
34          TestXmlReader test = new TestXmlReader();
35          test.testRawBom();
36          test.testRawNoBom();
37          test.testHttp();
38      }
39  
40      protected void _testRawNoBomValid(String encoding) throws Exception {
41          InputStream is = getXmlStream("no-bom","xml",encoding,encoding);
42          XmlReader xmlReader = new XmlReader(is,false);
43          assertEquals(xmlReader.getEncoding(),"UTF-8");
44  
45          is = getXmlStream("no-bom","xml-prolog",encoding,encoding);
46          xmlReader = new XmlReader(is);
47          assertEquals(xmlReader.getEncoding(),"UTF-8");
48  
49          is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
50          xmlReader = new XmlReader(is);
51          assertEquals(xmlReader.getEncoding(),encoding);
52      }
53  
54      protected void _testRawNoBomInvalid(String encoding) throws Exception {
55          InputStream is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
56          try {
57              XmlReader xmlReader = new XmlReader(is,false);
58              fail("It should have failed");
59          }
60          catch (IOException ex) {
61              assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
62          }
63       }
64  
65      public void testRawNoBom() throws Exception {
66          _testRawNoBomValid("US-ASCII");
67          _testRawNoBomValid("UTF-8");
68          _testRawNoBomValid("ISO-8859-1");
69      }
70  
71      protected void _testRawBomValid(String encoding) throws Exception {
72          InputStream is = getXmlStream(encoding+"-bom","xml-prolog-encoding",encoding,encoding);
73          XmlReader xmlReader = new XmlReader(is,false);
74          if (!encoding.equals("UTF-16")) {
75              assertEquals(xmlReader.getEncoding(),encoding);
76          }
77          else {
78              assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
79          }
80      }
81  
82      protected void _testRawBomInvalid(String bomEnc,String streamEnc,String prologEnc) throws Exception {
83          InputStream is = getXmlStream(bomEnc,"xml-prolog-encoding",streamEnc,prologEnc);
84          try {
85              XmlReader xmlReader = new XmlReader(is,false);
86              fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
87          }
88          catch (IOException ex) {
89              assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
90          }
91       }
92  
93      public void testRawBom() throws Exception {
94          _testRawBomValid("UTF-8");
95          _testRawBomValid("UTF-16BE");
96          _testRawBomValid("UTF-16LE");
97          _testRawBomValid("UTF-16");
98  
99          _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
100         _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
101         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
102         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
103         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
104         _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
105         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
106         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
107     }
108 
109     public void testHttp() throws Exception {
110         _testHttpValid("application/xml","no-bom","US-ASCII",null);
111         _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
112         _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
113         _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
114         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
115         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
116         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
117         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
118         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
119 
120         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
121         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
122         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
123         _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
124         _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
125         _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
126 
127         _testHttpValid("text/xml","no-bom","US-ASCII",null);
128         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
129         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
130         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
131         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
132         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
133         _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
134 
135         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
136         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
137         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
138         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
139         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
140 
141         _testHttpLenient("text/xml","no-bom","US-ASCII",null, "US-ASCII");
142         _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8", "UTF-8");
143         _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null, "UTF-8");
144         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
145         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
146         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
147         _testHttpLenient("text/xml","UTF-8-bom","US-ASCII",null, "US-ASCII");
148 
149         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
150         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
151         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
152         _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
153         _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE",null, "UTF-16");
154 
155         _testHttpLenient("text/html","no-bom","US-ASCII","US-ASCII", "US-ASCII");
156         _testHttpLenient("text/html","no-bom","US-ASCII",null, "US-ASCII");
157         _testHttpLenient("text/html;charset=UTF-8","no-bom","US-ASCII","UTF-8", "UTF-8");
158         _testHttpLenient("text/html;charset=UTF-16BE","no-bom","US-ASCII","UTF-8", "UTF-8");
159     }
160 
161     public void _testHttpValid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
162         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml":"xml-prolog-encoding",streamEnc,prologEnc);
163         XmlReader xmlReader = new XmlReader(is,cT,false);
164         if (!streamEnc.equals("UTF-16")) {
165             // we can not assert things here becuase UTF-8, US-ASCII and ISO-8859-1 look alike for the chars used for detection
166         }
167         else {
168             assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
169         }
170     }
171 
172     protected void _testHttpInvalid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
173         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
174         try {
175             XmlReader xmlReader = new XmlReader(is,cT,false);
176             fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
177         }
178         catch (IOException ex) {
179             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
180         }
181      }
182 
183     protected void _testHttpLenient(String cT, String bomEnc, String streamEnc, String prologEnc, String shouldbe) throws Exception {
184         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
185         XmlReader xmlReader = new XmlReader(is,cT,true);
186         assertEquals(xmlReader.getEncoding(),shouldbe);
187     }
188 
189     // XML Stream generator
190 
191     private static final int[] NO_BOM_BYTES = {};
192     private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
193     private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
194     private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
195 
196     private static final Map BOMs = new HashMap();
197 
198     static {
199         BOMs.put("no-bom",NO_BOM_BYTES);
200         BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
201         BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
202         BOMs.put("UTF-16-bom",NO_BOM_BYTES); // it's added by the writer
203         BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
204     }
205 
206     private static final MessageFormat XML = new MessageFormat(
207             "<root>{2}</root>");
208     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
209             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
210     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING = new MessageFormat(
211             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
212 
213     private static final MessageFormat INFO = new MessageFormat(
214             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
215 
216     private static final Map XMLs = new HashMap();
217 
218     static {
219         XMLs.put("xml",XML);
220         XMLs.put("xml-prolog",XML_WITH_PROLOG);
221         XMLs.put("xml-prolog-encoding",XML_WITH_PROLOG_AND_ENCODING);
222     }
223 
224     /***
225      *
226      * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
227      * @param xmlType xml, xml-prolog, xml-prolog-charset
228      * @return XML stream
229      */
230     protected InputStream getXmlStream(String bomType,String xmlType,String streamEnc,String prologEnc) throws IOException {
231         ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
232         int[] bom = (int[]) BOMs.get(bomType);
233         if (bom==null) {
234             bom = new int[0];
235         }
236         MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
237         for (int i=0;i<bom.length;i++) {
238             baos.write(bom[i]);
239         }
240         Writer writer = new OutputStreamWriter(baos,streamEnc);
241         String info = INFO.format(new Object[]{bomType,xmlType,prologEnc});
242         String xmlDoc = xml.format(new Object[]{streamEnc,prologEnc,info});
243         writer.write(xmlDoc);
244 
245         // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
246         writer.write("<da>\n");
247         for (int i=0;i<10000;i++) {
248             writer.write("<do/>\n");
249         }
250         writer.write("</da>\n");
251 
252         writer.close();
253         return new ByteArrayInputStream(baos.toByteArray());
254     }
255 
256 
257 }