1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.unittest;
18  
19  import com.sun.syndication.io.XmlReader;
20  import junit.framework.TestCase;
21  
22  import java.io.*;
23  import java.text.MessageFormat;
24  import java.util.HashMap;
25  import java.util.Map;
26  
27  /***
28   * @author pat, tucu
29   *
30   */
31  public class TestXmlReader extends TestCase {
32      private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
33      private static final String XML4 = "xml-prolog-encoding-single-quotes";
34      private static final String XML3 = "xml-prolog-encoding-double-quotes";
35      private static final String XML2 = "xml-prolog";
36      private static final String XML1 = "xml";
37  
38      public static void main(String[] args) throws Exception {
39          TestXmlReader test = new TestXmlReader();
40          test.testRawBom();
41          test.testRawNoBom();
42          test.testHttp();
43      }
44  
45      protected void _testRawNoBomValid(String encoding) throws Exception {
46          InputStream is = getXmlStream("no-bom",XML1,encoding,encoding);
47          XmlReader xmlReader = new XmlReader(is,false);
48          assertEquals(xmlReader.getEncoding(),"UTF-8");
49  
50          is = getXmlStream("no-bom",XML2,encoding,encoding);
51          xmlReader = new XmlReader(is);
52          assertEquals(xmlReader.getEncoding(),"UTF-8");
53  
54          is = getXmlStream("no-bom",XML3,encoding,encoding);
55          xmlReader = new XmlReader(is);
56          assertEquals(xmlReader.getEncoding(),encoding);
57  
58          is = getXmlStream("no-bom", XML4, encoding, encoding);
59          xmlReader = new XmlReader(is);
60          assertEquals(xmlReader.getEncoding(), encoding);
61  
62          is = getXmlStream("no-bom", XML5, encoding, encoding);
63          xmlReader = new XmlReader(is);
64          assertEquals(xmlReader.getEncoding(), encoding);
65      }
66  
67      protected void _testRawNoBomInvalid(String encoding) throws Exception {
68          InputStream is = getXmlStream("no-bom",XML3,encoding,encoding);
69          try {
70              XmlReader xmlReader = new XmlReader(is,false);
71              fail("It should have failed");
72          }
73          catch (IOException ex) {
74              assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
75          }
76       }
77  
78      public void testRawNoBom() throws Exception {
79          _testRawNoBomValid("US-ASCII");
80          _testRawNoBomValid("UTF-8");
81          _testRawNoBomValid("ISO-8859-1");
82      }
83  
84      protected void _testRawBomValid(String encoding) throws Exception {
85          InputStream is = getXmlStream(encoding+"-bom",XML3,encoding,encoding);
86          XmlReader xmlReader = new XmlReader(is,false);
87          if (!encoding.equals("UTF-16")) {
88              assertEquals(xmlReader.getEncoding(),encoding);
89          }
90          else {
91              assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
92          }
93      }
94  
95      protected void _testRawBomInvalid(String bomEnc,String streamEnc,String prologEnc) throws Exception {
96          InputStream is = getXmlStream(bomEnc,XML3,streamEnc,prologEnc);
97          try {
98              XmlReader xmlReader = new XmlReader(is,false);
99              fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
100         }
101         catch (IOException ex) {
102             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
103         }
104      }
105 
106     public void testRawBom() throws Exception {
107         _testRawBomValid("UTF-8");
108         _testRawBomValid("UTF-16BE");
109         _testRawBomValid("UTF-16LE");
110         _testRawBomValid("UTF-16");
111 
112         _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
113         _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
114         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
115         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
116         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
117         _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
118         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
119         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
120     }
121 
122     public void testHttp() throws Exception {
123         _testHttpValid("application/xml","no-bom","US-ASCII",null);
124         _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
125         _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
126         _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
127         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
128         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
129         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
130         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
131         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
132 
133         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
134         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
135         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
136         _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
137         _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
138         _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
139 
140         _testHttpValid("text/xml","no-bom","US-ASCII",null);
141         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
142         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
143         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
144         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
145         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
146         _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
147 
148         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
149         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
150         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
151         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
152         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
153 
154         _testHttpLenient("text/xml","no-bom","US-ASCII",null, "US-ASCII");
155         _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8", "UTF-8");
156         _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null, "UTF-8");
157         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
158         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
159         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
160         _testHttpLenient("text/xml","UTF-8-bom","US-ASCII",null, "US-ASCII");
161 
162         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
163         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
164         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
165         _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
166         _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE",null, "UTF-16");
167 
168         _testHttpLenient("text/html","no-bom","US-ASCII","US-ASCII", "US-ASCII");
169         _testHttpLenient("text/html","no-bom","US-ASCII",null, "US-ASCII");
170         _testHttpLenient("text/html;charset=UTF-8","no-bom","US-ASCII","UTF-8", "UTF-8");
171         _testHttpLenient("text/html;charset=UTF-16BE","no-bom","US-ASCII","UTF-8", "UTF-8");
172     }
173 
174     public void _testHttpValid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
175         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML1 :XML3,streamEnc,prologEnc);
176         XmlReader xmlReader = new XmlReader(is,cT,false);
177         if (!streamEnc.equals("UTF-16")) {
178             // we can not assert things here becuase UTF-8, US-ASCII and ISO-8859-1 look alike for the chars used for detection
179         }
180         else {
181             assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
182         }
183     }
184 
185     protected void _testHttpInvalid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
186         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML2 :XML3,streamEnc,prologEnc);
187         try {
188             new XmlReader(is,cT,false);
189             fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
190         }
191         catch (IOException ex) {
192             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
193         }
194      }
195 
196     protected void _testHttpLenient(String cT, String bomEnc, String streamEnc, String prologEnc, String shouldbe) throws Exception {
197         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?XML2 :XML3,streamEnc,prologEnc);
198         XmlReader xmlReader = new XmlReader(is,cT,true);
199         assertEquals(xmlReader.getEncoding(),shouldbe);
200     }
201 
202     // XML Stream generator
203 
204     private static final int[] NO_BOM_BYTES = {};
205     private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
206     private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
207     private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
208 
209     private static final Map BOMs = new HashMap();
210 
211     static {
212         BOMs.put("no-bom",NO_BOM_BYTES);
213         BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
214         BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
215         BOMs.put("UTF-16-bom",NO_BOM_BYTES); // it's added by the writer
216         BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
217     }
218 
219     private static final MessageFormat XML = new MessageFormat(
220             "<root>{2}</root>");
221     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
222             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
223     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
224             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
225     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
226             "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
227   private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
228             "<?xml version=\"1.0\" encoding =  \t \n \r''{1}''?>\n<root>{2}</root>");
229 
230   private static final MessageFormat INFO = new MessageFormat(
231             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
232 
233     private static final Map XMLs = new HashMap();
234 
235     static {
236         XMLs.put(XML1, XML);
237         XMLs.put(XML2, XML_WITH_PROLOG);
238         XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
239         XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
240         XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
241     }
242 
243     /***
244      *
245      * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
246      * @param xmlType xml, xml-prolog, xml-prolog-charset
247      * @return XML stream
248      */
249     protected InputStream getXmlStream(String bomType,String xmlType,String streamEnc,String prologEnc) throws IOException {
250         ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
251         int[] bom = (int[]) BOMs.get(bomType);
252         if (bom==null) {
253             bom = new int[0];
254         }
255         MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
256         for (int i=0;i<bom.length;i++) {
257             baos.write(bom[i]);
258         }
259         Writer writer = new OutputStreamWriter(baos,streamEnc);
260         String info = INFO.format(new Object[]{bomType,xmlType,prologEnc});
261         String xmlDoc = xml.format(new Object[]{streamEnc,prologEnc,info});
262         writer.write(xmlDoc);
263 
264         // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
265         writer.write("<da>\n");
266         for (int i=0;i<10000;i++) {
267             writer.write("<do/>\n");
268         }
269         writer.write("</da>\n");
270 
271         writer.close();
272         return new ByteArrayInputStream(baos.toByteArray());
273     }
274 
275 
276 }