1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.unittest;
18  
19  import com.sun.syndication.io.XmlReader;
20  import junit.framework.TestCase;
21  
22  import java.io.*;
23  import java.text.MessageFormat;
24  import java.util.HashMap;
25  import java.util.Map;
26  
27  /***
28   * @author pat, tucu
29   *
30   */
31  public class TestXmlReader extends TestCase {
32  
33      public static void main(String[] args) throws Exception {
34          TestXmlReader test = new TestXmlReader();
35          test.testRawBom();
36          test.testRawNoBom();
37          test.testHttp();
38      }
39  
40      protected void _testRawNoBomValid(String encoding) throws Exception {
41          InputStream is = getXmlStream("no-bom","xml",encoding,encoding);
42          XmlReader xmlReader = new XmlReader(is);
43          assertEquals(xmlReader.getEncoding(),"UTF-8");
44  
45          is = getXmlStream("no-bom","xml-prolog",encoding,encoding);
46          xmlReader = new XmlReader(is);
47          assertEquals(xmlReader.getEncoding(),"UTF-8");
48  
49          is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
50          xmlReader = new XmlReader(is);
51          assertEquals(xmlReader.getEncoding(),encoding);
52      }
53  
54      protected void _testRawNoBomInvalid(String encoding) throws Exception {
55          InputStream is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
56          try {
57              XmlReader xmlReader = new XmlReader(is);
58              fail("It should have failed");
59          }
60          catch (IOException ex) {
61              assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
62          }
63       }
64  
65      public void testRawNoBom() throws Exception {
66          _testRawNoBomValid("US-ASCII");
67          _testRawNoBomValid("UTF-8");
68          _testRawNoBomValid("ISO-8859-1");
69      }
70  
71      protected void _testRawBomValid(String encoding) throws Exception {
72          InputStream is = getXmlStream(encoding+"-bom","xml-prolog-encoding",encoding,encoding);
73          XmlReader xmlReader = new XmlReader(is);
74          if (!encoding.equals("UTF-16")) {
75              assertEquals(xmlReader.getEncoding(),encoding);
76          }
77          else {
78              assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
79          }
80      }
81  
82      protected void _testRawBomInvalid(String bomEnc,String streamEnc,String prologEnc) throws Exception {
83          InputStream is = getXmlStream(bomEnc,"xml-prolog-encoding",streamEnc,prologEnc);
84          try {
85              XmlReader xmlReader = new XmlReader(is);
86              fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
87          }
88          catch (IOException ex) {
89              assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
90          }
91       }
92  
93      public void testRawBom() throws Exception {
94          _testRawBomValid("UTF-8");
95          _testRawBomValid("UTF-16BE");
96          _testRawBomValid("UTF-16LE");
97          _testRawBomValid("UTF-16");
98  
99          _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
100         _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
101         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
102         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
103         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
104         _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
105         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
106         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
107     }
108 
109     public void testHttp() throws Exception {
110         _testHttpValid("application/xml","no-bom","US-ASCII",null);
111         _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
112         _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
113         _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
114         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
115         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
116         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
117         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
118         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
119 
120         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
121         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
122         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
123         _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
124         _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
125         _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
126 
127         _testHttpValid("text/xml","no-bom","US-ASCII",null);
128         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
129         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
130         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
131         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
132         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
133         _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
134 
135         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
136         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
137         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
138         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
139         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
140     }
141 
142     public void _testHttpValid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
143         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml":"xml-prolog-encoding",streamEnc,prologEnc);
144         XmlReader xmlReader = new XmlReader(is,cT);
145         if (!streamEnc.equals("UTF-16")) {
146             // we can not assert things here becuase UTF-8, US-ASCII and ISO-8859-1 look alike for the chars used for detection
147         }
148         else {
149             assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
150         }
151     }
152 
153     protected void _testHttpInvalid(String cT,String bomEnc,String streamEnc,String prologEnc) throws Exception {
154         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
155         try {
156             XmlReader xmlReader = new XmlReader(is,cT);
157             fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
158         }
159         catch (IOException ex) {
160             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
161         }
162      }
163 
164     // XML Stream generator
165 
166     private static final int[] NO_BOM_BYTES = {};
167     private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
168     private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
169     private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
170 
171     private static final Map BOMs = new HashMap();
172 
173     static {
174         BOMs.put("no-bom",NO_BOM_BYTES);
175         BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
176         BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
177         BOMs.put("UTF-16-bom",NO_BOM_BYTES); // it's added by the writer
178         BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
179     }
180 
181     private static final MessageFormat XML = new MessageFormat(
182             "<root>{2}</root>");
183     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
184             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
185     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING = new MessageFormat(
186             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
187 
188     private static final MessageFormat INFO = new MessageFormat(
189             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
190 
191     private static final Map XMLs = new HashMap();
192 
193     static {
194         XMLs.put("xml",XML);
195         XMLs.put("xml-prolog",XML_WITH_PROLOG);
196         XMLs.put("xml-prolog-encoding",XML_WITH_PROLOG_AND_ENCODING);
197     }
198 
199     /***
200      *
201      * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
202      * @param xmlType xml, xml-prolog, xml-prolog-charset
203      * @return XML stream
204      */
205     protected InputStream getXmlStream(String bomType,String xmlType,String streamEnc,String prologEnc) throws IOException {
206         ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
207         int[] bom = (int[]) BOMs.get(bomType);
208         if (bom==null) {
209             bom = new int[0];
210         }
211         MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
212         for (int i=0;i<bom.length;i++) {
213             baos.write(bom[i]);
214         }
215         Writer writer = new OutputStreamWriter(baos,streamEnc);
216         String info = INFO.format(new Object[]{bomType,xmlType,prologEnc});
217         String xmlDoc = xml.format(new Object[]{streamEnc,prologEnc,info});
218         writer.write(xmlDoc);
219         writer.close();
220         return new ByteArrayInputStream(baos.toByteArray());
221     }
222 
223 
224 }