View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io;
18  
19  import java.io.*;
20  import java.net.URL;
21  import java.net.URLConnection;
22  import java.net.HttpURLConnection;
23  import java.util.regex.Pattern;
24  import java.util.regex.Matcher;
25  import java.text.MessageFormat;
26  
27  /***
28   * Character stream that handles (or at least attempts to) all the necessary voodoo to figure out
29   * the charset encoding of the XML document within the stream.
30   * <p>
31   * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
32   * character stream.
33   * <p>
34   * All this has to be done without consuming characters from the stream; otherwise the XML parser
35   * will not recognize the document as valid XML. This is not 100% true, but it's close enough
36   * (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all
37   * parsers).
38   * <p>
39   * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and
40   * HTTP streams by offering a wide set of constructors.
41   * <P>
42   * By default the charset encoding detection is lenient, the constructor with the lenient flag
43   * can be used for a strict detection (following HTTP MIME and XML specifications).
44   * All this is nicely explained by Mark Pilgrim in his blog,
45   * <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types">
46   * Determining the character encoding of a feed</a>.
47   * <p>
48   * @author Alejandro Abdelnur
49   *
50   */
51  public class XmlReader extends Reader {
52      private static final int BUFFER_SIZE = 4096;
53  
54      private static final String UTF_8 = "UTF-8";
55      private static final String US_ASCII = "US-ASCII";
56      private static final String UTF_16BE = "UTF-16BE";
57      private static final String UTF_16LE = "UTF-16LE";
58      private static final String UTF_16 = "UTF-16";
59  
60      private static String _staticDefaultEncoding = null;
61  
62      private Reader _reader;
63      private String _encoding;
64      private String _defaultEncoding;
65  
66      /***
67       * Sets the default encoding to use if none is set in HTTP content-type,
68       * XML prolog and the rules based on content-type are not adequate.
69       * <p/>
70       * If it is set to NULL the content-type based rules are used.
71       * <p/>
72       * By default it is NULL.
73       * <p/>
74       *
75       * @param encoding charset encoding to default to.
76       */
77      public static void setDefaultEncoding(String encoding) {
78          _staticDefaultEncoding = encoding;
79      }
80  
81      /***
82       * Returns the default encoding to use if none is set in HTTP content-type,
83       * XML prolog and the rules based on content-type are not adequate.
84       * <p/>
85       * If it is NULL the content-type based rules are used.
86       * <p/>
87       *
88       * @return the default encoding to use.
89       */
90      public static String getDefaultEncoding() {
91          return _staticDefaultEncoding;
92      }
93  
94      /***
95       * Creates a Reader for a File.
96       * <p>
97       * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
98       * missing defaults to UTF-8.
99       * <p>
100      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
101      * for details.
102      * <p>
103      * @param file File to create a Reader from.
104      * @throws IOException thrown if there is a problem reading the file.
105      *
106      */
107     public XmlReader(File file) throws IOException {
108         this(new FileInputStream(file));
109     }
110 
111     /***
112      * Creates a Reader for a raw InputStream.
113      * <p>
114      * It follows the same logic used for files.
115      * <p>
116      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
117      * for details.
118      * <p>
119      * @param is InputStream to create a Reader from.
120      * @throws IOException thrown if there is a problem reading the stream.
121      *
122      */
123     public XmlReader(InputStream is) throws IOException {
124         this(is,true);
125     }
126 
127     /***
128      * Creates a Reader for a raw InputStream.
129      * <p>
130      * It follows the same logic used for files.
131      * <p>
132      * If lenient detection is indicated and the detection above fails as per specifications it then attempts
133      * the following:
134      * <p>
135      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
136      * <p>
137      * Else if the XML prolog had a charset encoding that encoding is used.
138      * <p>
139      * Else if the content type had a charset encoding that encoding is used.
140      * <p>
141      * Else 'UTF-8' is used.
142      * <p>
143      * If lenient detection is indicated an XmlReaderException is never thrown.
144      * <p>
145      * @param is InputStream to create a Reader from.
146      * @param lenient indicates if the charset encoding detection should be relaxed.
147      * @throws IOException thrown if there is a problem reading the stream.
148      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
149      *
150      */
151     public XmlReader(InputStream is,boolean lenient) throws IOException, XmlReaderException {
152         _defaultEncoding = _staticDefaultEncoding;
153         try {
154             doRawStream(is,lenient);
155         }
156         catch (XmlReaderException ex) {
157             if (!lenient) {
158                 throw ex;
159             }
160             else {
161                 doLenientDetection(null,ex);
162             }
163         }
164     }
165 
166     /***
167      * Creates a Reader using the InputStream of a URL.
168      * <p>
169      * If the URL is not of type HTTP and there is not 'content-type' header in the fetched
170      * data it uses the same logic used for Files.
171      * <p>
172      * If the URL is a HTTP Url or there is a 'content-type' header in the fetched
173      * data it uses the same logic used for an InputStream with content-type.
174      * <p>
175      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
176      * for details.
177      * <p>
178      * @param url URL to create a Reader from.
179      * @throws IOException thrown if there is a problem reading the stream of the URL.
180      *
181      */
182     public XmlReader(URL url) throws IOException {
183         this(url.openConnection());
184     }
185 
186     /***
187      * Creates a Reader using the InputStream of a URLConnection.
188      * <p>
189      * If the URLConnection is not of type HttpURLConnection and there is not
190      * 'content-type' header in the fetched data it uses the same logic used for files.
191      * <p>
192      * If the URLConnection is a HTTP Url or there is a 'content-type' header in the fetched
193      * data it uses the same logic used for an InputStream with content-type.
194      * <p>
195      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
196      * for details.
197      * <p>
198      * @param conn URLConnection to create a Reader from.
199      * @throws IOException thrown if there is a problem reading the stream of the URLConnection.
200      *
201      */
202     public XmlReader(URLConnection conn) throws IOException {
203         _defaultEncoding = _staticDefaultEncoding;
204         boolean lenient = true;
205         if (conn instanceof HttpURLConnection) {
206             try {
207                 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
208             }
209             catch (XmlReaderException ex) {
210                 doLenientDetection(conn.getContentType(),ex);
211             }
212         }
213         else
214         if (conn.getContentType()!=null) {
215             try {
216                 doHttpStream(conn.getInputStream(),conn.getContentType(),lenient);
217             }
218             catch (XmlReaderException ex) {
219                 doLenientDetection(conn.getContentType(),ex);
220             }
221         }
222         else {
223             try {
224                 doRawStream(conn.getInputStream(),lenient);
225             }
226             catch (XmlReaderException ex) {
227                 doLenientDetection(null,ex);
228             }
229         }
230     }
231 
232     /***
233      * Creates a Reader using an InputStream an the associated content-type header.
234      * <p>
235      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
236      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
237      * prolog encoding uses the default encoding mandated by the content-type MIME type.
238      * <p>
239      * It does a lenient charset encoding detection, check the constructor with the lenient parameter
240      * for details.
241      * <p>
242      * @param is InputStream to create the reader from.
243      * @param httpContentType content-type header to use for the resolution of the charset encoding.
244      * @throws IOException thrown if there is a problem reading the file.
245      *
246      */
247     public XmlReader(InputStream is,String httpContentType) throws IOException {
248         this(is,httpContentType,true);
249     }
250 
251     /***
252      * Creates a Reader using an InputStream an the associated content-type header. This constructor is
253      * lenient regarding the encoding detection.
254      * <p>
255      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
256      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
257      * prolog encoding uses the default encoding mandated by the content-type MIME type.
258      * <p>
259      * If lenient detection is indicated and the detection above fails as per specifications it then attempts
260      * the following:
261      * <p>
262      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
263      * <p>
264      * Else if the XML prolog had a charset encoding that encoding is used.
265      * <p>
266      * Else if the content type had a charset encoding that encoding is used.
267      * <p>
268      * Else 'UTF-8' is used.
269      * <p>
270      * If lenient detection is indicated an XmlReaderException is never thrown.
271      * <p>
272      * @param is InputStream to create the reader from.
273      * @param httpContentType content-type header to use for the resolution of the charset encoding.
274      * @param lenient indicates if the charset encoding detection should be relaxed.
275      * @throws IOException thrown if there is a problem reading the file.
276      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
277      *
278      */
279     public XmlReader(InputStream is,String httpContentType,boolean lenient, String defaultEncoding)
280         throws IOException, XmlReaderException {
281         _defaultEncoding = (defaultEncoding == null) ? _staticDefaultEncoding : defaultEncoding;
282         try {
283             doHttpStream(is,httpContentType,lenient);
284         }
285         catch (XmlReaderException ex) {
286             if (!lenient) {
287                 throw ex;
288             }
289             else {
290                 doLenientDetection(httpContentType,ex);
291             }
292         }
293     }
294 
295     /***
296      * Creates a Reader using an InputStream an the associated content-type header. This constructor is
297      * lenient regarding the encoding detection.
298      * <p>
299      * First it checks if the stream has BOM. If there is not BOM checks the content-type encoding.
300      * If there is not content-type encoding checks the XML prolog encoding. If there is not XML
301      * prolog encoding uses the default encoding mandated by the content-type MIME type.
302      * <p>
303      * If lenient detection is indicated and the detection above fails as per specifications it then attempts
304      * the following:
305      * <p>
306      * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
307      * <p>
308      * Else if the XML prolog had a charset encoding that encoding is used.
309      * <p>
310      * Else if the content type had a charset encoding that encoding is used.
311      * <p>
312      * Else 'UTF-8' is used.
313      * <p>
314      * If lenient detection is indicated an XmlReaderException is never thrown.
315      * <p>
316      * @param is InputStream to create the reader from.
317      * @param httpContentType content-type header to use for the resolution of the charset encoding.
318      * @param lenient indicates if the charset encoding detection should be relaxed.
319      * @throws IOException thrown if there is a problem reading the file.
320      * @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
321      *
322      */
323     public XmlReader(InputStream is, String httpContentType, boolean lenient)
324         throws IOException, XmlReaderException {
325         this(is, httpContentType, lenient, null);
326     }
327 
328     private void doLenientDetection(String httpContentType,XmlReaderException ex) throws IOException {
329         if (httpContentType!=null) {
330             if (httpContentType.startsWith("text/html")) {
331                 httpContentType = httpContentType.substring("text/html".length());
332                 httpContentType = "text/xml" + httpContentType;
333                 try {
334                     doHttpStream(ex.getInputStream(),httpContentType,true);
335                     ex = null;
336                 }
337                 catch (XmlReaderException ex2) {
338                     ex = ex2;
339                 }
340             }
341         }
342         if (ex!=null) {
343             String encoding = ex.getXmlEncoding();
344             if (encoding==null) {
345                 encoding = ex.getContentTypeEncoding();
346             }
347             if (encoding==null) {
348               encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
349             }
350             prepareReader(ex.getInputStream(),encoding);
351         }
352     }
353 
354     /***
355      * Returns the charset encoding of the XmlReader.
356      * <p>
357      * @return charset encoding.
358      *
359      */
360     public String getEncoding() {
361         return _encoding;
362     }
363 
364     public int read(char[] buf,int offset,int len) throws IOException {
365         return _reader.read(buf,offset,len);
366     }
367 
368     /***
369      * Closes the XmlReader stream.
370      * <p>
371      * @throws IOException thrown if there was a problem closing the stream.
372      *
373      */
374     public void close() throws IOException {
375         _reader.close();
376     }
377 
378     private void doRawStream(InputStream is,boolean lenient) throws IOException {
379         BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
380         String bomEnc = getBOMEncoding(pis);
381         String xmlGuessEnc =  getXMLGuessEncoding(pis);
382         String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
383         String encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, pis);
384         prepareReader(pis,encoding);
385     }
386 
387     private void doHttpStream(InputStream is,String httpContentType,boolean lenient) throws IOException {
388         BufferedInputStream pis = new BufferedInputStream(is, BUFFER_SIZE);
389         String cTMime = getContentTypeMime(httpContentType);
390         String cTEnc  = getContentTypeEncoding(httpContentType);
391         String bomEnc = getBOMEncoding(pis);
392         String xmlGuessEnc =  getXMLGuessEncoding(pis);
393         String xmlEnc = getXmlProlog(pis,xmlGuessEnc);
394         String encoding = calculateHttpEncoding(cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc, pis,lenient);
395         prepareReader(pis,encoding);
396     }
397 
398     private void prepareReader(InputStream is,String encoding) throws IOException {
399         _reader = new InputStreamReader(is,encoding);
400         _encoding = encoding;
401     }
402 
403     // InputStream is passed for XmlReaderException creation only
404     private String calculateRawEncoding(String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is) throws IOException {
405         String encoding;
406         if (bomEnc==null) {
407             if (xmlGuessEnc==null || xmlEnc==null) {
408                 encoding = (_defaultEncoding == null) ? UTF_8 : _defaultEncoding;
409             }
410             else
411             if (xmlEnc.equals(UTF_16) && (xmlGuessEnc.equals(UTF_16BE) || xmlGuessEnc.equals(UTF_16LE))) {
412                 encoding = xmlGuessEnc;
413             }
414             else {
415                 encoding = xmlEnc;
416             }
417         }
418         else
419         if (bomEnc.equals(UTF_8)) {
420             if (xmlGuessEnc!=null && !xmlGuessEnc.equals(UTF_8)) {
421                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
422                                              bomEnc,xmlGuessEnc,xmlEnc,is);
423             }
424             if (xmlEnc!=null && !xmlEnc.equals(UTF_8)) {
425                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
426                                              bomEnc,xmlGuessEnc,xmlEnc,is);
427             }
428             encoding = UTF_8;
429         }
430         else
431         if (bomEnc.equals(UTF_16BE) || bomEnc.equals(UTF_16LE)) {
432             if (xmlGuessEnc!=null && !xmlGuessEnc.equals(bomEnc)) {
433                 throw new IOException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}));
434             }
435             if (xmlEnc!=null && !xmlEnc.equals(UTF_16) && !xmlEnc.equals(bomEnc)) {
436                 throw new XmlReaderException(RAW_EX_1.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
437                                              bomEnc,xmlGuessEnc,xmlEnc,is);
438             }
439             encoding =bomEnc;
440         }
441         else {
442             throw new XmlReaderException(RAW_EX_2.format(new Object[]{bomEnc,xmlGuessEnc,xmlEnc}),
443                                          bomEnc,xmlGuessEnc,xmlEnc,is);
444         }
445         return encoding;
446     }
447 
448     // InputStream is passed for XmlReaderException creation only
449     private String calculateHttpEncoding(String cTMime, String cTEnc, String bomEnc, String xmlGuessEnc, String xmlEnc, InputStream is,boolean lenient) throws IOException {
450         String encoding;
451         if (lenient & xmlEnc!=null) {
452             encoding = xmlEnc;
453         }
454         else {
455             boolean appXml = isAppXml(cTMime);
456             boolean textXml = isTextXml(cTMime);
457             if (appXml || textXml) {
458                 if (cTEnc==null) {
459                     if (appXml) {
460                         encoding = calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc, is);
461                     }
462                     else {
463                         encoding = (_defaultEncoding == null) ? US_ASCII : _defaultEncoding;
464                     }
465                 }
466                 else
467                 if (bomEnc!=null && (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE))) {
468                     throw new XmlReaderException(HTTP_EX_1.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
469                                                  cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
470                 }
471                 else
472                 if (cTEnc.equals(UTF_16)) {
473                     if (bomEnc!=null && bomEnc.startsWith(UTF_16)) {
474                         encoding = bomEnc;
475                     }
476                     else {
477                         throw new XmlReaderException(HTTP_EX_2.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
478                                                      cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
479                     }
480                 }
481                 else {
482                     encoding = cTEnc;
483                 }
484             }
485             else {
486                 throw new XmlReaderException(HTTP_EX_3.format(new Object[]{cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc}),
487                                              cTMime,cTEnc,bomEnc,xmlGuessEnc,xmlEnc,is);
488             }
489         }
490         return encoding;
491     }
492 
493     // returns MIME type or NULL if httpContentType is NULL
494     private static String getContentTypeMime(String httpContentType) {
495         String mime = null;
496         if (httpContentType!=null) {
497             int i = httpContentType.indexOf(";");
498             mime = ((i==-1) ? httpContentType : httpContentType.substring(0,i)).trim();
499         }
500         return mime;
501     }
502 
503     private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([.[^; ]]*)");
504 
505     // returns charset parameter value, NULL if not present, NULL if httpContentType is NULL
506     private static String getContentTypeEncoding(String httpContentType) {
507         String encoding = null;
508         if (httpContentType!=null) {
509             int i = httpContentType.indexOf(";");
510             if (i>-1) {
511                 String postMime = httpContentType.substring(i+1);
512                 Matcher m = CHARSET_PATTERN.matcher(postMime);
513                 encoding = (m.find()) ? m.group(1) : null;
514                 encoding = (encoding!=null) ? encoding.toUpperCase() : null;
515             }
516             if (encoding != null &&
517                     ((encoding.startsWith("\"") && encoding.endsWith("\"")) ||
518                      (encoding.startsWith("'") && encoding.endsWith("'"))
519                     )) {
520                 encoding = encoding.substring(1, encoding.length() - 1);
521             }
522         }
523         return encoding;
524     }
525 
526     // returns the BOM in the stream, NULL if not present,
527     // if there was BOM the in the stream it is consumed
528     private static String getBOMEncoding(BufferedInputStream is) throws IOException {
529         String encoding = null;
530         int[] bytes = new int[3];
531         is.mark(3);
532         bytes[0] = is.read();
533         bytes[1] = is.read();
534         bytes[2] = is.read();
535 
536         if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
537             encoding = UTF_16BE;
538             is.reset();
539             is.read();
540             is.read();
541         }
542         else
543         if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
544             encoding = UTF_16LE;
545             is.reset();
546             is.read();
547             is.read();
548         }
549         else
550         if (bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
551             encoding = UTF_8;
552         }
553         else {
554             is.reset();
555         }
556         return encoding;
557     }
558 
559     // returns the best guess for the encoding by looking the first bytes of the stream, '<?'
560     private static String getXMLGuessEncoding(BufferedInputStream is) throws IOException {
561         String encoding = null;
562         int[] bytes = new int[4];
563         is.mark(4);
564         bytes[0] = is.read();
565         bytes[1] = is.read();
566         bytes[2] = is.read();
567         bytes[3] = is.read();
568         is.reset();
569 
570         if (bytes[0] == 0x00 && bytes[1] == 0x3C && bytes[2] == 0x00 && bytes[3] == 0x3F) {
571                 encoding = UTF_16BE;
572         }
573         else
574         if (bytes[0] == 0x3C && bytes[1] == 0x00 && bytes[2] == 0x3F && bytes[3] == 0x00) {
575                 encoding = UTF_16LE;
576         }
577         else
578         if (bytes[0] == 0x3C && bytes[1] == 0x3F && bytes[2] == 0x78 && bytes[3] == 0x6D) {
579             encoding = UTF_8;
580         }
581         return encoding;
582     }
583 
584 
585     private static final Pattern ENCODING_PATTERN =
586         Pattern.compile("<//?xml.*encoding[//s]*=[//s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
587 
588     // returns the encoding declared in the <?xml encoding=...?>,  NULL if none
589     private static String getXmlProlog(BufferedInputStream is,String guessedEnc) throws IOException {
590         String encoding = null;
591         if (guessedEnc!=null) {
592             byte[] bytes = new byte[BUFFER_SIZE];
593             is.mark(BUFFER_SIZE);
594             int offset = 0;
595             int max = BUFFER_SIZE;
596             int c = is.read(bytes,offset,max);
597             int firstGT = -1;
598             while (c!=-1 && firstGT==-1 && offset< BUFFER_SIZE) {
599                 offset += c;
600                 max -= c;
601                 c = is.read(bytes,offset,max);
602                 firstGT = new String(bytes, 0, offset).indexOf(">");
603             }
604             if (firstGT == -1) {
605                 if (c == -1) {
606                     throw new IOException("Unexpected end of XML stream");
607                 }
608                 else {
609                     throw new IOException("XML prolog or ROOT element not found on first " + offset + " bytes");
610                 }
611             }
612             int bytesRead = offset;
613             if (bytesRead>0) {
614                 is.reset();
615                 Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,0,firstGT + 1), guessedEnc);
616                 BufferedReader bReader = new BufferedReader(reader);
617                 StringBuffer prolog = new StringBuffer();
618                 String line = bReader.readLine();
619                 while (line != null) {
620                     prolog.append(line);
621                     line = bReader.readLine();
622                 }
623                 Matcher m = ENCODING_PATTERN.matcher(prolog);
624                 if (m.find()) {
625                     encoding = m.group(1).toUpperCase();
626                     encoding = encoding.substring(1,encoding.length()-1);
627                 }
628             }
629         }
630         return encoding;
631     }
632 
633     // indicates if the MIME type belongs to the APPLICATION XML family
634     private static boolean isAppXml(String mime) {
635         return mime!=null &&
636                (mime.equals("application/xml") ||
637                 mime.equals("application/xml-dtd") ||
638                 mime.equals("application/xml-external-parsed-entity") ||
639                 (mime.startsWith("application/") && mime.endsWith("+xml")));
640     }
641 
642     // indicates if the MIME type belongs to the TEXT XML family
643     private static boolean isTextXml(String mime) {
644         return mime!=null &&
645                (mime.equals("text/xml") ||
646                 mime.equals("text/xml-external-parsed-entity") ||
647                 (mime.startsWith("text/") && mime.endsWith("+xml")));
648     }
649 
650     private static final MessageFormat RAW_EX_1 = new MessageFormat(
651             "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch");
652 
653     private static final MessageFormat RAW_EX_2 = new MessageFormat(
654             "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM");
655 
656     private static final MessageFormat HTTP_EX_1 = new MessageFormat(
657             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL");
658 
659     private static final MessageFormat HTTP_EX_2 = new MessageFormat(
660             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch");
661 
662     private static final MessageFormat HTTP_EX_3 = new MessageFormat(
663             "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME");
664 
665 }