Formatted and cleaned up sources

This commit is contained in:
Patrick Gotthard 2014-05-13 19:28:08 +02:00
parent 6eca421a93
commit ed8691df13
93 changed files with 1089 additions and 1055 deletions

View file

@ -34,18 +34,25 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
* Character stream that handles (or at least attempts to) all the necessary Voodoo to figure out the charset encoding of the XML document within the stream. * Character stream that handles (or at least attempts to) all the necessary Voodoo to figure out the
* charset encoding of the XML document within the stream.
* <p> * <p>
* IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a character stream. * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
* character stream.
* <p> * <p>
* All this has to be done without consuming characters from the stream, if not the XML parser will not recognize the document as a valid XML. This is not 100% * All this has to be done without consuming characters from the stream, if not the XML parser will
* true, but it's close enough (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all parsers). * not recognize the document as a valid XML. This is not 100% true, but it's close enough (UTF-8
* BOM is not handled by all parsers right now, XmlReader handles it and things work in all
* parsers).
* <p> * <p>
* The XmlReader class handles the charset encoding of XML documents in Files, raw streams and HTTP streams by offering a wide set of constructors. * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and HTTP
* streams by offering a wide set of constructors.
* <P> * <P>
* By default the charset encoding detection is lenient, the constructor with the lenient flag can be used for a strict detection (following HTTP MIME and XML * By default the charset encoding detection is lenient, the constructor with the lenient flag can
* specifications). All this is nicely explained by Mark Pilgrim in his blog, <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types"> Determining * be used for a strict detection (following HTTP MIME and XML specifications). All this is nicely explained
* the character encoding of a feed</a>. * by Mark Pilgrim in his blog, <a
* href="http://diveintomark.org/archives/2004/02/13/xml-media-types"> Determining the character
* encoding of a feed</a>.
* <p> * <p>
* *
* @author Alejandro Abdelnur * @author Alejandro Abdelnur
@ -80,9 +87,11 @@ public class XmlReader extends Reader {
/** /**
* Creates a Reader for a File. * Creates a Reader for a File.
* <p> * <p>
* It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also missing defaults to UTF-8. * It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
* missing defaults to UTF-8.
* <p> * <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. * It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p> * <p>
* *
* @param file File to create a Reader from. * @param file File to create a Reader from.
@ -98,7 +107,8 @@ public class XmlReader extends Reader {
* <p> * <p>
* It follows the same logic used for files. * It follows the same logic used for files.
* <p> * <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. * It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p> * <p>
* *
* @param is InputStream to create a Reader from. * @param is InputStream to create a Reader from.
@ -110,13 +120,16 @@ public class XmlReader extends Reader {
} }
/** /**
* Creates a Reader for a raw InputStream and uses the provided default encoding if none is determined. * Creates a Reader for a raw InputStream and uses the provided default encoding if none is
* determined.
* <p> * <p>
* It follows the same logic used for files. * It follows the same logic used for files.
* <p> * <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following: * If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p> * <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p> * <p>
* Else if the XML prolog had a charset encoding that encoding is used. * Else if the XML prolog had a charset encoding that encoding is used.
* <p> * <p>
@ -131,7 +144,8 @@ public class XmlReader extends Reader {
* @param lenient indicates if the charset encoding detection should be relaxed. * @param lenient indicates if the charset encoding detection should be relaxed.
* @param defaultEncoding default encoding to use if one cannot be detected. * @param defaultEncoding default encoding to use if one cannot be detected.
* @throws IOException thrown if there is a problem reading the stream. * @throws IOException thrown if there is a problem reading the stream.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs. * @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
* *
*/ */
public XmlReader(final InputStream is, final boolean lenient, final String defaultEncoding) throws IOException, XmlReaderException { public XmlReader(final InputStream is, final boolean lenient, final String defaultEncoding) throws IOException, XmlReaderException {
@ -156,9 +170,11 @@ public class XmlReader extends Reader {
* <p> * <p>
* It follows the same logic used for files. * It follows the same logic used for files.
* <p> * <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following: * If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p> * <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p> * <p>
* Else if the XML prolog had a charset encoding that encoding is used. * Else if the XML prolog had a charset encoding that encoding is used.
* <p> * <p>
@ -172,7 +188,8 @@ public class XmlReader extends Reader {
* @param is InputStream to create a Reader from. * @param is InputStream to create a Reader from.
* @param lenient indicates if the charset encoding detection should be relaxed. * @param lenient indicates if the charset encoding detection should be relaxed.
* @throws IOException thrown if there is a problem reading the stream. * @throws IOException thrown if there is a problem reading the stream.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs. * @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
* *
*/ */
public XmlReader(final InputStream is, final boolean lenient) throws IOException, XmlReaderException { public XmlReader(final InputStream is, final boolean lenient) throws IOException, XmlReaderException {
@ -182,11 +199,14 @@ public class XmlReader extends Reader {
/** /**
* Creates a Reader using the InputStream of a URL. * Creates a Reader using the InputStream of a URL.
* <p> * <p>
* If the URL is not of type HTTP and there is no 'content-type' header in the fetched data it uses the same logic used for Files. * If the URL is not of type HTTP and there is no 'content-type' header in the fetched data it
* uses the same logic used for Files.
* <p> * <p>
* If the URL is an HTTP URL or there is a 'content-type' header in the fetched data it uses the same logic used for an InputStream with content-type. * If the URL is an HTTP URL or there is a 'content-type' header in the fetched data it uses the
* same logic used for an InputStream with content-type.
* <p> * <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. * It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p> * <p>
* *
* @param url URL to create a Reader from. * @param url URL to create a Reader from.
@ -200,12 +220,14 @@ public class XmlReader extends Reader {
/** /**
* Creates a Reader using the InputStream of a URLConnection. * Creates a Reader using the InputStream of a URLConnection.
* <p> * <p>
* If the URLConnection is not of type HttpURLConnection and there is no 'content-type' header in the fetched data it uses the same logic used for files. * If the URLConnection is not of type HttpURLConnection and there is no 'content-type' header
* in the fetched data it uses the same logic used for files.
* <p> * <p>
* If the URLConnection is an HTTP URL or there is a 'content-type' header in the fetched data it uses the same logic used for an InputStream with * If the URLConnection is an HTTP URL or there is a 'content-type' header in the fetched data it
* content-type. * uses the same logic used for an InputStream with content-type.
* <p> * <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. * It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p> * <p>
* *
* @param conn URLConnection to create a Reader from. * @param conn URLConnection to create a Reader from.
@ -245,10 +267,12 @@ public class XmlReader extends Reader {
/** /**
* Creates a Reader using an InputStream and the associated content-type header. * Creates a Reader using an InputStream and the associated content-type header.
* <p> * <p>
* First it checks if the stream has a BOM. If there is no BOM checks the content-type encoding. If there is no content-type encoding checks the XML prolog * First it checks if the stream has a BOM. If there is no BOM checks the content-type encoding.
* encoding. If there is no XML prolog encoding uses the default encoding mandated by the content-type MIME type. * If there is no content-type encoding checks the XML prolog encoding. If there is no XML
* prolog encoding uses the default encoding mandated by the content-type MIME type.
* <p> * <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details. * It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p> * <p>
* *
* @param is InputStream to create the reader from. * @param is InputStream to create the reader from.
@ -263,12 +287,15 @@ public class XmlReader extends Reader {
/** /**
* Creates a Reader using an InputStream and the associated content-type header. * Creates a Reader using an InputStream and the associated content-type header.
* <p> * <p>
* First it checks if the stream has a BOM. If there is no BOM checks the content-type encoding. If there is no content-type encoding checks the XML prolog * First it checks if the stream has a BOM. If there is no BOM checks the content-type encoding.
* encoding. If there is no XML prolog encoding uses the default encoding mandated by the content-type MIME type. * If there is no content-type encoding checks the XML prolog encoding. If there is no XML
* prolog encoding uses the default encoding mandated by the content-type MIME type.
* <p> * <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following: * If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p> * <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p> * <p>
* Else if the XML prolog had a charset encoding that encoding is used. * Else if the XML prolog had a charset encoding that encoding is used.
* <p> * <p>
@ -284,7 +311,8 @@ public class XmlReader extends Reader {
* @param lenient indicates if the charset encoding detection should be relaxed. * @param lenient indicates if the charset encoding detection should be relaxed.
* @param defaultEncoding default encoding to use if one cannot be detected. * @param defaultEncoding default encoding to use if one cannot be detected.
* @throws IOException thrown if there is a problem reading the file. * @throws IOException thrown if there is a problem reading the file.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs. * @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
* *
*/ */
public XmlReader(final InputStream is, final String httpContentType, final boolean lenient, final String defaultEncoding) throws IOException, public XmlReader(final InputStream is, final String httpContentType, final boolean lenient, final String defaultEncoding) throws IOException,
@ -308,12 +336,15 @@ public class XmlReader extends Reader {
/** /**
* Creates a Reader using an InputStream and the associated content-type header. * Creates a Reader using an InputStream and the associated content-type header.
* <p> * <p>
* First it checks if the stream has a BOM. If there is no BOM checks the content-type encoding. If there is no content-type encoding checks the XML prolog * First it checks if the stream has a BOM. If there is no BOM checks the content-type encoding.
* encoding. If there is no XML prolog encoding uses the default encoding mandated by the content-type MIME type. * If there is no content-type encoding checks the XML prolog encoding. If there is no XML
* prolog encoding uses the default encoding mandated by the content-type MIME type.
* <p> * <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following: * If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p> * <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p> * <p>
* Else if the XML prolog had a charset encoding that encoding is used. * Else if the XML prolog had a charset encoding that encoding is used.
* <p> * <p>
@ -328,7 +359,8 @@ public class XmlReader extends Reader {
* @param httpContentType content-type header to use for the resolution of the charset encoding. * @param httpContentType content-type header to use for the resolution of the charset encoding.
* @param lenient indicates if the charset encoding detection should be relaxed. * @param lenient indicates if the charset encoding detection should be relaxed.
* @throws IOException thrown if there is a problem reading the file. * @throws IOException thrown if there is a problem reading the file.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs. * @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
* *
*/ */
public XmlReader(final InputStream is, final String httpContentType, final boolean lenient) throws IOException, XmlReaderException { public XmlReader(final InputStream is, final String httpContentType, final boolean lenient) throws IOException, XmlReaderException {
@ -336,7 +368,8 @@ public class XmlReader extends Reader {
} }
/** /**
* Returns the default encoding to use if none is set in HTTP content-type, XML prolog and the rules based on content-type are not adequate. * Returns the default encoding to use if none is set in HTTP content-type, XML prolog and the
* rules based on content-type are not adequate.
* <p/> * <p/>
* If it is NULL the content-type based rules are used. * If it is NULL the content-type based rules are used.
* <p/> * <p/>
@ -348,7 +381,8 @@ public class XmlReader extends Reader {
} }
/** /**
* Sets the default encoding to use if none is set in HTTP content-type, XML prolog and the rules based on content-type are not adequate. * Sets the default encoding to use if none is set in HTTP content-type, XML prolog and the
* rules based on content-type are not adequate.
* <p/> * <p/>
* If it is set to NULL the content-type based rules are used. * If it is set to NULL the content-type based rules are used.
* <p/> * <p/>

View file

@ -689,7 +689,7 @@ public class Atom10Parser extends BaseWireFeedParser {
* Parse entry from reader. * Parse entry from reader.
*/ */
public static Entry parseEntry(final Reader rd, final String baseURI, final Locale locale) throws JDOMException, IOException, IllegalArgumentException, public static Entry parseEntry(final Reader rd, final String baseURI, final Locale locale) throws JDOMException, IOException, IllegalArgumentException,
FeedException { FeedException {
// Parse entry into JDOM tree // Parse entry into JDOM tree
final SAXBuilder builder = new SAXBuilder(); final SAXBuilder builder = new SAXBuilder();

View file

@ -48,17 +48,17 @@ public class DateParser {
// parse a valid date out of a substring of the full string given the mask so we have to check // parse a valid date out of a substring of the full string given the mask so we have to check
// the most complete format first, then it fails with exception // the most complete format first, then it fails with exception
private static final String[] W3CDATETIME_MASKS = { "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", private static final String[] W3CDATETIME_MASKS = { "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
"yyyy-MM-dd't'HH:mm:ss.SSS'z'", "yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", "yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd't'HH:mm:ssZ", "yyyy-MM-dd't'HH:mm:ss.SSS'z'", "yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", "yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd't'HH:mm:ssZ",
"yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", "yyyy-MM-dd'T'HH:mmz", // together "yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", "yyyy-MM-dd'T'HH:mmz", // together
// with // with
// logic // logic
// in // in
// the // the
// parseW3CDateTime // parseW3CDateTime
// they // they
"yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to "yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to
// be GMT // be GMT
"yyyy'T'HH:mmz", "yyyy-MM-dd't'HH:mmz", "yyyy-MM-dd'T'HH:mm'Z'", "yyyy-MM-dd't'HH:mm'z'", "yyyy-MM-dd", "yyyy-MM", "yyyy" }; "yyyy'T'HH:mmz", "yyyy-MM-dd't'HH:mmz", "yyyy-MM-dd'T'HH:mm'Z'", "yyyy-MM-dd't'HH:mm'z'", "yyyy-MM-dd", "yyyy-MM", "yyyy" };
/** /**
* The masks used to validate and parse the input to this Atom date. These are a lot more * The masks used to validate and parse the input to this Atom date. These are a lot more
@ -67,14 +67,14 @@ public class DateParser {
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
private static final String[] masks = { "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz", // invalid private static final String[] masks = { "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz", // invalid
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid
"yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", // invalid "yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", // invalid
"yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", // invalid "yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", // invalid
"yyyy-MM-dd'T'HH:mmz", // invalid "yyyy-MM-dd'T'HH:mmz", // invalid
"yyyy-MM-dd't'HH:mmz", // invalid "yyyy-MM-dd't'HH:mmz", // invalid
"yyyy-MM-dd'T'HH:mm'Z'", // invalid "yyyy-MM-dd'T'HH:mm'Z'", // invalid
"yyyy-MM-dd't'HH:mm'z'", // invalid "yyyy-MM-dd't'HH:mm'z'", // invalid
"yyyy-MM-dd", "yyyy-MM", "yyyy" }; "yyyy-MM-dd", "yyyy-MM", "yyyy" };
static { static {
ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader().getTokenizedProperty("datetime.extra.masks", "|"); ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader().getTokenizedProperty("datetime.extra.masks", "|");

View file

@ -73,7 +73,7 @@ public class TestSyndFeedAtom03DCSyModules extends TestSyndFeedAtom03 {
assertProperty(dc.getRights(), prefix + "dc:rights"); assertProperty(dc.getRights(), prefix + "dc:rights");
} else { } else {
assertProperty(dc.getRights(), prefix + "copyright"); // in assertProperty(dc.getRights(), prefix + "copyright"); // in
// header // header
// is // is
// convenience // convenience
// method // method