Formatted and cleaned up sources

This commit is contained in:
Patrick Gotthard 2014-05-13 19:28:08 +02:00
parent 6eca421a93
commit ed8691df13
93 changed files with 1089 additions and 1055 deletions

View file

@ -34,18 +34,25 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Character stream that handles (or at least attempts to) all the necessary Voodoo to figure out the charset encoding of the XML document within the stream.
* Character stream that handles (or at least attempts to) all the necessary Voodoo to figure out the
* charset encoding of the XML document within the stream.
* <p>
* IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a character stream.
* IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a
* character stream.
* <p>
* All this has to be done without consuming characters from the stream, if not the XML parser will not recognize the document as a valid XML. This is not 100%
* true, but it's close enough (UTF-8 BOM is not handled by all parsers right now, XmlReader handles it and things work in all parsers).
* All this has to be done without consuming characters from the stream, if not the XML parser will
* not recognize the document as a valid XML. This is not 100% true, but it's close enough (UTF-8
* BOM is not handled by all parsers right now, XmlReader handles it and things work in all
* parsers).
* <p>
* The XmlReader class handles the charset encoding of XML documents in Files, raw streams and HTTP streams by offering a wide set of constructors.
* The XmlReader class handles the charset encoding of XML documents in Files, raw streams and HTTP
* streams by offering a wide set of constructors.
* <P>
* By default the charset encoding detection is lenient, the constructor with the lenient flag can be used for strict detection (following HTTP MIME and XML
* specifications). All this is nicely explained by Mark Pilgrim in his blog, <a href="http://diveintomark.org/archives/2004/02/13/xml-media-types"> Determining
* the character encoding of a feed</a>.
* By default the charset encoding detection is lenient, the constructor with the lenient flag can
* be used for strict detection (following HTTP MIME and XML specifications). All this is nicely
* explained by Mark Pilgrim in his blog, <a
* href="http://diveintomark.org/archives/2004/02/13/xml-media-types"> Determining the character
* encoding of a feed</a>.
* <p>
*
* @author Alejandro Abdelnur
@ -80,9 +87,11 @@ public class XmlReader extends Reader {
/**
* Creates a Reader for a File.
* <p>
* It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also missing defaults to UTF-8.
* It looks for the UTF-8 BOM first, if none sniffs the XML prolog charset, if this is also
* missing defaults to UTF-8.
* <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details.
* It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p>
*
* @param file File to create a Reader from.
@ -98,7 +107,8 @@ public class XmlReader extends Reader {
* <p>
* It follows the same logic used for files.
* <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details.
* It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p>
*
* @param is InputStream to create a Reader from.
@ -110,13 +120,16 @@ public class XmlReader extends Reader {
}
/**
* Creates a Reader for a raw InputStream and uses the provided default encoding if none is determined.
* Creates a Reader for a raw InputStream and uses the provided default encoding if none is
* determined.
* <p>
* It follows the same logic used for files.
* <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following:
* If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p>
* Else if the XML prolog had a charset encoding that encoding is used.
* <p>
@ -131,7 +144,8 @@ public class XmlReader extends Reader {
* @param lenient indicates if the charset encoding detection should be relaxed.
* @param defaultEncoding default encoding to use if one cannot be detected.
* @throws IOException thrown if there is a problem reading the stream.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
* @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
*
*/
public XmlReader(final InputStream is, final boolean lenient, final String defaultEncoding) throws IOException, XmlReaderException {
@ -156,9 +170,11 @@ public class XmlReader extends Reader {
* <p>
* It follows the same logic used for files.
* <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following:
* If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p>
* Else if the XML prolog had a charset encoding that encoding is used.
* <p>
@ -172,7 +188,8 @@ public class XmlReader extends Reader {
* @param is InputStream to create a Reader from.
* @param lenient indicates if the charset encoding detection should be relaxed.
* @throws IOException thrown if there is a problem reading the stream.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
* @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
*
*/
public XmlReader(final InputStream is, final boolean lenient) throws IOException, XmlReaderException {
@ -182,11 +199,14 @@ public class XmlReader extends Reader {
/**
* Creates a Reader using the InputStream of a URL.
* <p>
* If the URL is not of type HTTP and there is no 'content-type' header in the fetched data it uses the same logic used for Files.
* If the URL is not of type HTTP and there is no 'content-type' header in the fetched data it
* uses the same logic used for Files.
* <p>
* If the URL is an HTTP URL or there is a 'content-type' header in the fetched data it uses the same logic used for an InputStream with content-type.
* If the URL is an HTTP URL or there is a 'content-type' header in the fetched data it uses the
* same logic used for an InputStream with content-type.
* <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details.
* It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p>
*
* @param url URL to create a Reader from.
@ -200,12 +220,14 @@ public class XmlReader extends Reader {
/**
* Creates a Reader using the InputStream of a URLConnection.
* <p>
* If the URLConnection is not of type HttpURLConnection and there is no 'content-type' header in the fetched data it uses the same logic used for files.
* If the URLConnection is not of type HttpURLConnection and there is no 'content-type' header
* in the fetched data it uses the same logic used for files.
* <p>
* If the URLConnection is of type HttpURLConnection or there is a 'content-type' header in the fetched data it uses the same logic used for an InputStream with
* content-type.
* If the URLConnection is of type HttpURLConnection or there is a 'content-type' header in the
* fetched data it uses the same logic used for an InputStream with content-type.
* <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details.
* It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p>
*
* @param conn URLConnection to create a Reader from.
@ -245,10 +267,12 @@ public class XmlReader extends Reader {
/**
* Creates a Reader using an InputStream and the associated content-type header.
* <p>
* First it checks if the stream has a BOM. If there is no BOM, it checks the content-type encoding. If there is no content-type encoding, it checks the XML prolog
* encoding. If there is no XML prolog encoding, it uses the default encoding mandated by the content-type MIME type.
* First it checks if the stream has a BOM. If there is no BOM, it checks the content-type
* encoding. If there is no content-type encoding, it checks the XML prolog encoding. If there is
* no XML prolog encoding, it uses the default encoding mandated by the content-type MIME type.
* <p>
* It does a lenient charset encoding detection, check the constructor with the lenient parameter for details.
* It does a lenient charset encoding detection, check the constructor with the lenient
* parameter for details.
* <p>
*
* @param is InputStream to create the reader from.
@ -263,12 +287,15 @@ public class XmlReader extends Reader {
/**
* Creates a Reader using an InputStream and the associated content-type header.
* <p>
* First it checks if the stream has a BOM. If there is no BOM, it checks the content-type encoding. If there is no content-type encoding, it checks the XML prolog
* encoding. If there is no XML prolog encoding, it uses the default encoding mandated by the content-type MIME type.
* First it checks if the stream has a BOM. If there is no BOM, it checks the content-type
* encoding. If there is no content-type encoding, it checks the XML prolog encoding. If there is
* no XML prolog encoding, it uses the default encoding mandated by the content-type MIME type.
* <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following:
* If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p>
* Else if the XML prolog had a charset encoding that encoding is used.
* <p>
@ -284,7 +311,8 @@ public class XmlReader extends Reader {
* @param lenient indicates if the charset encoding detection should be relaxed.
* @param defaultEncoding default encoding to use if one cannot be detected.
* @throws IOException thrown if there is a problem reading the file.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
* @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
*
*/
public XmlReader(final InputStream is, final String httpContentType, final boolean lenient, final String defaultEncoding) throws IOException,
@ -308,12 +336,15 @@ public class XmlReader extends Reader {
/**
* Creates a Reader using an InputStream and the associated content-type header.
* <p>
* First it checks if the stream has a BOM. If there is no BOM, it checks the content-type encoding. If there is no content-type encoding, it checks the XML prolog
* encoding. If there is no XML prolog encoding, it uses the default encoding mandated by the content-type MIME type.
* First it checks if the stream has a BOM. If there is no BOM, it checks the content-type
* encoding. If there is no content-type encoding, it checks the XML prolog encoding. If there is
* no XML prolog encoding, it uses the default encoding mandated by the content-type MIME type.
* <p>
* If lenient detection is indicated and the detection above fails as per specifications it then attempts the following:
* If lenient detection is indicated and the detection above fails as per specifications it then
* attempts the following:
* <p>
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again.
* If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection
* again.
* <p>
* Else if the XML prolog had a charset encoding that encoding is used.
* <p>
@ -328,7 +359,8 @@ public class XmlReader extends Reader {
* @param httpContentType content-type header to use for the resolution of the charset encoding.
* @param lenient indicates if the charset encoding detection should be relaxed.
* @throws IOException thrown if there is a problem reading the file.
* @throws XmlReaderException thrown if the charset encoding could not be determined according to the specs.
* @throws XmlReaderException thrown if the charset encoding could not be determined according
* to the specs.
*
*/
public XmlReader(final InputStream is, final String httpContentType, final boolean lenient) throws IOException, XmlReaderException {
@ -336,7 +368,8 @@ public class XmlReader extends Reader {
}
/**
* Returns the default encoding to use if none is set in HTTP content-type, XML prolog and the rules based on content-type are not adequate.
* Returns the default encoding to use if none is set in HTTP content-type, XML prolog and the
* rules based on content-type are not adequate.
* <p/>
* If it is NULL the content-type based rules are used.
* <p/>
@ -348,7 +381,8 @@ public class XmlReader extends Reader {
}
/**
* Sets the default encoding to use if none is set in HTTP content-type, XML prolog and the rules based on content-type are not adequate.
* Sets the default encoding to use if none is set in HTTP content-type, XML prolog and the
* rules based on content-type are not adequate.
* <p/>
* If it is set to NULL the content-type based rules are used.
* <p/>

View file

@ -689,7 +689,7 @@ public class Atom10Parser extends BaseWireFeedParser {
* Parse entry from reader.
*/
public static Entry parseEntry(final Reader rd, final String baseURI, final Locale locale) throws JDOMException, IOException, IllegalArgumentException,
FeedException {
FeedException {
// Parse entry into JDOM tree
final SAXBuilder builder = new SAXBuilder();

View file

@ -48,17 +48,17 @@ public class DateParser {
// parse a valid date out of a substring of the full string given the mask so we have to check
// the most complete format first, then it fails with exception
private static final String[] W3CDATETIME_MASKS = { "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
"yyyy-MM-dd't'HH:mm:ss.SSS'z'", "yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", "yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd't'HH:mm:ssZ",
"yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", "yyyy-MM-dd'T'HH:mmz", // together
// with
// logic
// in
// the
// parseW3CDateTime
// they
"yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to
// be GMT
"yyyy'T'HH:mmz", "yyyy-MM-dd't'HH:mmz", "yyyy-MM-dd'T'HH:mm'Z'", "yyyy-MM-dd't'HH:mm'z'", "yyyy-MM-dd", "yyyy-MM", "yyyy" };
"yyyy-MM-dd't'HH:mm:ss.SSS'z'", "yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", "yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd't'HH:mm:ssZ",
"yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", "yyyy-MM-dd'T'HH:mmz", // together
// with
// logic
// in
// the
// parseW3CDateTime
// they
"yyyy-MM'T'HH:mmz", // handle W3C dates without time forcing them to
// be GMT
"yyyy'T'HH:mmz", "yyyy-MM-dd't'HH:mmz", "yyyy-MM-dd'T'HH:mm'Z'", "yyyy-MM-dd't'HH:mm'z'", "yyyy-MM-dd", "yyyy-MM", "yyyy" };
/**
* The masks used to validate and parse the input to this Atom date. These are a lot more
@ -67,14 +67,14 @@ public class DateParser {
*/
@SuppressWarnings("unused")
private static final String[] masks = { "yyyy-MM-dd'T'HH:mm:ss.SSSz", "yyyy-MM-dd't'HH:mm:ss.SSSz", // invalid
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid
"yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", // invalid
"yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", // invalid
"yyyy-MM-dd'T'HH:mmz", // invalid
"yyyy-MM-dd't'HH:mmz", // invalid
"yyyy-MM-dd'T'HH:mm'Z'", // invalid
"yyyy-MM-dd't'HH:mm'z'", // invalid
"yyyy-MM-dd", "yyyy-MM", "yyyy" };
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd't'HH:mm:ss.SSS'z'", // invalid
"yyyy-MM-dd'T'HH:mm:ssz", "yyyy-MM-dd't'HH:mm:ssz", // invalid
"yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd't'HH:mm:ss'z'", // invalid
"yyyy-MM-dd'T'HH:mmz", // invalid
"yyyy-MM-dd't'HH:mmz", // invalid
"yyyy-MM-dd'T'HH:mm'Z'", // invalid
"yyyy-MM-dd't'HH:mm'z'", // invalid
"yyyy-MM-dd", "yyyy-MM", "yyyy" };
static {
ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader().getTokenizedProperty("datetime.extra.masks", "|");

View file

@ -73,7 +73,7 @@ public class TestSyndFeedAtom03DCSyModules extends TestSyndFeedAtom03 {
assertProperty(dc.getRights(), prefix + "dc:rights");
} else {
assertProperty(dc.getRights(), prefix + "copyright"); // in
// header
// header
// is
// convenience
// method