View Javadoc

1   /*
2    * Copyright 2004 Sun Microsystems, Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  package com.sun.syndication.io.impl;
18  
19  import java.text.DateFormat;
20  import java.text.SimpleDateFormat;
21  import java.text.ParsePosition;
22  import java.util.Date;
23  import java.util.TimeZone;
24  import java.util.Locale;
25  
26  /***
27   * A helper class that parses Dates out of Strings with date time in RFC822 and W3CDateTime
28   * formats plus the variants Atom (0.3) and RSS (0.9, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0)
29   * specificators added to those formats.
30   * <p/>
31   * It uses the JDK java.text.SimpleDateFormat class attemtping the parse using a mask for
32   * each one of the possible formats.
33   * <p/>
34   *
35   * @author Alejandro Abdelnur
36   *
37   */
38  public class DateParser {
39  
40      private static String[] ADDITIONAL_MASKS;
41  
42      static {
43          ADDITIONAL_MASKS = PropertiesLoader.getPropertiesLoader().getTokenizedProperty("datetime.extra.masks","|");
44      }
45  
46      // order is like this because the SimpleDateFormat.parse does not fail with exception
47      // if it can parse a valid date out of a substring of the full string given the mask
48      // so we have to check the most complete format first, then it fails with exception
49      private static final String[] RFC822_MASKS = {
50              "EEE, dd MMM yy HH:mm:ss z",
51              "EEE, dd MMM yy HH:mm z",
52              "dd MMM yy HH:mm:ss z",
53              "dd MMM yy HH:mm z"
54          };
55  
56  
57  
58      // order is like this because the SimpleDateFormat.parse does not fail with exception
59      // if it can parse a valid date out of a substring of the full string given the mask
60      // so we have to check the most complete format first, then it fails with exception
61      private static final String[] W3CDATETIME_MASKS = {
62          "yyyy-MM-dd'T'HH:mm:ss.SSSz",
63          "yyyy-MM-dd't'HH:mm:ss.SSSz",
64          "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
65          "yyyy-MM-dd't'HH:mm:ss.SSS'z'",
66          "yyyy-MM-dd'T'HH:mm:ssz",
67          "yyyy-MM-dd't'HH:mm:ssz",
68          "yyyy-MM-dd'T'HH:mm:ss'Z'",
69          "yyyy-MM-dd't'HH:mm:ss'z'",
70          "yyyy-MM-dd'T'HH:mmz",   // together with logic in the parseW3CDateTime they
71          "yyyy-MM'T'HH:mmz",      // handle W3C dates without time forcing them to be GMT
72          "yyyy'T'HH:mmz",          
73          "yyyy-MM-dd't'HH:mmz", 
74          "yyyy-MM-dd'T'HH:mm'Z'", 
75          "yyyy-MM-dd't'HH:mm'z'", 
76          "yyyy-MM-dd",
77          "yyyy-MM",
78          "yyyy"
79      };
80      
81      
82      
83        /***
84     * The masks used to validate and parse the input to this Atom date.
85     * These are a lot more forgiving than what the Atom spec allows.  
86     * The forms that are invalid according to the spec are indicated.
87     */
88    private static final String[] masks = {
89      "yyyy-MM-dd'T'HH:mm:ss.SSSz",
90      "yyyy-MM-dd't'HH:mm:ss.SSSz",                         // invalid
91      "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
92      "yyyy-MM-dd't'HH:mm:ss.SSS'z'",                       // invalid
93      "yyyy-MM-dd'T'HH:mm:ssz",
94      "yyyy-MM-dd't'HH:mm:ssz",                             // invalid
95      "yyyy-MM-dd'T'HH:mm:ss'Z'",
96      "yyyy-MM-dd't'HH:mm:ss'z'",                           // invalid
97      "yyyy-MM-dd'T'HH:mmz",                                // invalid
98      "yyyy-MM-dd't'HH:mmz",                                // invalid
99      "yyyy-MM-dd'T'HH:mm'Z'",                              // invalid
100     "yyyy-MM-dd't'HH:mm'z'",                              // invalid
101     "yyyy-MM-dd",
102     "yyyy-MM",
103     "yyyy"
104   };
105     
106     
107     
108 
109     /***
110      * Private constructor to avoid DateParser instances creation.
111      */
112     private DateParser() {
113     }
114 
115     /***
116      * Parses a Date out of a string using an array of masks.
117      * <p/>
118      * It uses the masks in order until one of them succedes or all fail.
119      * <p/>
120      *
121      * @param masks array of masks to use for parsing the string
122      * @param sDate string to parse for a date.
123      * @return the Date represented by the given string using one of the given masks.
124      * It returns <b>null</b> if it was not possible to parse the the string with any of the masks.
125      *
126      */
127     private static Date parseUsingMask(String[] masks,String sDate) {
128         sDate = (sDate!=null) ? sDate.trim() : null;
129         ParsePosition pp = null;
130         Date d = null;
131         for (int i=0;d==null && i<masks.length;i++) {
132             DateFormat df = new SimpleDateFormat(masks[i],Locale.US);
133             //df.setLenient(false);
134             df.setLenient(true);
135             try {
136                 pp = new ParsePosition(0);
137                 d = df.parse(sDate,pp);
138                 if (pp.getIndex()!=sDate.length()) {
139                     d = null;
140                 }
141                 //System.out.println("pp["+pp.getIndex()+"] s["+sDate+" m["+masks[i]+"] d["+d+"]");
142             }
143             catch (Exception ex1) {
144                 //System.out.println("s: "+sDate+" m: "+masks[i]+" d: "+null);
145             }
146         }
147         return d;
148     }
149 
150     /***
151      * Parses a Date out of a String with a date in RFC822 format.
152      * <p/>
153      * It parsers the following formats:
154      * <ul>
155      *   <li>"EEE, dd MMM yyyy HH:mm:ss z"</li>
156      *   <li>"EEE, dd MMM yyyy HH:mm z"</li>
157      *   <li>"EEE, dd MMM yy HH:mm:ss z"</li>
158      *   <li>"EEE, dd MMM yy HH:mm z"</li>
159      *   <li>"dd MMM yyyy HH:mm:ss z"</li>
160      *   <li>"dd MMM yyyy HH:mm z"</li>
161      *   <li>"dd MMM yy HH:mm:ss z"</li>
162      *   <li>"dd MMM yy HH:mm z"</li>
163      * </ul>
164      * <p/>
165      * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
166      * <p/>
167      * @param sDate string to parse for a date.
168      * @return the Date represented by the given RFC822 string.
169      *         It returns <b>null</b> if it was not possible to parse the given string into a Date.
170      *
171      */
172     public static Date parseRFC822(String sDate) {
173         int utIndex = sDate.indexOf(" UT");
174         if (utIndex>-1) {
175             String pre = sDate.substring(0,utIndex);
176             String post = sDate.substring(utIndex+3);
177             sDate = pre + " GMT" + post;
178         }
179         return parseUsingMask(RFC822_MASKS,sDate);
180     }
181 
182 
183     /***
184      * Parses a Date out of a String with a date in W3C date-time format.
185      * <p/>
186      * It parsers the following formats:
187      * <ul>
188      *   <li>"yyyy-MM-dd'T'HH:mm:ssz"</li>
189      *   <li>"yyyy-MM-dd'T'HH:mmz"</li>
190      *   <li>"yyyy-MM-dd"</li>
191      *   <li>"yyyy-MM"</li>
192      *   <li>"yyyy"</li>
193      * </ul>
194      * <p/>
195      * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
196      * <p/>
197      * @param sDate string to parse for a date.
198      * @return the Date represented by the given W3C date-time string.
199      *         It returns <b>null</b> if it was not possible to parse the given string into a Date.
200      *
201      */
202     public static Date parseW3CDateTime(String sDate) {
203         // if sDate has time on it, it injects 'GTM' before de TZ displacement to
204         // allow the SimpleDateFormat parser to parse it properly
205         int tIndex = sDate.indexOf("T");
206         if (tIndex>-1) {
207             if (sDate.endsWith("Z")) {
208                 sDate = sDate.substring(0,sDate.length()-1)+"+00:00";
209             }
210             int tzdIndex = sDate.indexOf("+",tIndex);
211             if (tzdIndex==-1) {
212                 tzdIndex = sDate.indexOf("-",tIndex);
213             }
214             if (tzdIndex>-1) {
215                 String pre = sDate.substring(0,tzdIndex);
216                 int secFraction = pre.indexOf(",");
217                 if (secFraction>-1) {
218                     pre = pre.substring(0,secFraction);
219                 }
220                 String post = sDate.substring(tzdIndex);
221                 sDate = pre + "GMT" + post;
222             }
223         }
224         else {
225             sDate += "T00:00GMT";
226         }
227         return parseUsingMask(W3CDATETIME_MASKS,sDate);
228     }
229 
230 
231     /***
232      * Parses a Date out of a String with a date in W3C date-time format or
233      * in a RFC822 format.
234      * <p>
235      * @param sDate string to parse for a date.
236      * @return the Date represented by the given W3C date-time string.
237      *         It returns <b>null</b> if it was not possible to parse the given string into a Date.
238      *
239      * */
240     public static Date parseDate(String sDate) {
241         Date d = parseW3CDateTime(sDate);
242         if (d==null) {
243             d = parseRFC822(sDate);
244             if (d==null && ADDITIONAL_MASKS.length>0) {
245                 d = parseUsingMask(ADDITIONAL_MASKS,sDate);
246             }
247         }
248         return d;
249     }
250 
251     /***
252      * create a RFC822 representation of a date.
253      * <p/>
254      * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
255      * <p/>
256      * @param date Date to parse
257      * @return the RFC822 represented by the given Date
258      *         It returns <b>null</b> if it was not possible to parse the date.
259      *
260      */
261     public static String formatRFC822(Date date) {
262         SimpleDateFormat dateFormater = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'",Locale.US);
263         dateFormater.setTimeZone(TimeZone.getTimeZone("GMT"));
264         return dateFormater.format(date);
265     }
266 
267     /***
268      * create a W3C Date Time representation of a date.
269      * <p/>
270      * Refer to the java.text.SimpleDateFormat javadocs for details on the format of each element.
271      * <p/>
272      * @param date Date to parse
273      * @return the W3C Date Time represented by the given Date
274      *         It returns <b>null</b> if it was not possible to parse the date.
275      *
276      */
277     public static String formatW3CDateTime(Date date) {
278         SimpleDateFormat dateFormater = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",Locale.US);
279         dateFormater.setTimeZone(TimeZone.getTimeZone("GMT"));
280         return dateFormater.format(date);
281     }
282 
283 }