1 /* strptime.c - partial strptime() reimplementation
3 * (c) 2008 Richard Kettlewell.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 /* strptime() is here reimplemented because the FreeBSD (and older MacOS) one
31 * is broken and does not report errors properly. See TODO remarks below for
32 * some missing bits. */
40 struct locale_item_match
{
45 static const struct locale_item_match days
[] = {
63 static const struct locale_item_match months
[] = {
91 /** @brief Match a string
92 * @param buf Start of subject
93 * @param limit End of subject
94 * @param match String to match subject against
95 * @return True if match == [buf,limit) otherwise false
97 * The match is case-independent at least in ASCII.
99 static int try_match(const char *buf
,
102 /* TODO this won't work well outside single-byte encodings. A good bet is
103 * probably to convert to Unicode and then use utf32_casefold_compat() (or
104 * utf8_casefold_compat()); using compatibility matching will ensure missing
105 * accents and so on aren't a problem.
107 * en_GB and en_US will probably be in any reasonable encoding for them.
109 while(buf
< limit
&& *match
) {
110 if(tolower((unsigned char)*buf
) != tolower((unsigned char)*match
))
115 if(buf
!= limit
|| *match
)
120 /** @brief Match from table of locale-specific strings
121 * @param buf Start of subject
122 * @param limit End of subject
123 * @param lim Table of locale lookups
124 * @return Looked up value or -1
126 * The match is case-independent.
128 static int try_locale_match(const char *buf
,
130 const struct locale_item_match
*lim
) {
131 /* This is not very efficient! A (correct) built-in implementation will
132 * presumably have more direct access to locale information. */
133 while(lim
->value
!= -1) {
134 if(try_match(buf
, limit
, nl_langinfo(lim
->key
)))
141 static int try_numeric_match(const char *buf
,
148 int ch
= (unsigned char)*buf
++;
149 if(ch
>= '0' && ch
<= '9') {
151 || (n
== INT_MAX
/ 10 && ch
>= INT_MAX
% 10 + '0'))
152 return -1; /* overflow */
153 n
= 10 * n
+ ch
- '0';
157 if(n
< low
|| n
> high
)
162 static const char *my_strptime_guts(const char *buf
,
165 int fc
, mod
, spec
, next
, value
;
167 /* nl_langinfo() is allowed to trash its last return value so we copy.
168 * (We're relying on it being usable at all in multithreaded environments
170 #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
172 char subformat[128]; \
175 s = nl_langinfo(EITEM); \
177 s = nl_langinfo(ITEM); \
179 s = nl_langinfo(ITEM); \
182 if(strlen(s) >= sizeof subformat) \
184 strcpy(subformat, s); \
185 if(!(buf = my_strptime_guts(buf, subformat, tm))) \
190 fc
= (unsigned char)*format
++;
192 /* Get the character defining the conversion specification */
193 spec
= (unsigned char)*format
++;
194 if(spec
== 'E' || spec
== 'O') {
195 /* Oops, there's a modifier first */
197 spec
= (unsigned char)*format
++;
201 return NULL
; /* format string broken! */
202 /* See what the next directive is. The specification is written in terms
203 * of stopping the match at a character that matches the next directive.
204 * This implementation mirrors this aspect of the specification
206 next
= (unsigned char)*format
;
210 /* Next directive is whitespace, so bound the input string (at least)
212 while(*limit
&& !isspace((unsigned char)*limit
))
214 } else if(next
== '%') {
215 /* Prohibited: "The application shall ensure that there is
216 * white-space or other non-alphanumeric characters between any two
217 * conversion specifications". In fact we let alphanumerics
220 * Forbidding even %% seems a bit harsh but is consistent with the
221 * specification as written.
225 /* Next directive is a specific character, so bound the input string
226 * (at least) by that. This will work badly in the face of multibyte
227 * characters, but then the spec is vague about what kind of string
228 * we're dealing with anyway so you probably couldn't safely use them
229 * in the format string at least in any case. */
230 while(*limit
&& *limit
!= next
)
234 limit
= buf
+ strlen(buf
);
236 case 'A': case 'a': /* day name (abbrev or full) */
237 if((value
= try_locale_match(buf
, limit
, days
)) == -1)
241 case 'B': case 'b': case 'h': /* month name (abbrev or full) */
242 if((value
= try_locale_match(buf
, limit
, months
)) == -1)
244 tm
->tm_mon
= value
- 1;
246 case 'c': /* locale date+time */
247 USE_SUBFORMAT(D_T_FMT
, ERA_D_T_FMT
, "%a %b %e %H:%M:%S %Y");
249 case 'C': /* century number 0-99 */
252 case 'd': case 'e': /* day of month 1-31 */
253 if((value
= try_numeric_match(buf
, limit
, 1, 31)) == -1)
257 case 'D': /* == "%m / %d / %y" */
258 if(!(buf
= my_strptime_guts(buf
, "%m / %d / %y", tm
)))
261 case 'H': /* hour 0-23 */
262 if((value
= try_numeric_match(buf
, limit
, 0, 23)) == -1)
266 case 'I': /* hour 1-12 */
269 case 'j': /* day 1-366 */
270 if((value
= try_numeric_match(buf
, limit
, 1, 366)) == -1)
272 tm
->tm_yday
= value
- 1;
274 case 'm': /* month 1-12 */
275 if((value
= try_numeric_match(buf
, limit
, 1, 12)) == -1)
277 tm
->tm_mon
= value
- 1;
279 case 'M': /* minute 0-59 */
280 if((value
= try_numeric_match(buf
, limit
, 0, 59)) == -1)
284 case 'n': case 't': /* any whitespace */
285 goto matchwhitespace
;
286 case 'p': /* locale am/pm */
289 case 'r': /* == "%I : %M : %S %p" */
290 /* TODO actually this is locale-dependent; and we don't implement %I
291 * anyway, so it's not going to work even as it stands. */
292 if(!(buf
= my_strptime_guts(buf
, "%I : %M : %S %p", tm
)))
295 case 'R': /* == "%H : %M" */
296 if(!(buf
= my_strptime_guts(buf
, "%H : %M", tm
)))
299 case 'S': /* seconds 0-60 */
300 if((value
= try_numeric_match(buf
, limit
, 0, 60)) == -1)
304 case 'U': /* week number from Sunday 0-53 */
307 case 'w': /* day number 0-6 from Sunday */
308 if((value
= try_numeric_match(buf
, limit
, 0, 6)) == -1)
312 case 'W': /* week number from Monday 0-53 */
315 case 'x': /* locale date format */
316 USE_SUBFORMAT(D_FMT
, ERA_D_FMT
, "%m/%d/%y");
318 case 'X': /* locale time format */
319 USE_SUBFORMAT(T_FMT
, ERA_T_FMT
, "%H:%M:%S");
321 case 'y': /* year mod 100 */
322 if((value
= try_numeric_match(buf
, limit
, 0, INT_MAX
)) == -1)
324 if(value
>= 0 && value
<= 68)
325 value
= 2000 + value
;
326 else if(value
>= 69 && value
<= 99)
327 value
= 1900 + value
;
328 tm
->tm_year
= value
- 1900;
331 if((value
= try_numeric_match(buf
, limit
, 1, INT_MAX
)) == -1)
333 tm
->tm_year
= value
- 1900;
338 /* The spec is a bit vague about what to do with invalid format
339 * strings. We return NULL immediately and hope someone will
344 } else if(isspace(fc
)) {
346 /* Any format whitespace matches any number of input whitespace
347 * characters. The directive can formally contain more than one
348 * whitespace character; for the second and subsequent ones we'll match 0
349 * characters from the input. */
350 while(isspace((unsigned char)*buf
))
354 /* Non-% non-whitespace characters must match themselves exactly */
355 if(fc
!= (unsigned char)*buf
++)
359 /* When we run out of format string we return a pointer to the rest of the
364 /** @brief Reimplementation of strptime()
365 * @param buf Input buffer
366 * @param format Format string
367 * @param tm Where to put result
368 * @return Pointer to first unparsed input character, or NULL on error
371 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
373 char *my_strptime(const char *buf
,
376 /* Whether to overwrite or update is unspecified (rather bizarrely). This
377 * implementation does not overwrite, as xgetdate() depends on this
380 if(!(buf
= my_strptime_guts(buf
, format
, tm
)))
382 /* TODO various things we could/should do:
383 * - infer day/month from %j+year
384 * - infer day/month from %U/%W+%w/%a+year
385 * - infer hour from %p+%I
386 * - fill wday/yday from other fields