Commit | Line | Data |
---|---|---|
477f956c RK |
1 | /* strptime.c - partial strptime() reimplementation |
2 | * | |
cca89d7c | 3 | * Copyright (c) 2008, 2011, 2013 Richard Kettlewell. |
477f956c RK |
4 | * All rights reserved. |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * 3. The name of the author may not be used to endorse or promote products | |
15 | * derived from this software without specific prior written permission. | |
16 | * | |
17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
27 | * SUCH DAMAGE. | |
28 | */ | |
1a164e63 RK |
29 | /** @file lib/strptime.c |
30 | * @brief strptime() reimplementation | |
31 | * | |
32 | * strptime() is here reimplemented because the FreeBSD (and older MacOS) one | |
477f956c | 33 | * is broken and does not report errors properly. See TODO remarks below for |
1a164e63 RK |
34 | * some missing bits. |
35 | */ | |
477f956c | 36 | |
cca89d7c RK |
37 | #if HAVE_CONFIG_H |
38 | # include <config.h> | |
39 | #endif | |
40 | ||
477f956c RK |
41 | #include <ctype.h> |
42 | #include <limits.h> | |
43 | #include <string.h> | |
cca89d7c RK |
44 | #if HAVE_LANGINFO_H |
45 | # include <langinfo.h> | |
46 | #endif | |
477f956c RK |
47 | #include "strptime.h" |
48 | ||
cca89d7c RK |
49 | #if !HAVE_LANGINFO_H |
50 | /* Fake plastic langinfo. Primarily for Windows. | |
51 | * TODO WIN32 can we get these values out of the win32 api instead? */ | |
/* Stand-in for the <langinfo.h> item type, used when that header is missing.
 * Enumerators are numbered consecutively from 0 in the order listed. */
typedef enum {
  /* Full weekday names */
  DAY_1, DAY_2, DAY_3, DAY_4, DAY_5, DAY_6, DAY_7,

  /* Abbreviated weekday names */
  ABDAY_1, ABDAY_2, ABDAY_3, ABDAY_4, ABDAY_5, ABDAY_6, ABDAY_7,

  /* Full month names */
  MON_1, MON_2, MON_3, MON_4, MON_5, MON_6,
  MON_7, MON_8, MON_9, MON_10, MON_11, MON_12,

  /* Abbreviated month names */
  ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6,
  ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12,

  /* Date, time and date+time formats */
  D_FMT, T_FMT, D_T_FMT,

  /* Era-based variants of the formats above */
  ERA_D_FMT, ERA_T_FMT, ERA_D_T_FMT,
} nl_item;
98 | ||
99 | const char *nl_langinfo(nl_item item) { | |
100 | switch(item) { | |
101 | case DAY_1: return "Sunday"; | |
102 | case DAY_2: return "Monday"; | |
103 | case DAY_3: return "Tuesday"; | |
104 | case DAY_4: return "Wednesday"; | |
105 | case DAY_5: return "Thursday"; | |
106 | case DAY_6: return "Friday"; | |
107 | case DAY_7: return "Saturday"; | |
108 | case ABDAY_1: return "Sun"; | |
109 | case ABDAY_2: return "Mon"; | |
110 | case ABDAY_3: return "Tue"; | |
111 | case ABDAY_4: return "Wed"; | |
112 | case ABDAY_5: return "Thu"; | |
113 | case ABDAY_6: return "Fri"; | |
114 | case ABDAY_7: return "Sat"; | |
115 | case MON_1: return "January"; | |
116 | case MON_2: return "February"; | |
117 | case MON_3: return "March"; | |
118 | case MON_4: return "April"; | |
119 | case MON_5: return "May"; | |
120 | case MON_6: return "June"; | |
121 | case MON_7: return "July"; | |
122 | case MON_8: return "August"; | |
123 | case MON_9: return "September"; | |
124 | case MON_10: return "October"; | |
125 | case MON_11: return "November"; | |
126 | case MON_12: return "December"; | |
127 | case ABMON_1: return "Jan"; | |
128 | case ABMON_2: return "Feb"; | |
129 | case ABMON_3: return "Mar"; | |
130 | case ABMON_4: return "Apr"; | |
131 | case ABMON_5: return "May"; | |
132 | case ABMON_6: return "Jun"; | |
133 | case ABMON_7: return "Jul"; | |
134 | case ABMON_8: return "Aug"; | |
135 | case ABMON_9: return "Sep"; | |
136 | case ABMON_10: return "Oct"; | |
137 | case ABMON_11: return "Nov"; | |
138 | case ABMON_12: return "Dec"; | |
139 | case D_FMT: return "%d/%m/%y"; | |
140 | case T_FMT: return "%H:%M:%S"; | |
141 | case D_T_FMT: return "%a %d %b %Y %H:%M:%S %Z"; | |
142 | case ERA_D_FMT: return ""; | |
143 | case ERA_T_FMT: return ""; | |
144 | case ERA_D_T_FMT: return ""; | |
145 | default: return 0; | |
146 | } | |
147 | } | |
148 | #endif | |
149 | ||
/** @brief One row of a locale-string lookup table
 *
 * Tables of these are scanned by try_locale_match() and are terminated by a
 * row whose @ref value is -1.
 */
struct locale_item_match {
  /** @brief nl_langinfo() item whose string is compared with the subject */
  nl_item key;

  /** @brief Result to report when the string for @ref key matches */
  int value;
};
158 | ||
159 | static const struct locale_item_match days[] = { | |
160 | { DAY_1, 0 }, | |
161 | { DAY_2, 1 }, | |
162 | { DAY_3, 2 }, | |
163 | { DAY_4, 3 }, | |
164 | { DAY_5, 4 }, | |
165 | { DAY_6, 5 }, | |
166 | { DAY_7, 6 }, | |
167 | { ABDAY_1, 0 }, | |
168 | { ABDAY_2, 1 }, | |
169 | { ABDAY_3, 2 }, | |
170 | { ABDAY_4, 3 }, | |
171 | { ABDAY_5, 4 }, | |
172 | { ABDAY_6, 5 }, | |
173 | { ABDAY_7, 6 }, | |
174 | { -1, -1 } | |
175 | }; | |
176 | ||
177 | static const struct locale_item_match months[] = { | |
178 | { MON_1, 1 }, | |
179 | { MON_2, 2 }, | |
180 | { MON_3, 3 }, | |
181 | { MON_4, 4 }, | |
182 | { MON_5, 5 }, | |
183 | { MON_6, 6 }, | |
184 | { MON_7, 7 }, | |
185 | { MON_8, 8 }, | |
186 | { MON_9, 9 }, | |
187 | { MON_10, 10 }, | |
188 | { MON_11, 11 }, | |
189 | { MON_12, 12 }, | |
190 | { ABMON_1, 1 }, | |
191 | { ABMON_2, 2 }, | |
192 | { ABMON_3, 3 }, | |
193 | { ABMON_4, 4 }, | |
194 | { ABMON_5, 5 }, | |
195 | { ABMON_6, 6 }, | |
196 | { ABMON_7, 7 }, | |
197 | { ABMON_8, 8 }, | |
198 | { ABMON_9, 9 }, | |
199 | { ABMON_10, 10 }, | |
200 | { ABMON_11, 11 }, | |
201 | { ABMON_12, 12 }, | |
202 | { -1, -1 }, | |
203 | }; | |
204 | ||
/** @brief Compare a bounded subject with a 0-terminated string
 * @param buf Start of subject
 * @param limit End of subject (one past its last character)
 * @param match 0-terminated string to compare the subject with
 * @return 1 if [buf,limit) equals @p match ignoring case, otherwise 0
 *
 * Case is folded one byte at a time with tolower(), so the comparison is
 * case-independent at least in ASCII.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO this won't work well outside single-byte encodings.  A good bet is
   * probably to convert to Unicode and then use utf32_casefold_compat() (or
   * utf8_casefold_compat()); using compatibility matching will ensure missing
   * accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  for(; buf < limit && *match; ++buf, ++match) {
    if(tolower((unsigned char)*buf) != tolower((unsigned char)*match))
      return 0;
  }
  /* Only a match if both the subject and the pattern are used up */
  return buf == limit && !*match;
}
233 | ||
234 | /** @brief Match from table of locale-specific strings | |
235 | * @param buf Start of subject | |
236 | * @param limit End of subject | |
237 | * @param lim Table of locale lookups | |
238 | * @return Looked up value or -1 | |
239 | * | |
240 | * The match is case-independent. | |
241 | */ | |
242 | static int try_locale_match(const char *buf, | |
243 | const char *limit, | |
244 | const struct locale_item_match *lim) { | |
245 | /* This is not very efficient! A (correct) built-in implementation will | |
246 | * presumably have more direct access to locale information. */ | |
247 | while(lim->value != -1) { | |
248 | if(try_match(buf, limit, nl_langinfo(lim->key))) | |
249 | return lim->value; | |
250 | ++lim; | |
251 | } | |
252 | return -1; | |
253 | } | |
254 | ||
/** @brief Parse a bounded subject as an unsigned decimal number
 * @param buf Start of subject
 * @param limit End of subject
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return The value, or -1 on error
 *
 * The whole of [buf,limit) must consist of ASCII digits and there must be at
 * least one of them.  Values that do not fit in an int, or that lie outside
 * [low,high], are rejected.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  unsigned n = 0;

  /* An empty subject is not a number; without this check it would be
   * accepted as 0 whenever low is 0. */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    const int ch = (unsigned char)*buf++;
    unsigned digit;

    if(ch < '0' || ch > '9')
      return -1;
    digit = (unsigned)(ch - '0');
    /* Reject only if 10 * n + digit would exceed INT_MAX; INT_MAX itself is
     * representable and therefore allowed. */
    if(n > ((unsigned)INT_MAX - digit) / 10)
      return -1;                        /* overflow */
    n = 10 * n + digit;
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
275 | ||
276 | static const char *my_strptime_guts(const char *buf, | |
277 | const char *format, | |
278 | struct tm *tm) { | |
279 | int fc, mod, spec, next, value; | |
280 | const char *limit; | |
281 | /* nl_langinfo() is allowed to trash its last return value so we copy. | |
282 | * (We're relying on it being usable at all in multithreaded environments | |
283 | * though.) */ | |
284 | #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \ | |
285 | const char *s; \ | |
286 | char subformat[128]; \ | |
287 | \ | |
288 | if(mod == 'E') { \ | |
289 | s = nl_langinfo(EITEM); \ | |
290 | if(!s || !*s) \ | |
291 | s = nl_langinfo(ITEM); \ | |
292 | } else \ | |
293 | s = nl_langinfo(ITEM); \ | |
294 | if(!s || !*s) \ | |
295 | s = DEF; \ | |
296 | if(strlen(s) >= sizeof subformat) \ | |
297 | s = DEF; \ | |
298 | strcpy(subformat, s); \ | |
299 | if(!(buf = my_strptime_guts(buf, subformat, tm))) \ | |
300 | return NULL; \ | |
301 | } while(0) | |
302 | ||
303 | while(*format) { | |
304 | fc = (unsigned char)*format++; | |
305 | if(fc == '%') { | |
306 | /* Get the character defining the converstion specification */ | |
307 | spec = (unsigned char)*format++; | |
308 | if(spec == 'E' || spec == 'O') { | |
309 | /* Oops, there's a modifier first */ | |
310 | mod = spec; | |
311 | spec = (unsigned char)*format++; | |
312 | } else | |
313 | mod = 0; | |
314 | if(!spec) | |
315 | return NULL; /* format string broken! */ | |
316 | /* See what the next directive is. The specification is written in terms | |
317 | * of stopping the match at a character that matches the next directive. | |
318 | * This implementation mirrors this aspect of the specification | |
319 | * directly. */ | |
320 | next = (unsigned char)*format; | |
321 | if(next) { | |
322 | limit = buf; | |
323 | if(isspace(next)) { | |
324 | /* Next directive is whitespace, so bound the input string (at least) | |
325 | * by that */ | |
326 | while(*limit && !isspace((unsigned char)*limit)) | |
327 | ++limit; | |
328 | } else if(next == '%') { | |
329 | /* Prohibited: "The application shall ensure that there is | |
330 | * white-space or other non-alphanumeric characters between any two | |
331 | * conversion specifications". In fact we let alphanumerics | |
332 | * through. | |
333 | * | |
334 | * Forbidding even %% seems a bit harsh but is consistent with the | |
335 | * specification as written. | |
336 | */ | |
337 | return NULL; | |
338 | } else { | |
339 | /* Next directive is a specific character, so bound the input string | |
340 | * (at least) by that. This will work badly in the face of multibyte | |
341 | * characters, but then the spec is vague about what kind of string | |
342 | * we're dealing with anyway so you probably couldn't safely use them | |
343 | * in the format string at least in any case. */ | |
344 | while(*limit && *limit != next) | |
345 | ++limit; | |
346 | } | |
347 | } else | |
348 | limit = buf + strlen(buf); | |
349 | switch(spec) { | |
350 | case 'A': case 'a': /* day name (abbrev or full) */ | |
351 | if((value = try_locale_match(buf, limit, days)) == -1) | |
352 | return NULL; | |
353 | tm->tm_wday = value; | |
354 | break; | |
355 | case 'B': case 'b': case 'h': /* month name (abbrev or full) */ | |
356 | if((value = try_locale_match(buf, limit, months)) == -1) | |
357 | return NULL; | |
358 | tm->tm_mon = value - 1; | |
359 | break; | |
360 | case 'c': /* locale date+time */ | |
361 | USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y"); | |
362 | break; | |
363 | case 'C': /* century number 0-99 */ | |
364 | /* TODO */ | |
365 | return NULL; | |
366 | case 'd': case 'e': /* day of month 1-31 */ | |
367 | if((value = try_numeric_match(buf, limit, 1, 31)) == -1) | |
368 | return NULL; | |
369 | tm->tm_mday = value; | |
370 | break; | |
371 | case 'D': /* == "%m / %d / %y" */ | |
372 | if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm))) | |
373 | return NULL; | |
374 | break; | |
375 | case 'H': /* hour 0-23 */ | |
376 | if((value = try_numeric_match(buf, limit, 0, 23)) == -1) | |
377 | return NULL; | |
378 | tm->tm_hour = value; | |
379 | break; | |
380 | case 'I': /* hour 1-12 */ | |
381 | /* TODO */ | |
382 | return NULL; | |
383 | case 'j': /* day 1-366 */ | |
384 | if((value = try_numeric_match(buf, limit, 1, 366)) == -1) | |
385 | return NULL; | |
386 | tm->tm_yday = value - 1; | |
387 | return NULL; | |
388 | case 'm': /* month 1-12 */ | |
389 | if((value = try_numeric_match(buf, limit, 1, 12)) == -1) | |
390 | return NULL; | |
391 | tm->tm_mon = value - 1; | |
392 | break; | |
393 | case 'M': /* minute 0-59 */ | |
394 | if((value = try_numeric_match(buf, limit, 0, 59)) == -1) | |
395 | return NULL; | |
396 | tm->tm_min = value; | |
397 | break; | |
398 | case 'n': case 't': /* any whitespace */ | |
399 | goto matchwhitespace; | |
400 | case 'p': /* locale am/pm */ | |
401 | /* TODO */ | |
402 | return NULL; | |
403 | case 'r': /* == "%I : %M : %S %p" */ | |
404 | /* TODO actually this is locale-dependent; and we don't implement %I | |
405 | * anyway, so it's not going to work even as it stands. */ | |
406 | if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm))) | |
407 | return NULL; | |
408 | break; | |
409 | case 'R': /* == "%H : %M" */ | |
410 | if(!(buf = my_strptime_guts(buf, "%H : %M", tm))) | |
411 | return NULL; | |
412 | break; | |
413 | case 'S': /* seconds 0-60 */ | |
414 | if((value = try_numeric_match(buf, limit, 0, 60)) == -1) | |
415 | return NULL; | |
416 | tm->tm_sec = value; | |
417 | break; | |
418 | case 'U': /* week number from Sunday 0-53 */ | |
419 | /* TODO */ | |
420 | return NULL; | |
421 | case 'w': /* day number 0-6 from Sunday */ | |
422 | if((value = try_numeric_match(buf, limit, 0, 6)) == -1) | |
423 | return NULL; | |
424 | tm->tm_wday = value; | |
425 | break; | |
426 | case 'W': /* week number from Monday 0-53 */ | |
427 | /* TODO */ | |
428 | return NULL; | |
429 | case 'x': /* locale date format */ | |
430 | USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y"); | |
431 | break; | |
432 | case 'X': /* locale time format */ | |
433 | USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S"); | |
434 | break; | |
435 | case 'y': /* year mod 100 */ | |
436 | if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1) | |
437 | return NULL; | |
438 | if(value >= 0 && value <= 68) | |
439 | value = 2000 + value; | |
440 | else if(value >= 69 && value <= 99) | |
441 | value = 1900 + value; | |
442 | tm->tm_year = value - 1900; | |
443 | break; | |
444 | case 'Y': /* year */ | |
445 | if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1) | |
446 | return NULL; | |
447 | tm->tm_year = value - 1900; | |
448 | break; | |
449 | case '%': | |
450 | goto matchself; | |
451 | default: | |
452 | /* The spec is a bit vague about what to do with invalid format | |
453 | * strings. We return NULL immediately and hope someone will | |
454 | * notice. */ | |
455 | return NULL; | |
456 | } | |
457 | buf = limit; | |
458 | } else if(isspace(fc)) { | |
459 | matchwhitespace: | |
460 | /* Any format whitespace matches any number of input whitespace | |
461 | * characters. The directive can formally contain more than one | |
462 | * whitespace character; for the second and subsequent ones we'll match 0 | |
463 | * characters from the input. */ | |
464 | while(isspace((unsigned char)*buf)) | |
465 | ++buf; | |
466 | } else { | |
467 | matchself: | |
468 | /* Non-% non-whitespace characters must match themselves exactly */ | |
469 | if(fc != (unsigned char)*buf++) | |
470 | return NULL; | |
471 | } | |
472 | } | |
473 | /* When we run out of format string we return a pointer to the rest of the | |
474 | * input. */ | |
475 | return buf; | |
476 | } | |
477 | ||
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* Whether to overwrite or update is unspecified (rather bizarrely).  This
   * implementation updates rather than overwrites: @p tm is not cleared
   * first, and only fields set by the conversions are written.  xgetdate()
   * depends on this behavior. */
  const char *const rest = my_strptime_guts(buf, format, tm);

  /* TODO various things we could/should do:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */

  /* rest is NULL on a parse failure, and the cast passes that through.  The
   * cast drops const because the strptime() interface returns char *. */
  return (char *)rest;
}
504 | ||
505 | /* | |
506 | Local Variables: | |
507 | c-basic-offset:2 | |
508 | comment-column:40 | |
509 | fill-column:79 | |
510 | indent-tabs-mode:nil | |
511 | End: | |
512 | */ |