Commit | Line | Data |
---|---|---|
477f956c RK |
1 | /* strptime.c - partial strptime() reimplementation |
2 | * | |
3 | * (c) 2008 Richard Kettlewell. | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * 3. The name of the author may not be used to endorse or promote products | |
15 | * derived from this software without specific prior written permission. | |
16 | * | |
17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
27 | * SUCH DAMAGE. | |
28 | */ | |
1a164e63 RK |
29 | /** @file lib/strptime.c |
30 | * @brief strptime() reimplementation | |
31 | * | |
32 | * strptime() is here reimplemented because the FreeBSD (and older MacOS) one | |
477f956c | 33 | * is broken and does not report errors properly. See TODO remarks below for |
1a164e63 RK |
34 | * some missing bits. |
35 | */ | |
477f956c RK |
36 | |
37 | #include <ctype.h> | |
38 | #include <limits.h> | |
39 | #include <string.h> | |
40 | #include <langinfo.h> | |
41 | #include "strptime.h" | |
42 | ||
/** @brief One row of a locale-string lookup table
 *
 * A table of these is walked by try_locale_match(), which asks nl_langinfo()
 * for the string belonging to each @ref key in turn.
 */
struct locale_item_match {
  /** @brief nl_langinfo() item whose string is compared with the subject */
  nl_item key;

  /** @brief Number reported when the string for @ref key matches; -1 marks
   * the end of a table */
  int value;
};
51 | ||
52 | static const struct locale_item_match days[] = { | |
53 | { DAY_1, 0 }, | |
54 | { DAY_2, 1 }, | |
55 | { DAY_3, 2 }, | |
56 | { DAY_4, 3 }, | |
57 | { DAY_5, 4 }, | |
58 | { DAY_6, 5 }, | |
59 | { DAY_7, 6 }, | |
60 | { ABDAY_1, 0 }, | |
61 | { ABDAY_2, 1 }, | |
62 | { ABDAY_3, 2 }, | |
63 | { ABDAY_4, 3 }, | |
64 | { ABDAY_5, 4 }, | |
65 | { ABDAY_6, 5 }, | |
66 | { ABDAY_7, 6 }, | |
67 | { -1, -1 } | |
68 | }; | |
69 | ||
70 | static const struct locale_item_match months[] = { | |
71 | { MON_1, 1 }, | |
72 | { MON_2, 2 }, | |
73 | { MON_3, 3 }, | |
74 | { MON_4, 4 }, | |
75 | { MON_5, 5 }, | |
76 | { MON_6, 6 }, | |
77 | { MON_7, 7 }, | |
78 | { MON_8, 8 }, | |
79 | { MON_9, 9 }, | |
80 | { MON_10, 10 }, | |
81 | { MON_11, 11 }, | |
82 | { MON_12, 12 }, | |
83 | { ABMON_1, 1 }, | |
84 | { ABMON_2, 2 }, | |
85 | { ABMON_3, 3 }, | |
86 | { ABMON_4, 4 }, | |
87 | { ABMON_5, 5 }, | |
88 | { ABMON_6, 6 }, | |
89 | { ABMON_7, 7 }, | |
90 | { ABMON_8, 8 }, | |
91 | { ABMON_9, 9 }, | |
92 | { ABMON_10, 10 }, | |
93 | { ABMON_11, 11 }, | |
94 | { ABMON_12, 12 }, | |
95 | { -1, -1 }, | |
96 | }; | |
97 | ||
/** @brief Compare a bounded subject with a 0-terminated string
 * @param buf Start of subject
 * @param limit End of subject (one past its last character)
 * @param match String to compare the subject with
 * @return 1 if [buf,limit) and @p match are equal ignoring case, else 0
 *
 * Case is folded one byte at a time with tolower(), so the comparison is
 * case-independent at least in ASCII.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO byte-wise folding won't work well outside single-byte encodings.  A
   * good bet is probably to convert to Unicode and then use
   * utf32_casefold_compat() (or utf8_casefold_compat()); compatibility
   * matching would also ensure missing accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  for(; buf < limit && *match; ++buf, ++match) {
    const int subject_ch = tolower((unsigned char)*buf);
    const int match_ch = tolower((unsigned char)*match);

    if(subject_ch != match_ch)
      return 0;
  }
  /* Equal only if both the subject and the match string are used up */
  return buf == limit && !*match;
}
126 | ||
127 | /** @brief Match from table of locale-specific strings | |
128 | * @param buf Start of subject | |
129 | * @param limit End of subject | |
130 | * @param lim Table of locale lookups | |
131 | * @return Looked up value or -1 | |
132 | * | |
133 | * The match is case-independent. | |
134 | */ | |
135 | static int try_locale_match(const char *buf, | |
136 | const char *limit, | |
137 | const struct locale_item_match *lim) { | |
138 | /* This is not very efficient! A (correct) built-in implementation will | |
139 | * presumably have more direct access to locale information. */ | |
140 | while(lim->value != -1) { | |
141 | if(try_match(buf, limit, nl_langinfo(lim->key))) | |
142 | return lim->value; | |
143 | ++lim; | |
144 | } | |
145 | return -1; | |
146 | } | |
147 | ||
/** @brief Match an unsigned decimal number
 * @param buf Start of subject
 * @param limit End of subject
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return Parsed value, or -1 if [buf,limit) is not an in-range number
 *
 * The subject must consist of at least one ASCII digit and nothing else (no
 * sign, no whitespace).  Values that do not fit in an @c int are rejected
 * whatever @p high is, since the result is returned as an @c int.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  const unsigned max = INT_MAX;
  unsigned n = 0;

  /* An empty subject is not a number; without this check it would be
   * reported as 0 whenever low is 0. */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    const int ch = (unsigned char)*buf++;
    unsigned digit;

    if(ch < '0' || ch > '9')
      return -1;
    digit = (unsigned)(ch - '0');
    /* Refuse the digit only if appending it would exceed INT_MAX; INT_MAX
     * itself is still representable. */
    if(n > max / 10 || (n == max / 10 && digit > max % 10))
      return -1;                        /* overflow */
    n = 10 * n + digit;
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}
168 | ||
169 | static const char *my_strptime_guts(const char *buf, | |
170 | const char *format, | |
171 | struct tm *tm) { | |
172 | int fc, mod, spec, next, value; | |
173 | const char *limit; | |
174 | /* nl_langinfo() is allowed to trash its last return value so we copy. | |
175 | * (We're relying on it being usable at all in multithreaded environments | |
176 | * though.) */ | |
177 | #define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \ | |
178 | const char *s; \ | |
179 | char subformat[128]; \ | |
180 | \ | |
181 | if(mod == 'E') { \ | |
182 | s = nl_langinfo(EITEM); \ | |
183 | if(!s || !*s) \ | |
184 | s = nl_langinfo(ITEM); \ | |
185 | } else \ | |
186 | s = nl_langinfo(ITEM); \ | |
187 | if(!s || !*s) \ | |
188 | s = DEF; \ | |
189 | if(strlen(s) >= sizeof subformat) \ | |
190 | s = DEF; \ | |
191 | strcpy(subformat, s); \ | |
192 | if(!(buf = my_strptime_guts(buf, subformat, tm))) \ | |
193 | return NULL; \ | |
194 | } while(0) | |
195 | ||
196 | while(*format) { | |
197 | fc = (unsigned char)*format++; | |
198 | if(fc == '%') { | |
199 | /* Get the character defining the converstion specification */ | |
200 | spec = (unsigned char)*format++; | |
201 | if(spec == 'E' || spec == 'O') { | |
202 | /* Oops, there's a modifier first */ | |
203 | mod = spec; | |
204 | spec = (unsigned char)*format++; | |
205 | } else | |
206 | mod = 0; | |
207 | if(!spec) | |
208 | return NULL; /* format string broken! */ | |
209 | /* See what the next directive is. The specification is written in terms | |
210 | * of stopping the match at a character that matches the next directive. | |
211 | * This implementation mirrors this aspect of the specification | |
212 | * directly. */ | |
213 | next = (unsigned char)*format; | |
214 | if(next) { | |
215 | limit = buf; | |
216 | if(isspace(next)) { | |
217 | /* Next directive is whitespace, so bound the input string (at least) | |
218 | * by that */ | |
219 | while(*limit && !isspace((unsigned char)*limit)) | |
220 | ++limit; | |
221 | } else if(next == '%') { | |
222 | /* Prohibited: "The application shall ensure that there is | |
223 | * white-space or other non-alphanumeric characters between any two | |
224 | * conversion specifications". In fact we let alphanumerics | |
225 | * through. | |
226 | * | |
227 | * Forbidding even %% seems a bit harsh but is consistent with the | |
228 | * specification as written. | |
229 | */ | |
230 | return NULL; | |
231 | } else { | |
232 | /* Next directive is a specific character, so bound the input string | |
233 | * (at least) by that. This will work badly in the face of multibyte | |
234 | * characters, but then the spec is vague about what kind of string | |
235 | * we're dealing with anyway so you probably couldn't safely use them | |
236 | * in the format string at least in any case. */ | |
237 | while(*limit && *limit != next) | |
238 | ++limit; | |
239 | } | |
240 | } else | |
241 | limit = buf + strlen(buf); | |
242 | switch(spec) { | |
243 | case 'A': case 'a': /* day name (abbrev or full) */ | |
244 | if((value = try_locale_match(buf, limit, days)) == -1) | |
245 | return NULL; | |
246 | tm->tm_wday = value; | |
247 | break; | |
248 | case 'B': case 'b': case 'h': /* month name (abbrev or full) */ | |
249 | if((value = try_locale_match(buf, limit, months)) == -1) | |
250 | return NULL; | |
251 | tm->tm_mon = value - 1; | |
252 | break; | |
253 | case 'c': /* locale date+time */ | |
254 | USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y"); | |
255 | break; | |
256 | case 'C': /* century number 0-99 */ | |
257 | /* TODO */ | |
258 | return NULL; | |
259 | case 'd': case 'e': /* day of month 1-31 */ | |
260 | if((value = try_numeric_match(buf, limit, 1, 31)) == -1) | |
261 | return NULL; | |
262 | tm->tm_mday = value; | |
263 | break; | |
264 | case 'D': /* == "%m / %d / %y" */ | |
265 | if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm))) | |
266 | return NULL; | |
267 | break; | |
268 | case 'H': /* hour 0-23 */ | |
269 | if((value = try_numeric_match(buf, limit, 0, 23)) == -1) | |
270 | return NULL; | |
271 | tm->tm_hour = value; | |
272 | break; | |
273 | case 'I': /* hour 1-12 */ | |
274 | /* TODO */ | |
275 | return NULL; | |
276 | case 'j': /* day 1-366 */ | |
277 | if((value = try_numeric_match(buf, limit, 1, 366)) == -1) | |
278 | return NULL; | |
279 | tm->tm_yday = value - 1; | |
280 | return NULL; | |
281 | case 'm': /* month 1-12 */ | |
282 | if((value = try_numeric_match(buf, limit, 1, 12)) == -1) | |
283 | return NULL; | |
284 | tm->tm_mon = value - 1; | |
285 | break; | |
286 | case 'M': /* minute 0-59 */ | |
287 | if((value = try_numeric_match(buf, limit, 0, 59)) == -1) | |
288 | return NULL; | |
289 | tm->tm_min = value; | |
290 | break; | |
291 | case 'n': case 't': /* any whitespace */ | |
292 | goto matchwhitespace; | |
293 | case 'p': /* locale am/pm */ | |
294 | /* TODO */ | |
295 | return NULL; | |
296 | case 'r': /* == "%I : %M : %S %p" */ | |
297 | /* TODO actually this is locale-dependent; and we don't implement %I | |
298 | * anyway, so it's not going to work even as it stands. */ | |
299 | if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm))) | |
300 | return NULL; | |
301 | break; | |
302 | case 'R': /* == "%H : %M" */ | |
303 | if(!(buf = my_strptime_guts(buf, "%H : %M", tm))) | |
304 | return NULL; | |
305 | break; | |
306 | case 'S': /* seconds 0-60 */ | |
307 | if((value = try_numeric_match(buf, limit, 0, 60)) == -1) | |
308 | return NULL; | |
309 | tm->tm_sec = value; | |
310 | break; | |
311 | case 'U': /* week number from Sunday 0-53 */ | |
312 | /* TODO */ | |
313 | return NULL; | |
314 | case 'w': /* day number 0-6 from Sunday */ | |
315 | if((value = try_numeric_match(buf, limit, 0, 6)) == -1) | |
316 | return NULL; | |
317 | tm->tm_wday = value; | |
318 | break; | |
319 | case 'W': /* week number from Monday 0-53 */ | |
320 | /* TODO */ | |
321 | return NULL; | |
322 | case 'x': /* locale date format */ | |
323 | USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y"); | |
324 | break; | |
325 | case 'X': /* locale time format */ | |
326 | USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S"); | |
327 | break; | |
328 | case 'y': /* year mod 100 */ | |
329 | if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1) | |
330 | return NULL; | |
331 | if(value >= 0 && value <= 68) | |
332 | value = 2000 + value; | |
333 | else if(value >= 69 && value <= 99) | |
334 | value = 1900 + value; | |
335 | tm->tm_year = value - 1900; | |
336 | break; | |
337 | case 'Y': /* year */ | |
338 | if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1) | |
339 | return NULL; | |
340 | tm->tm_year = value - 1900; | |
341 | break; | |
342 | case '%': | |
343 | goto matchself; | |
344 | default: | |
345 | /* The spec is a bit vague about what to do with invalid format | |
346 | * strings. We return NULL immediately and hope someone will | |
347 | * notice. */ | |
348 | return NULL; | |
349 | } | |
350 | buf = limit; | |
351 | } else if(isspace(fc)) { | |
352 | matchwhitespace: | |
353 | /* Any format whitespace matches any number of input whitespace | |
354 | * characters. The directive can formally contain more than one | |
355 | * whitespace character; for the second and subsequent ones we'll match 0 | |
356 | * characters from the input. */ | |
357 | while(isspace((unsigned char)*buf)) | |
358 | ++buf; | |
359 | } else { | |
360 | matchself: | |
361 | /* Non-% non-whitespace characters must match themselves exactly */ | |
362 | if(fc != (unsigned char)*buf++) | |
363 | return NULL; | |
364 | } | |
365 | } | |
366 | /* When we run out of format string we return a pointer to the rest of the | |
367 | * input. */ | |
368 | return buf; | |
369 | } | |
370 | ||
/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* Whether to overwrite or update is unspecified (rather bizarrely).  This
   * implementation does not overwrite: @p tm is not cleared first, so fields
   * the format does not mention keep their previous values.  xgetdate()
   * depends on this behavior. */
  const char *rest = my_strptime_guts(buf, format, tm);

  /* TODO various things we could/should do:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */
  return rest ? (char *)rest : NULL;
}
397 | ||
398 | /* | |
399 | Local Variables: | |
400 | c-basic-offset:2 | |
401 | comment-column:40 | |
402 | fill-column:79 | |
403 | indent-tabs-mode:nil | |
404 | End: | |
405 | */ |