[disorder] / lib / strptime.c

/* strptime.c - partial strptime() reimplementation
 *
 * Copyright (c) 2008, 2011, 2013 Richard Kettlewell.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/** @file lib/strptime.c
 * @brief strptime() reimplementation
 *
 * strptime() is here reimplemented because the FreeBSD (and older MacOS) one
 * is broken and does not report errors properly.  See TODO remarks below for
 * some missing bits.
 */

#if HAVE_CONFIG_H
# include <config.h>
#endif

#include <ctype.h>
#include <limits.h>
#include <string.h>
#if HAVE_LANGINFO_H
# include <langinfo.h>
#endif
#include "strptime.h"

#if !HAVE_LANGINFO_H
/* Fake plastic langinfo.  Primarily for Windows.
 * TODO WIN32 can we get these values out of the win32 api instead? */
/* Stand-in for the nl_item type normally supplied by <langinfo.h>.  The
 * enumerators are declared in contiguous groups; their order fixes their
 * numeric values, so do not reorder them. */
typedef enum {
  /* Full weekday names (DAY_1 is Sunday in the fake nl_langinfo() below) */
  DAY_1, DAY_2, DAY_3, DAY_4, DAY_5, DAY_6, DAY_7,

  /* Abbreviated weekday names */
  ABDAY_1, ABDAY_2, ABDAY_3, ABDAY_4, ABDAY_5, ABDAY_6, ABDAY_7,

  /* Full month names */
  MON_1, MON_2, MON_3, MON_4, MON_5, MON_6,
  MON_7, MON_8, MON_9, MON_10, MON_11, MON_12,

  /* Abbreviated month names */
  ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6,
  ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12,

  /* Date, time and combined date+time formats */
  D_FMT, T_FMT, D_T_FMT,

  /* Era-based variants of the formats above */
  ERA_D_FMT, ERA_T_FMT, ERA_D_T_FMT
} nl_item;

/* Fake nl_langinfo(): map a locale item to a fixed English string.
 *
 * Returns the day/month name or format string for ITEM.  The era formats are
 * empty strings, and any unrecognized item yields a null pointer.
 *
 * NOTE(review): the D_T_FMT value contains %Z, which the parser in this file
 * does not appear to implement, so %c cannot succeed with this table -
 * confirm whether that is intended.
 */
const char *nl_langinfo(nl_item item) {
  const char *text = 0;

  switch(item) {
  /* Full weekday names */
  case DAY_1: text = "Sunday"; break;
  case DAY_2: text = "Monday"; break;
  case DAY_3: text = "Tuesday"; break;
  case DAY_4: text = "Wednesday"; break;
  case DAY_5: text = "Thursday"; break;
  case DAY_6: text = "Friday"; break;
  case DAY_7: text = "Saturday"; break;
  /* Abbreviated weekday names */
  case ABDAY_1: text = "Sun"; break;
  case ABDAY_2: text = "Mon"; break;
  case ABDAY_3: text = "Tue"; break;
  case ABDAY_4: text = "Wed"; break;
  case ABDAY_5: text = "Thu"; break;
  case ABDAY_6: text = "Fri"; break;
  case ABDAY_7: text = "Sat"; break;
  /* Full month names */
  case MON_1: text = "January"; break;
  case MON_2: text = "February"; break;
  case MON_3: text = "March"; break;
  case MON_4: text = "April"; break;
  case MON_5: text = "May"; break;
  case MON_6: text = "June"; break;
  case MON_7: text = "July"; break;
  case MON_8: text = "August"; break;
  case MON_9: text = "September"; break;
  case MON_10: text = "October"; break;
  case MON_11: text = "November"; break;
  case MON_12: text = "December"; break;
  /* Abbreviated month names */
  case ABMON_1: text = "Jan"; break;
  case ABMON_2: text = "Feb"; break;
  case ABMON_3: text = "Mar"; break;
  case ABMON_4: text = "Apr"; break;
  case ABMON_5: text = "May"; break;
  case ABMON_6: text = "Jun"; break;
  case ABMON_7: text = "Jul"; break;
  case ABMON_8: text = "Aug"; break;
  case ABMON_9: text = "Sep"; break;
  case ABMON_10: text = "Oct"; break;
  case ABMON_11: text = "Nov"; break;
  case ABMON_12: text = "Dec"; break;
  /* Formats */
  case D_FMT: text = "%d/%m/%y"; break;
  case T_FMT: text = "%H:%M:%S"; break;
  case D_T_FMT: text = "%a %d %b %Y %H:%M:%S %Z"; break;
  /* No era formats: empty strings */
  case ERA_D_FMT: text = ""; break;
  case ERA_T_FMT: text = ""; break;
  case ERA_D_T_FMT: text = ""; break;
  default: break;
  }
  return text;
}
#endif

/** @brief Lookup table entry for locale-specific strings
 *
 * Tables of these are scanned by try_locale_match().  A table is terminated
 * by an entry whose @ref value is -1.
 */
struct locale_item_match {
  /** @brief Locale key to try (passed to nl_langinfo()) */
  nl_item key;

  /** @brief Value to return if value of @ref key matches subject string
   *
   * -1 marks the end of the table and so cannot be used as a real value.
   */
  int value;
};

/* Weekday names, full then abbreviated, mapped to the value stored in
 * tm_wday by %a/%A.  The final { -1, -1 } entry terminates the table. */
static const struct locale_item_match days[] = {
  { DAY_1, 0 },
  { DAY_2, 1 },
  { DAY_3, 2 },
  { DAY_4, 3 },
  { DAY_5, 4 },
  { DAY_6, 5 },
  { DAY_7, 6 },
  { ABDAY_1, 0 },
  { ABDAY_2, 1 },
  { ABDAY_3, 2 },
  { ABDAY_4, 3 },
  { ABDAY_5, 4 },
  { ABDAY_6, 5 },
  { ABDAY_7, 6 },
  { -1, -1 }
};

/* Month names, full then abbreviated, mapped to month numbers 1-12.  The
 * %b/%B/%h handler subtracts 1 before storing into tm_mon.  The final
 * { -1, -1 } entry terminates the table. */
static const struct locale_item_match months[] = {
  { MON_1, 1 },
  { MON_2, 2 },
  { MON_3, 3 },
  { MON_4, 4 },
  { MON_5, 5 },
  { MON_6, 6 },
  { MON_7, 7 },
  { MON_8, 8 },
  { MON_9, 9 },
  { MON_10, 10 },
  { MON_11, 11 },
  { MON_12, 12 },
  { ABMON_1, 1 },
  { ABMON_2, 2 },
  { ABMON_3, 3 },
  { ABMON_4, 4 },
  { ABMON_5, 5 },
  { ABMON_6, 6 },
  { ABMON_7, 7 },
  { ABMON_8, 8 },
  { ABMON_9, 9 },
  { ABMON_10, 10 },
  { ABMON_11, 11 },
  { ABMON_12, 12 },
  { -1, -1 },
};

/** @brief Compare a bounded subject against a candidate string
 * @param buf Start of subject
 * @param limit End of subject (one past its last character)
 * @param match 0-terminated string to compare the subject with
 * @return 1 if [buf,limit) equals @p match in full, otherwise 0
 *
 * Characters are compared after tolower(), so the comparison ignores case at
 * least for ASCII.  Both strings must be exhausted together: a subject that
 * is only a prefix of @p match (or vice versa) does not match.
 */
static int try_match(const char *buf,
                     const char *limit,
                     const char *match) {
  /* TODO this won't work well outside single-byte encodings.  A good bet is
   * probably to convert to Unicode and then use utf32_casefold_compat() (or
   * utf8_casefold_compat()); using compatibility matching will ensure missing
   * accents and so on aren't a problem.
   *
   * en_GB and en_US will probably be in any reasonable encoding for them.
   */
  for(; buf < limit && *match; ++buf, ++match) {
    const int subject = tolower((unsigned char)*buf);
    const int wanted = tolower((unsigned char)*match);

    if(subject != wanted)
      return 0;
  }
  /* Success only if neither side has anything left over */
  return buf == limit && !*match;
}

/** @brief Look a subject string up in a table of locale-specific strings
 * @param buf Start of subject
 * @param limit End of subject
 * @param lim Table of locale lookups, terminated by an entry with value -1
 * @return Value of the first matching entry, or -1 if nothing matches
 *
 * Each entry's key is resolved with nl_langinfo() and compared with
 * try_match(), so the match is case-independent.
 */
static int try_locale_match(const char *buf,
                            const char *limit,
                            const struct locale_item_match *lim) {
  const struct locale_item_match *entry;

  /* A linear scan with one nl_langinfo() call per entry: not very efficient!
   * A (correct) built-in implementation will presumably have more direct
   * access to locale information. */
  for(entry = lim; entry->value != -1; ++entry) {
    const char *name = nl_langinfo(entry->key);

    if(try_match(buf, limit, name))
      return entry->value;
  }
  return -1;
}

/** @brief Parse a bounded string as an unsigned decimal number
 * @param buf Start of subject
 * @param limit End of subject (one past its last character)
 * @param low Smallest acceptable value
 * @param high Largest acceptable value
 * @return Parsed value, or -1 on error
 *
 * The whole of [buf,limit) must consist of decimal digits and there must be
 * at least one of them; leading zeroes are allowed.  Values that do not fit
 * in an int, or that lie outside [low,high], are rejected.
 */
static int try_numeric_match(const char *buf,
                             const char *limit,
                             unsigned low,
                             unsigned high) {
  unsigned n = 0;

  /* An empty field is not a number (and must not be read as 0) */
  if(buf >= limit)
    return -1;
  while(buf < limit) {
    const int ch = (unsigned char)*buf++;
    unsigned digit;

    if(ch < '0' || ch > '9')
      return -1;
    digit = (unsigned)(ch - '0');
    /* 10 * n + digit must not exceed INT_MAX (INT_MAX itself is fine) */
    if(n > (INT_MAX - digit) / 10)
      return -1;                        /* overflow */
    n = 10 * n + digit;
  }
  if(n < low || n > high)
    return -1;
  return (int)n;
}

/** @brief Parse a bounded region of input against a format string
 * @param buf Start of input
 * @param end End of input (one past the last character to consider)
 * @param format Format string
 * @param tm Where to put results
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Only the fields of @p tm named by conversions in @p format are written;
 * fields may already have been written when an error is reported.
 *
 * Each conversion is matched against a field that extends up to the first
 * input character that could match the directive following it in the format
 * (or up to @p end if nothing follows).  Composite conversions (%c %D %r %R
 * %T %x %X) recurse with the enclosing field as their bound.
 *
 * Unimplemented conversions (%C %I %p %U %W, and therefore %r) and unknown
 * ones always fail.
 */
static const char *my_strptime_bounded(const char *buf,
                                       const char *end,
                                       const char *format,
                                       struct tm *tm) {
  int fc, mod, spec, next, value;
  const char *limit;
  /* Parse [buf,limit) using a locale-supplied format, leaving buf at the
   * first character the sub-format did not consume.
   *
   * nl_langinfo() is allowed to trash its last return value so we copy.
   * (We're relying on it being usable at all in multithreaded environments
   * though.) */
#define USE_SUBFORMAT(ITEM, EITEM, DEF) do {                    \
  const char *s;                                                \
  char subformat[128];                                          \
                                                                \
  if(mod == 'E') {                                              \
    s = nl_langinfo(EITEM);                                     \
    if(!s || !*s)                                               \
      s = nl_langinfo(ITEM);                                    \
  } else                                                        \
    s = nl_langinfo(ITEM);                                      \
  if(!s || !*s)                                                 \
    s = DEF;                                                    \
  if(strlen(s) >= sizeof subformat)                             \
    s = DEF;                                                    \
  strcpy(subformat, s);                                         \
  if(!(buf = my_strptime_bounded(buf, limit, subformat, tm)))   \
    return NULL;                                                \
} while(0)

  while(*format) {
    fc = (unsigned char)*format++;
    if(fc == '%') {
      /* Get the character defining the conversion specification */
      spec = (unsigned char)*format++;
      if(spec == 'E' || spec == 'O') {
        /* Oops, there's a modifier first */
        mod = spec;
        spec = (unsigned char)*format++;
      } else
        mod = 0;
      if(!spec)
        return NULL;                    /* format string broken! */
      /* See what the next directive is.  The specification is written in terms
       * of stopping the match at a character that matches the next directive.
       * This implementation mirrors this aspect of the specification
       * directly. */
      next = (unsigned char)*format;
      if(next) {
        limit = buf;
        if(isspace(next)) {
          /* Next directive is whitespace.  Format whitespace may match zero
           * input characters, so the field ends either at input whitespace or
           * at the literal character that follows the whitespace in the
           * format, whichever comes first.  This is what lets "%H : %M" parse
           * both "10:20" and "10 : 20". */
          const char *f = format;
          int after;

          while(isspace((unsigned char)*f))
            ++f;
          after = (unsigned char)*f;
          if(after == '%')
            after = 0;                  /* not a literal; can't bound on it */
          while(limit < end
                && !isspace((unsigned char)*limit)
                && !(after && (unsigned char)*limit == after))
            ++limit;
        } else if(next == '%') {
          /* Prohibited: "The application shall ensure that there is
           * white-space or other non-alphanumeric characters between any two
           * conversion specifications".  In fact we let alphanumerics
           * through.
           *
           * Forbidding even %% seems a bit harsh but is consistent with the
           * specification as written.
           */
          return NULL;
        } else {
          /* Next directive is a specific character, so bound the input string
           * (at least) by that.  This will work badly in the face of multibyte
           * characters, but then the spec is vague about what kind of string
           * we're dealing with anyway so you probably couldn't safely use them
           * in the format string at least in any case.
           *
           * The input is compared as unsigned char so that bytes above 0x7F
           * compare equal to the (unsigned) format character. */
          while(limit < end && (unsigned char)*limit != next)
            ++limit;
        }
      } else
        limit = end;
      switch(spec) {
      case 'A': case 'a':               /* day name (abbrev or full) */
        if((value = try_locale_match(buf, limit, days)) == -1)
          return NULL;
        tm->tm_wday = value;
        break;
      case 'B': case 'b': case 'h':     /* month name (abbrev or full) */
        if((value = try_locale_match(buf, limit, months)) == -1)
          return NULL;
        tm->tm_mon = value - 1;
        break;
      case 'c':                         /* locale date+time */
        USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
        /* buf is already where the sub-format stopped; don't reset it */
        continue;
      case 'C':                         /* century number 0-99 */
        /* TODO  */
        return NULL;
      case 'd': case 'e':               /* day of month 1-31 */
        if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
          return NULL;
        tm->tm_mday = value;
        break;
      case 'D':                         /* == "%m / %d / %y" */
        if(!(buf = my_strptime_bounded(buf, limit, "%m / %d / %y", tm)))
          return NULL;
        continue;
      case 'H':                         /* hour 0-23 */
        if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
          return NULL;
        tm->tm_hour = value;
        break;
      case 'I':                         /* hour 1-12 */
        /* TODO */
        return NULL;
      case 'j':                         /* day 1-366 */
        if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
          return NULL;
        tm->tm_yday = value - 1;
        break;
      case 'm':                         /* month 1-12 */
        if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
          return NULL;
        tm->tm_mon = value - 1;
        break;
      case 'M':                         /* minute 0-59 */
        if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
          return NULL;
        tm->tm_min = value;
        break;
      case 'n': case 't':               /* any whitespace */
        goto matchwhitespace;
      case 'p':                         /* locale am/pm */
        /* TODO */
        return NULL;
      case 'r':                         /* == "%I : %M : %S %p" */
        /* TODO actually this is locale-dependent; and we don't implement %I
         * anyway, so it's not going to work even as it stands. */
        if(!(buf = my_strptime_bounded(buf, limit, "%I : %M : %S %p", tm)))
          return NULL;
        continue;
      case 'R':                         /* == "%H : %M" */
        if(!(buf = my_strptime_bounded(buf, limit, "%H : %M", tm)))
          return NULL;
        continue;
      case 'S':                         /* seconds 0-60 */
        if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
          return NULL;
        tm->tm_sec = value;
        break;
      case 'T':                         /* == "%H : %M : %S" */
        if(!(buf = my_strptime_bounded(buf, limit, "%H : %M : %S", tm)))
          return NULL;
        continue;
      case 'U':                         /* week number from Sunday 0-53 */
        /* TODO */
        return NULL;
      case 'w':                         /* day number 0-6 from Sunday */
        if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
          return NULL;
        tm->tm_wday = value;
        break;
      case 'W':                         /* week number from Monday 0-53 */
        /* TODO */
        return NULL;
      case 'x':                         /* locale date format */
        USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
        continue;
      case 'X':                         /* locale time format */
        USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
        continue;
      case 'y':                         /* year mod 100 */
        /* 0-68 mean 2000-2068 and 69-99 mean 1969-1999.  Anything larger is
         * accepted and taken to be a full year number. */
        if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
          return NULL;
        if(value >= 0 && value <= 68)
          value = 2000 + value;
        else if(value >= 69 && value <= 99)
          value = 1900 + value;
        tm->tm_year = value - 1900;
        break;
      case 'Y':                         /* year */
        if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
          return NULL;
        tm->tm_year = value - 1900;
        break;
      case '%':
        /* fc is still '%', which is what matchself compares against */
        goto matchself;
      default:
        /* The spec is a bit vague about what to do with invalid format
         * strings.  We return NULL immediately and hope someone will
         * notice. */
        return NULL;
      }
      /* Simple conversions consume exactly their field */
      buf = limit;
    } else if(isspace(fc)) {
    matchwhitespace:
      /* Any format whitespace matches any number of input whitespace
       * characters.  The directive can formally contain more than one
       * whitespace character; for the second and subsequent ones we'll match 0
       * characters from the input. */
      while(buf < end && isspace((unsigned char)*buf))
        ++buf;
    } else {
    matchself:
      /* Non-% non-whitespace characters must match themselves exactly */
      if(buf >= end || fc != (unsigned char)*buf)
        return NULL;
      ++buf;
    }
  }
  /* When we run out of format string we return a pointer to the rest of the
   * input. */
  return buf;
}
#undef USE_SUBFORMAT

/** @brief Parse a 0-terminated string against a format string
 * @param buf Input string
 * @param format Format string
 * @param tm Where to put results
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Parses the whole of @p buf; see my_strptime_bounded() for the details.
 */
static const char *my_strptime_guts(const char *buf,
                                    const char *format,
                                    struct tm *tm) {
  return my_strptime_bounded(buf, buf + strlen(buf), format, tm);
}

/** @brief Reimplementation of strptime()
 * @param buf Input buffer
 * @param format Format string
 * @param tm Where to put result
 * @return Pointer to first unparsed input character, or NULL on error
 *
 * Based on <a
 * href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
 */
char *my_strptime(const char *buf,
                  const char *format,
                  struct tm *tm) {
  /* Whether to overwrite or update is unspecified (rather bizarrely).  This
   * implementation does not overwrite: @p tm is not cleared first and only
   * the fields that are parsed get written.  xgetdate() depends on this
   * behavior. */
  const char *rest = my_strptime_guts(buf, format, tm);

  /* TODO various things we could/should do:
   * - infer day/month from %j+year
   * - infer day/month from %U/%W+%w/%a+year
   * - infer hour from %p+%I
   * - fill wday/yday from other fields
   */
  if(!rest)
    return NULL;
  /* The interface returns non-const even though the input is const */
  return (char *)rest;
}

/*
Local Variables:
c-basic-offset:2
comment-column:40
fill-column:79
indent-tabs-mode:nil
End:
*/
Commit	Line	Data
477f956c RK	1	/* strptime.c - partial strptime() reimplementation
477f956c RK	2	*
cca89d7c	3	* Copyright (c) 2008, 2011, 2013 Richard Kettlewell.
477f956c RK	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	* 1. Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* 2. Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* 3. The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	18	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	19	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	20	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	21	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	22	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	23	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	24	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	25	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	26	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	27	* SUCH DAMAGE.
	28	*/
1a164e63 RK	29	/** @file lib/strptime.c
	30	* @brief strptime() reimplementation
	31	*
	32	* strptime() is here reimplemented because the FreeBSD (and older MacOS) one
477f956c	33	* is broken and does not report errors properly. See TODO remarks below for
1a164e63 RK	34	* some missing bits.
1a164e63 RK	35	*/
477f956c	36
cca89d7c RK	37	#if HAVE_CONFIG_H
	38	# include <config.h>
	39	#endif
	40
477f956c RK	41	#include <ctype.h>
	42	#include <limits.h>
	43	#include <string.h>
cca89d7c RK	44	#if HAVE_LANGINFO_H
	45	# include <langinfo.h>
	46	#endif
477f956c RK	47	#include "strptime.h"
477f956c RK	48
cca89d7c RK	49	#if !HAVE_LANGINFO_H
	50	/* Fake plastic langinfo. Primarily for Windows.
	51	* TODO WIN32 can we get these values out of the win32 api instead? */
	52	typedef enum {
	53	DAY_1,
	54	DAY_2,
	55	DAY_3,
	56	DAY_4,
	57	DAY_5,
	58	DAY_6,
	59	DAY_7,
	60	ABDAY_1,
	61	ABDAY_2,
	62	ABDAY_3,
	63	ABDAY_4,
	64	ABDAY_5,
	65	ABDAY_6,
	66	ABDAY_7,
	67	MON_1,
	68	MON_2,
	69	MON_3,
	70	MON_4,
	71	MON_5,
	72	MON_6,
	73	MON_7,
	74	MON_8,
	75	MON_9,
	76	MON_10,
	77	MON_11,
	78	MON_12,
	79	ABMON_1,
	80	ABMON_2,
	81	ABMON_3,
	82	ABMON_4,
	83	ABMON_5,
	84	ABMON_6,
	85	ABMON_7,
	86	ABMON_8,
	87	ABMON_9,
	88	ABMON_10,
	89	ABMON_11,
	90	ABMON_12,
	91	D_FMT,
	92	T_FMT,
	93	D_T_FMT,
	94	ERA_D_FMT,
	95	ERA_T_FMT,
	96	ERA_D_T_FMT,
	97	} nl_item;
	98
	99	const char *nl_langinfo(nl_item item) {
	100	switch(item) {
	101	case DAY_1: return "Sunday";
	102	case DAY_2: return "Monday";
	103	case DAY_3: return "Tuesday";
	104	case DAY_4: return "Wednesday";
	105	case DAY_5: return "Thursday";
	106	case DAY_6: return "Friday";
	107	case DAY_7: return "Saturday";
	108	case ABDAY_1: return "Sun";
	109	case ABDAY_2: return "Mon";
	110	case ABDAY_3: return "Tue";
	111	case ABDAY_4: return "Wed";
	112	case ABDAY_5: return "Thu";
113	case ABDAY_6: return "Fri";
114	case ABDAY_7: return "Sat";
115	case MON_1: return "January";
116	case MON_2: return "February";
117	case MON_3: return "March";
118	case MON_4: return "April";
119	case MON_5: return "May";
120	case MON_6: return "June";
121	case MON_7: return "July";
122	case MON_8: return "August";
123	case MON_9: return "September";
124	case MON_10: return "October";
125	case MON_11: return "November";
126	case MON_12: return "December";
127	case ABMON_1: return "Jan";
128	case ABMON_2: return "Feb";
129	case ABMON_3: return "Mar";
130	case ABMON_4: return "Apr";
131	case ABMON_5: return "May";
132	case ABMON_6: return "Jun";
133	case ABMON_7: return "Jul";
134	case ABMON_8: return "Aug";
135	case ABMON_9: return "Sep";
136	case ABMON_10: return "Oct";
137	case ABMON_11: return "Nov";
138	case ABMON_12: return "Dec";
139	case D_FMT: return "%d/%m/%y";
140	case T_FMT: return "%H:%M:%S";
141	case D_T_FMT: return "%a %d %b %Y %H:%M:%S %Z";
142	case ERA_D_FMT: return "";
143	case ERA_T_FMT: return "";
144	case ERA_D_T_FMT: return "";
145	default: return 0;
146	}
147	}
148	#endif
149
598b07b7	150	/** @brief Lookup table entry for locale-specific strings */
477f956c	151	struct locale_item_match {
598b07b7	152	/** @brief Locale key to try */
477f956c	153	nl_item key;
598b07b7 RK	154
598b07b7 RK	155	/** @brief Value to return if value of @ref key matches subject string */
477f956c RK	156	int value;
	157	};
	158
	159	static const struct locale_item_match days[] = {
	160	{ DAY_1, 0 },
	161	{ DAY_2, 1 },
	162	{ DAY_3, 2 },
	163	{ DAY_4, 3 },
	164	{ DAY_5, 4 },
	165	{ DAY_6, 5 },
	166	{ DAY_7, 6 },
	167	{ ABDAY_1, 0 },
	168	{ ABDAY_2, 1 },
	169	{ ABDAY_3, 2 },
	170	{ ABDAY_4, 3 },
	171	{ ABDAY_5, 4 },
	172	{ ABDAY_6, 5 },
	173	{ ABDAY_7, 6 },
	174	{ -1, -1 }
	175	};
	176
	177	static const struct locale_item_match months[] = {
	178	{ MON_1, 1 },
	179	{ MON_2, 2 },
	180	{ MON_3, 3 },
	181	{ MON_4, 4 },
	182	{ MON_5, 5 },
	183	{ MON_6, 6 },
	184	{ MON_7, 7 },
	185	{ MON_8, 8 },
	186	{ MON_9, 9 },
	187	{ MON_10, 10 },
	188	{ MON_11, 11 },
	189	{ MON_12, 12 },
	190	{ ABMON_1, 1 },
	191	{ ABMON_2, 2 },
	192	{ ABMON_3, 3 },
	193	{ ABMON_4, 4 },
	194	{ ABMON_5, 5 },
	195	{ ABMON_6, 6 },
	196	{ ABMON_7, 7 },
	197	{ ABMON_8, 8 },
	198	{ ABMON_9, 9 },
	199	{ ABMON_10, 10 },
	200	{ ABMON_11, 11 },
	201	{ ABMON_12, 12 },
	202	{ -1, -1 },
	203	};
	204
	205	/** @brief Match a string
	206	* @param buf Start of subject
	207	* @param limit End of subject
	208	* @param match String to match subject against
	209	* @return True if match == [buf,limit) otherwise false
	210	*
	211	* The match is case-independent at least in ASCII.
	212	*/
	213	static int try_match(const char *buf,
	214	const char *limit,
	215	const char *match) {
	216	/* TODO this won't work well outside single-byte encodings. A good bet is
	217	* probably to convert to Unicode and then use utf32_casefold_compat() (or
	218	* utf8_casefold_compat(); using compatibility matching will ensure missing
	219	* accents and so on aren't a problem.
220	*
221	* en_GB and en_US will probably be in any reasonable encoding for them.
222	*/
223	while(buf < limit && *match) {
224	if(tolower((unsigned char)buf) != tolower((unsigned char)match))
225	return 0;
226	++buf;
227	++match;
228	}
229	if(buf != limit \|\| *match)
230	return 0;
231	return 1;
232	}
233
234	/** @brief Match from table of locale-specific strings
235	* @param buf Start of subject
236	* @param limit End of subject
237	* @param lim Table of locale lookups
238	* @return Looked up value or -1
239	*
240	* The match is case-independent.
241	*/
242	static int try_locale_match(const char *buf,
243	const char *limit,
244	const struct locale_item_match *lim) {
245	/* This is not very efficient! A (correct) built-in implementation will
246	* presumably have more direct access to locale information. */
247	while(lim->value != -1) {
248	if(try_match(buf, limit, nl_langinfo(lim->key)))
249	return lim->value;
250	++lim;
251	}
252	return -1;
253	}
254
255	static int try_numeric_match(const char *buf,
256	const char *limit,
257	unsigned low,
258	unsigned high) {
259	unsigned n = 0;
260
261	while(buf < limit) {
262	int ch = (unsigned char)*buf++;
263	if(ch >= '0' && ch <= '9') {
264	if(n > INT_MAX / 10
265	\|\| (n == INT_MAX / 10 && ch >= INT_MAX % 10 + '0'))
266	return -1; /* overflow */
267	n = 10 * n + ch - '0';
268	} else
269	return -1;
270	}
271	if(n < low \|\| n > high)
272	return -1;
273	return (int)n;
274	}
275
276	static const char my_strptime_guts(const char buf,
277	const char *format,
278	struct tm *tm) {
279	int fc, mod, spec, next, value;
280	const char *limit;
281	/* nl_langinfo() is allowed to trash its last return value so we copy.
282	* (We're relying on it being usable at all in multithreaded environments
283	* though.) */
284	#define USE_SUBFORMAT(ITEM, EITEM, DEF) do { \
285	const char *s; \
286	char subformat[128]; \
287	\
288	if(mod == 'E') { \
289	s = nl_langinfo(EITEM); \
290	if(!s \|\| !*s) \
291	s = nl_langinfo(ITEM); \
292	} else \
293	s = nl_langinfo(ITEM); \
294	if(!s \|\| !*s) \
295	s = DEF; \
296	if(strlen(s) >= sizeof subformat) \
297	s = DEF; \
298	strcpy(subformat, s); \
299	if(!(buf = my_strptime_guts(buf, subformat, tm))) \
300	return NULL; \
301	} while(0)
302
303	while(*format) {
304	fc = (unsigned char)*format++;
305	if(fc == '%') {
306	/* Get the character defining the converstion specification */
307	spec = (unsigned char)*format++;
308	if(spec == 'E' \|\| spec == 'O') {
309	/* Oops, there's a modifier first */
310	mod = spec;
311	spec = (unsigned char)*format++;
312	} else
313	mod = 0;
314	if(!spec)
315	return NULL; /* format string broken! */
316	/* See what the next directive is. The specification is written in terms
317	* of stopping the match at a character that matches the next directive.
318	* This implementation mirrors this aspect of the specification
319	* directly. */
320	next = (unsigned char)*format;
321	if(next) {
322	limit = buf;
323	if(isspace(next)) {
324	/* Next directive is whitespace, so bound the input string (at least)
325	* by that */
326	while(limit && !isspace((unsigned char)limit))
327	++limit;
328	} else if(next == '%') {
329	/* Prohibited: "The application shall ensure that there is
330	* white-space or other non-alphanumeric characters between any two
331	* conversion specifications". In fact we let alphanumerics
332	* through.
333	*
334	* Forbidding even %% seems a bit harsh but is consistent with the
335	* specification as written.
336	*/
337	return NULL;
338	} else {
339	/* Next directive is a specific character, so bound the input string
340	* (at least) by that. This will work badly in the face of multibyte
341	* characters, but then the spec is vague about what kind of string
342	* we're dealing with anyway so you probably couldn't safely use them
343	* in the format string at least in any case. */
344	while(limit && limit != next)
345	++limit;
346	}
347	} else
348	limit = buf + strlen(buf);
349	switch(spec) {
350	case 'A': case 'a': /* day name (abbrev or full) */
351	if((value = try_locale_match(buf, limit, days)) == -1)
352	return NULL;
353	tm->tm_wday = value;
354	break;
355	case 'B': case 'b': case 'h': /* month name (abbrev or full) */
356	if((value = try_locale_match(buf, limit, months)) == -1)
357	return NULL;
358	tm->tm_mon = value - 1;
359	break;
360	case 'c': /* locale date+time */
361	USE_SUBFORMAT(D_T_FMT, ERA_D_T_FMT, "%a %b %e %H:%M:%S %Y");
362	break;
363	case 'C': /* century number 0-99 */
364	/* TODO */
365	return NULL;
366	case 'd': case 'e': /* day of month 1-31 */
367	if((value = try_numeric_match(buf, limit, 1, 31)) == -1)
368	return NULL;
369	tm->tm_mday = value;
370	break;
371	case 'D': /* == "%m / %d / %y" */
372	if(!(buf = my_strptime_guts(buf, "%m / %d / %y", tm)))
373	return NULL;
374	break;
375	case 'H': /* hour 0-23 */
376	if((value = try_numeric_match(buf, limit, 0, 23)) == -1)
377	return NULL;
378	tm->tm_hour = value;
379	break;
380	case 'I': /* hour 1-12 */
381	/* TODO */
382	return NULL;
383	case 'j': /* day 1-366 */
384	if((value = try_numeric_match(buf, limit, 1, 366)) == -1)
385	return NULL;
386	tm->tm_yday = value - 1;
387	return NULL;
388	case 'm': /* month 1-12 */
389	if((value = try_numeric_match(buf, limit, 1, 12)) == -1)
390	return NULL;
391	tm->tm_mon = value - 1;
392	break;
393	case 'M': /* minute 0-59 */
394	if((value = try_numeric_match(buf, limit, 0, 59)) == -1)
395	return NULL;
396	tm->tm_min = value;
397	break;
398	case 'n': case 't': /* any whitespace */
399	goto matchwhitespace;
400	case 'p': /* locale am/pm */
401	/* TODO */
402	return NULL;
403	case 'r': /* == "%I : %M : %S %p" */
404	/* TODO actually this is locale-dependent; and we don't implement %I
405	* anyway, so it's not going to work even as it stands. */
406	if(!(buf = my_strptime_guts(buf, "%I : %M : %S %p", tm)))
407	return NULL;
408	break;
409	case 'R': /* == "%H : %M" */
410	if(!(buf = my_strptime_guts(buf, "%H : %M", tm)))
411	return NULL;
412	break;
413	case 'S': /* seconds 0-60 */
414	if((value = try_numeric_match(buf, limit, 0, 60)) == -1)
415	return NULL;
416	tm->tm_sec = value;
417	break;
418	case 'U': /* week number from Sunday 0-53 */
419	/* TODO */
420	return NULL;
421	case 'w': /* day number 0-6 from Sunday */
422	if((value = try_numeric_match(buf, limit, 0, 6)) == -1)
423	return NULL;
424	tm->tm_wday = value;
425	break;
426	case 'W': /* week number from Monday 0-53 */
427	/* TODO */
428	return NULL;
429	case 'x': /* locale date format */
430	USE_SUBFORMAT(D_FMT, ERA_D_FMT, "%m/%d/%y");
431	break;
432	case 'X': /* locale time format */
433	USE_SUBFORMAT(T_FMT, ERA_T_FMT, "%H:%M:%S");
434	break;
435	case 'y': /* year mod 100 */
436	if((value = try_numeric_match(buf, limit, 0, INT_MAX)) == -1)
437	return NULL;
438	if(value >= 0 && value <= 68)
439	value = 2000 + value;
440	else if(value >= 69 && value <= 99)
441	value = 1900 + value;
442	tm->tm_year = value - 1900;
443	break;
444	case 'Y': /* year */
445	if((value = try_numeric_match(buf, limit, 1, INT_MAX)) == -1)
446	return NULL;
447	tm->tm_year = value - 1900;
448	break;
449	case '%':
450	goto matchself;
451	default:
452	/* The spec is a bit vague about what to do with invalid format
453	* strings. We return NULL immediately and hope someone will
454	* notice. */
455	return NULL;
456	}
457	buf = limit;
458	} else if(isspace(fc)) {
459	matchwhitespace:
460	/* Any format whitespace matches any number of input whitespace
461	* characters. The directive can formally contain more than one
462	* whitespace character; for the second and subsequent ones we'll match 0
463	* characters from the input. */
464	while(isspace((unsigned char)*buf))
465	++buf;
466	} else {
467	matchself:
468	/* Non-% non-whitespace characters must match themselves exactly */
469	if(fc != (unsigned char)*buf++)
470	return NULL;
471	}
472	}
473	/* When we run out of format string we return a pointer to the rest of the
474	* input. */
475	return buf;
476	}
477
478	/** @brief Reimplementation of strptime()
479	* @param buf Input buffer
480	* @param format Format string
481	* @param tm Where to put result
482	* @return Pointer to first unparsed input character, or NULL on error
483	*
484	* Based on <a
485	* href="http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html">http://www.opengroup.org/onlinepubs/009695399/functions/strptime.html</a>.
486	*/
487	char my_strptime(const char buf,
488	const char *format,
489	struct tm *tm) {
490	/* Whether to overwrite or update is unspecified (rather bizarrely). This
491	* implementation does not overwrites, as xgetdate() depends on this
492	* behavior. */
493
494	if(!(buf = my_strptime_guts(buf, format, tm)))
495	return NULL;
496	/* TODO various things we could/should do:
497	* - infer day/month from %j+year
498	* - infer day/month from %U/%W+%w/%a+year
499	* - infer hour from %p+%I
500	* - fill wday/yday from other fields
501	*/
502	return (char *)buf;
503	}
504
505	/*
506	Local Variables:
507	c-basic-offset:2
508	comment-column:40
509	fill-column:79
510	indent-tabs-mode:nil
511	End:
512	*/