From 7e976207d0b19baf112b216d64a8cf6a8af1069f Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 22 Apr 2004 17:27:05 +0000 Subject: [PATCH] Support the locale-supplied character set where appropriate. It's used for converting command-line -C directives into Unicode; it's used for outputting Unicode strings to stderr in error messages; and it's used as the default character set for input files (although I'd be inclined to recommend everyone use \cfg{input-charset} in all their source files to ensure their portability). git-svn-id: svn://svn.tartarus.org/sgt/halibut@4114 cda61777-01e9-0310-a592-d414129be87e --- error.c | 26 ++++++++++++++++---------- halibut.h | 8 ++------ index.c | 2 +- main.c | 5 ++++- misc.c | 2 +- ustring.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 88 insertions(+), 19 deletions(-) diff --git a/error.c b/error.c index abd8d61..42eea37 100644 --- a/error.c +++ b/error.c @@ -15,7 +15,6 @@ static void do_error(int code, va_list ap) { char error[1024]; - char auxbuf[256]; char c; char *sp, *sp2; wchar_t *wsp; @@ -82,18 +81,20 @@ static void do_error(int code, va_list ap) { break; case err_badparatype: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unrecognised at start of" " paragraph", sp); flags = FILEPOS; + sfree(sp); break; case err_badmidcmd: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp); flags = FILEPOS; + sfree(sp); break; case err_unexbrace: fpos = *va_arg(ap, filepos *); @@ -138,23 +139,26 @@ static void do_error(int code, va_list ap) { case err_nosuchkw: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, 
"unable to resolve cross-reference to `%.200s'", sp); flags = FILEPOS; + sfree(sp); break; case err_multiBR: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); flags = FILEPOS; + sfree(sp); break; case err_nosuchidxtag: + fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); - flags = 0; - /* FIXME: need to get a filepos to here somehow */ + sfree(sp); + flags = FILEPOS; break; case err_cantopenw: sp = va_arg(ap, char *); @@ -164,9 +168,10 @@ static void do_error(int code, va_list ap) { case err_macroexists: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "macro `%.200s' already defined", sp); flags = FILEPOS; + sfree(sp); break; case err_sectjump: fpos = *va_arg(ap, filepos *); @@ -185,10 +190,11 @@ static void do_error(int code, va_list ap) { fpos = *va_arg(ap, filepos *); fpos2 = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "paragraph keyword `%.200s' already defined at ", sp); sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); flags = FILEPOS; + sfree(sp); break; case err_misplacedlcont: fpos = *va_arg(ap, filepos *); diff --git a/halibut.h b/halibut.h index 33780ea..69953b6 100644 --- a/halibut.h +++ b/halibut.h @@ -27,12 +27,6 @@ #include "tree234.h" /* - * FIXME: Charset temporary workarounds - */ -#define CS_FIXME CS_ISO8859_1 -#define CS_LOCAL CS_ISO8859_1 - -/* * Structure tags */ typedef struct input_Tag input; @@ -282,6 +276,8 @@ char *utoa_dup(wchar_t const *s, int charset); char *utoa_dup_len(wchar_t const *s, int 
charset, int *len); char *utoa_careful_dup(wchar_t const *s, int charset); wchar_t *ufroma_dup(char const *s, int charset); +char *utoa_locale_dup(wchar_t const *s); +wchar_t *ufroma_locale_dup(char const *s); int ustrlen(wchar_t const *s); wchar_t *uadv(wchar_t *s); wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); diff --git a/index.c b/index.c index 9850750..9a2d9df 100644 --- a/index.c +++ b/index.c @@ -99,7 +99,7 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text, * warn (and drop it, since it won't be referenced). */ if (is_explicit) { - error(err_nosuchidxtag, tags); + error(err_nosuchidxtag, fpos, tags); continue; } diff --git a/main.c b/main.c index f486a3d..61b37f7 100644 --- a/main.c +++ b/main.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include "halibut.h" @@ -48,6 +49,8 @@ int main(int argc, char **argv) { paragraph *cfg, *cfg_tail; void *pre_backend_data[16]; + setlocale(LC_ALL, ""); + /* * Set up initial (default) parameters. */ @@ -260,7 +263,7 @@ int main(int argc, char **argv) { in.pushback = NULL; in.reportcols = reportcols; in.stack = NULL; - in.defcharset = CS_ASCII; + in.defcharset = locale_charset(); idx = make_index(); diff --git a/misc.c b/misc.c index 304cb1f..647d642 100644 --- a/misc.c +++ b/misc.c @@ -497,7 +497,7 @@ void cmdline_cfg_add(paragraph *cfg, char *string) while (cfg->origkeyword[len]) len += 1 + strlen(cfg->origkeyword+len); - ustring = ufroma_dup(string, CS_FIXME); + ustring = ufroma_locale_dup(string); upos = ulen; ulen += 2 + ustrlen(ustring); diff --git a/ustring.c b/ustring.c index 11a022c..1980a95 100644 --- a/ustring.c +++ b/ustring.c @@ -3,6 +3,8 @@ */ #include +#include +#include #include #include "halibut.h" @@ -164,6 +166,68 @@ wchar_t *ufroma_dup(char const *s, int charset) { return buf; } +char *utoa_locale_dup(wchar_t const *s) +{ + /* + * This variant uses the C library locale. 
+     */
+    char *ret;
+    int len;
+    size_t siz;
+
+    len = ustrlen(s);
+    ret = mknewa(char, 1 + MB_CUR_MAX * len);
+
+    /* The limit is in bytes, and failure is (size_t)-1, not 0. */
+    siz = wcstombs(ret, s, MB_CUR_MAX * len);
+    if (siz != (size_t)-1) {
+        assert(siz <= MB_CUR_MAX * len);
+        ret[siz] = '\0';
+        ret = resize(ret, siz+1);
+        return ret;
+    }
+    sfree(ret);
+
+    /*
+     * If that failed, try a different strategy (which we will also
+     * attempt in the total absence of wcstombs). Retrieve the
+     * locale's charset from nl_langinfo or equivalent, and use
+     * normal utoa_dup.
+     */
+    return utoa_dup(s, charset_from_locale());
+}
+
+wchar_t *ufroma_locale_dup(char const *s)
+{
+    /*
+     * This variant uses the C library locale.
+     */
+    wchar_t *ret;
+    int len;
+    size_t siz;
+
+    len = strlen(s);
+    ret = mknewa(wchar_t, 1 + 2*len);  /* be conservative */
+
+    /* mbstowcs reports failure as (size_t)-1, not 0. */
+    siz = mbstowcs(ret, s, len);
+    if (siz != (size_t)-1) {
+        assert(siz <= (size_t)(2 * len));
+        ret[siz] = L'\0';
+        ret = resize(ret, siz+1);
+        return ret;
+    }
+    sfree(ret);
+
+    /*
+     * If that failed, try a different strategy (which we will also
+     * attempt in the total absence of mbstowcs). Retrieve the
+     * locale's charset from nl_langinfo or equivalent, and use
+     * normal ufroma_dup.
+     */
+    return ufroma_dup(s, charset_from_locale());
+}
+
 int ustrlen(wchar_t const *s) {
     int len = 0;
     while (*s++) len++;
-- 
2.11.0