X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/f15300499bce37cd28ea2ace0f2bd1c364fc835e..8333e399e0540063d20dd6b344f2cd09458975ef:/main.c diff --git a/main.c b/main.c index f09b33e..e291d06 100644 --- a/main.c +++ b/main.c @@ -43,13 +43,23 @@ int main(int argc, char **argv) { int nogo; int errs; int reportcols; + int input_charset; int debug; int backendbits, prebackbits; int k, b; paragraph *cfg, *cfg_tail; void *pre_backend_data[16]; + /* + * Use the specified locale everywhere. It'll be used for + * output of error messages, and as the default character set + * for input files if one is not explicitly specified. + * + * However, we need to use standard numeric formatting for + * output of things like PDF. + */ setlocale(LC_ALL, ""); + setlocale(LC_NUMERIC, "C"); /* * Set up initial (default) parameters. @@ -58,6 +68,7 @@ int main(int argc, char **argv) { nfiles = 0; nogo = errs = FALSE; reportcols = 0; + input_charset = CS_ASCII; debug = 0; backendbits = 0; cfg = cfg_tail = NULL; @@ -113,6 +124,17 @@ int main(int argc, char **argv) { } if (k < (int)lenof(backends)) { /* do nothing */; + } else if (!strcmp(opt, "-input-charset")) { + if (!val) { + errs = TRUE, error(err_optnoarg, opt); + } else { + int charset = charset_from_localenc(val); + if (charset == CS_NONE) { + errs = TRUE, error(err_cmdcharset, val); + } else { + input_charset = charset; + } + } } else if (!strcmp(opt, "-help")) { help(); nogo = TRUE; @@ -123,6 +145,9 @@ int main(int argc, char **argv) { !strcmp(opt, "-license")) { licence(); nogo = TRUE; + } else if (!strcmp(opt, "-list-charsets")) { + listcharsets(); + nogo = TRUE; } else if (!strcmp(opt, "-precise")) { reportcols = 1; } else { @@ -193,6 +218,10 @@ int main(int argc, char **argv) { while (*q) { if (*q == ':') { *r = '\0'; + /* XXX ad-hoc diagnostic */ + if (!strcmp(s, "input-charset")) + error(err_futileopt, "Cinput-charset", + "; use --input-charset"); cmdline_cfg_add(para, s); r = s; } else { @@ -263,7 +292,7 @@ int main(int argc, char **argv) { in.pushback = NULL; in.reportcols = reportcols; in.stack = NULL; - in.defcharset = charset_from_locale(); + in.defcharset = input_charset; idx = make_index(); @@ -430,6 +459,8 @@ static void dbg_prtwordlist(int level, word *w) { printf("\""); } else printf("(no text)"); + if (w->breaks) + printf(" [breaks]"); if (w->alt) { printf(" alt = {\n"); dbg_prtwordlist(level+1, w->alt);