*/
#include <assert.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include "halibut.h"
int bitfield, prebackend_bitfield;
} backends[] = {
{"text", text_backend, text_config_filename, 0x0001, 0},
- {"xhtml", xhtml_backend, xhtml_config_filename, 0x0002, 0},
- {"html", xhtml_backend, xhtml_config_filename, 0x0002, 0},
+ {"xhtml", html_backend, html_config_filename, 0x0002, 0},
+ {"html", html_backend, html_config_filename, 0x0002, 0},
{"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
{"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
{"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
int nogo;
int errs;
int reportcols;
+ int input_charset;
int debug;
int backendbits, prebackbits;
int k, b;
void *pre_backend_data[16];
/*
+ * Use the locale specified by the user's environment everywhere.
+ * It'll be used for output of error messages, and as the default
+ * character set for input files if one is not explicitly specified.
+ *
+ * However, we need to use standard numeric formatting for
+ * output of things like PDF.
+ */
+ setlocale(LC_ALL, "");
+ setlocale(LC_NUMERIC, "C");
+
+ /*
* Set up initial (default) parameters.
*/
- infiles = mknewa(char *, argc);
+ infiles = snewn(argc, char *);
nfiles = 0;
nogo = errs = FALSE;
reportcols = 0;
+ input_charset = CS_ASCII;
debug = 0;
backendbits = 0;
cfg = cfg_tail = NULL;
}
if (k < (int)lenof(backends)) {
/* do nothing */;
+ } else if (!strcmp(opt, "-input-charset")) {
+ if (!val) {
+ errs = TRUE, error(err_optnoarg, opt);
+ } else {
+ int charset = charset_from_localenc(val);
+ if (charset == CS_NONE) {
+ errs = TRUE, error(err_cmdcharset, val);
+ } else {
+ input_charset = charset;
+ }
+ }
} else if (!strcmp(opt, "-help")) {
help();
nogo = TRUE;
!strcmp(opt, "-license")) {
licence();
nogo = TRUE;
+ } else if (!strcmp(opt, "-list-charsets")) {
+ listcharsets();
+ nogo = TRUE;
} else if (!strcmp(opt, "-precise")) {
reportcols = 1;
} else {
* into a config paragraph.
*/
{
- wchar_t *keywords;
- char *q;
- wchar_t *u;
+ char *s = dupstr(p), *q, *r;
paragraph *para;
- keywords = mknewa(wchar_t, 2+strlen(p));
-
- u = keywords;
- q = p;
+ para = cmdline_cfg_new();
+ q = r = s;
while (*q) {
if (*q == ':') {
- *u++ = L'\0';
+ *r = '\0';
+ /* XXX ad-hoc diagnostic */
+ if (!strcmp(s, "input-charset"))
+ error(err_futileopt, "Cinput-charset",
+ "; use --input-charset");
+ cmdline_cfg_add(para, s);
+ r = s;
} else {
if (*q == '\\' && q[1])
q++;
- /* FIXME: lacks charset flexibility */
- *u++ = *q;
+ *r++ = *q;
}
q++;
}
- *u = L'\0';
-
- para = mknew(paragraph);
- memset(para, 0, sizeof(*para));
- para->type = para_Config;
- para->keyword = keywords;
- para->next = NULL;
- para->fpos.filename = "<command line>";
- para->fpos.line = para->fpos.col = -1;
+ *r = '\0';
+ cmdline_cfg_add(para, s);
if (cfg_tail)
cfg_tail->next = para;
in.pushback = NULL;
in.reportcols = reportcols;
in.stack = NULL;
- in.defcharset = CS_ASCII;
+ in.defcharset = input_charset;
idx = make_index();
sfree(in.pushback);
- mark_attr_ends(sourceform);
-
sfree(infiles);
keywords = get_keywords(sourceform);
build_index(idx);
+ /*
+ * Set up attr_First / attr_Last / attr_Always, in the main
+ * document and in the index entries.
+ */
+ for (p = sourceform; p; p = p->next)
+ mark_attr_ends(p->words);
+ {
+ int i;
+ indexentry *entry;
+
+ for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
+ mark_attr_ends(entry->text);
+ }
+
if (debug) {
index_debug(idx);
dbg_prtkws(keywords);
printf("\"");
} else
printf("(no text)");
+ if (w->breaks)
+ printf(" [breaks]");
if (w->alt) {
printf(" alt = {\n");
dbg_prtwordlist(level+1, w->alt);