Add an error check for correct formatting in Deflate uncompressed
[sgt/halibut] / main.c
diff --git a/main.c b/main.c
index 9af17b3..dae6581 100644 (file)
--- a/main.c
+++ b/main.c
@@ -3,6 +3,7 @@
  */
 
 #include <assert.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "halibut.h"
@@ -25,8 +26,8 @@ static const struct backend {
     int bitfield, prebackend_bitfield;
 } backends[] = {
     {"text", text_backend, text_config_filename, 0x0001, 0},
-    {"xhtml", xhtml_backend, xhtml_config_filename, 0x0002, 0},
-    {"html", xhtml_backend, xhtml_config_filename, 0x0002, 0},
+    {"xhtml", html_backend, html_config_filename, 0x0002, 0},
+    {"html", html_backend, html_config_filename, 0x0002, 0},
     {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
     {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
     {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
@@ -42,6 +43,8 @@ int main(int argc, char **argv) {
     int nogo;
     int errs;
     int reportcols;
+    int list_fonts;
+    int input_charset;
     int debug;
     int backendbits, prebackbits;
     int k, b;
@@ -49,12 +52,25 @@ int main(int argc, char **argv) {
     void *pre_backend_data[16];
 
     /*
+     * Use the environment's locale everywhere. It'll be used for
+     * output of error messages, and as the default character set
+     * for input files if one is not explicitly specified.
+     *
+     * However, we need to use standard numeric formatting for
+     * output of things like PDF.
+     */
+    setlocale(LC_ALL, "");
+    setlocale(LC_NUMERIC, "C");
+
+    /*
      * Set up initial (default) parameters.
      */
-    infiles = mknewa(char *, argc);
+    infiles = snewn(argc, char *);
     nfiles = 0;
     nogo = errs = FALSE;
     reportcols = 0;
+    list_fonts = 0;
+    input_charset = CS_ASCII;
     debug = 0;
     backendbits = 0;
     cfg = cfg_tail = NULL;
@@ -69,7 +85,7 @@ int main(int argc, char **argv) {
      */
     while (--argc) {
        char *p = *++argv;
-       if (*p == '-') {
+       if (*p == '-' && p[1]) {
            /*
             * An option.
             */
@@ -110,6 +126,17 @@ int main(int argc, char **argv) {
                            }
                        if (k < (int)lenof(backends)) {
                            /* do nothing */;
+                       } else if (!strcmp(opt, "-input-charset")) {
+                           if (!val) {
+                               errs = TRUE, error(err_optnoarg, opt);
+                           } else {
+                               int charset = charset_from_localenc(val);
+                               if (charset == CS_NONE) {
+                                   errs = TRUE, error(err_cmdcharset, val);
+                               } else {
+                                   input_charset = charset;
+                               }
+                           }
                        } else if (!strcmp(opt, "-help")) {
                            help();
                            nogo = TRUE;
@@ -120,6 +147,11 @@ int main(int argc, char **argv) {
                                   !strcmp(opt, "-license")) {
                            licence();
                            nogo = TRUE;
+                       } else if (!strcmp(opt, "-list-charsets")) {
+                           listcharsets();
+                           nogo = TRUE;
+                       } else if (!strcmp(opt, "-list-fonts")) {
+                           list_fonts = TRUE;
                        } else if (!strcmp(opt, "-precise")) {
                            reportcols = 1;
                        } else {
@@ -181,36 +213,30 @@ int main(int argc, char **argv) {
                         * into a config paragraph.
                         */
                        {
-                           wchar_t *keywords;
-                           char *q;
-                           wchar_t *u;
+                           char *s = dupstr(p), *q, *r;
                            paragraph *para;
 
-                           keywords = mknewa(wchar_t, 2+strlen(p));
-
-                           u = keywords;
-                           q = p;
+                           para = cmdline_cfg_new();
 
+                           q = r = s;
                            while (*q) {
                                if (*q == ':') {
-                                   *u++ = L'\0';
+                                   *r = '\0';
+                                   /* XXX ad-hoc diagnostic */
+                                   if (!strcmp(s, "input-charset"))
+                                       error(err_futileopt, "Cinput-charset",
+                                             "; use --input-charset");
+                                   cmdline_cfg_add(para, s);
+                                   r = s;
                                } else {
                                    if (*q == '\\' && q[1])
                                        q++;
-                                   /* FIXME: lacks charset flexibility */
-                                   *u++ = *q;
+                                   *r++ = *q;
                                }
                                q++;
                            }
-                           *u = L'\0';
-
-                           para = mknew(paragraph);
-                           memset(para, 0, sizeof(*para));
-                           para->type = para_Config;
-                           para->keyword = keywords;
-                           para->next = NULL;
-                           para->fpos.filename = "<command line>";
-                           para->fpos.line = para->fpos.col = -1;
+                           *r = '\0';
+                           cmdline_cfg_add(para, s);
 
                            if (cfg_tail)
                                cfg_tail->next = para;
@@ -238,7 +264,10 @@ int main(int argc, char **argv) {
            /*
             * A non-option argument.
             */
-           infiles[nfiles++] = p;
+           if (!strcmp(p, "-"))
+               infiles[nfiles++] = NULL;   /* special case: read stdin */
+           else
+               infiles[nfiles++] = p;
        }
     }
 
@@ -250,7 +279,7 @@ int main(int argc, char **argv) {
     /*
      * Do the work.
      */
-    if (nfiles == 0) {
+    if (nfiles == 0 && !list_fonts) {
        error(err_noinput);
        usage();
        exit(EXIT_FAILURE);
@@ -270,10 +299,15 @@ int main(int argc, char **argv) {
        in.pushback = NULL;
        in.reportcols = reportcols;
        in.stack = NULL;
+       in.defcharset = input_charset;
 
        idx = make_index();
 
        sourceform = read_input(&in, idx);
+       if (list_fonts) {
+           listfonts();
+           exit(EXIT_SUCCESS);
+       }
        if (!sourceform)
            exit(EXIT_FAILURE);
 
@@ -294,8 +328,6 @@ int main(int argc, char **argv) {
 
        sfree(in.pushback);
 
-       mark_attr_ends(sourceform);
-
        sfree(infiles);
 
        keywords = get_keywords(sourceform);
@@ -310,6 +342,20 @@ int main(int argc, char **argv) {
 
        build_index(idx);
 
+       /*
+        * Set up attr_First / attr_Last / attr_Always, in the main
+        * document and in the index entries.
+        */
+       for (p = sourceform; p; p = p->next)
+           mark_attr_ends(p->words);
+       {
+           int i;
+           indexentry *entry;
+
+           for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
+               mark_attr_ends(entry->text);
+       }
+
        if (debug) {
            index_debug(idx);
            dbg_prtkws(keywords);
@@ -424,6 +470,8 @@ static void dbg_prtwordlist(int level, word *w) {
            printf("\"");
        } else
            printf("(no text)");
+       if (w->breaks)
+           printf(" [breaks]");
        if (w->alt) {
            printf(" alt = {\n");
            dbg_prtwordlist(level+1, w->alt);