Add an error check for correct formatting in Deflate uncompressed
[sgt/halibut] / main.c
diff --git a/main.c b/main.c
index a528453..dae6581 100644 (file)
--- a/main.c
+++ b/main.c
@@ -3,6 +3,7 @@
  */
 
 #include <assert.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "halibut.h"
@@ -11,18 +12,29 @@ static void dbg_prtsource(paragraph *sourceform);
 static void dbg_prtwordlist(int level, word *w);
 static void dbg_prtkws(keywordlist *kws);
 
+static const struct pre_backend { /* a preparatory pass whose result is shared with the backends that ask for it */
+    void *(*func)(paragraph *, keywordlist *, indexdata *); /* run by main() before the backends; return value is saved and later passed to backends */
+    int bitfield; /* bit identifying this pre-backend; tested against backend.prebackend_bitfield */
+} pre_backends[] = {
+    {paper_pre_backend, 0x0001} /* bit 0x0001 is requested by the "ps" and "pdf" backend entries */
+};
+
 static const struct backend {
     char *name;
-    void (*func)(paragraph *, keywordlist *, indexdata *);
-    int bitfield;
+    void (*func)(paragraph *, keywordlist *, indexdata *, void *); /* final argument: saved result of the matching pre-backend, or NULL if none matches */
+    paragraph *(*filename)(char *filename); /* called with the option's value when the backend option carries one; result is appended to the command-line config list and must be non-NULL */
+    int bitfield, prebackend_bitfield; /* bitfield: ORed into backendbits when selected; prebackend_bitfield: mask of pre_backends[] bits this backend needs (0 = none) */
 } backends[] = {
-    {"text", text_backend, 0x0001},
-    {"xhtml", xhtml_backend, 0x0002},
-    {"html", xhtml_backend, 0x0002},
-    {"hlp", whlp_backend, 0x0004},
-    {"whlp", whlp_backend, 0x0004},
-    {"winhelp", whlp_backend, 0x0004},
-    {"man", man_backend, 0x0008},
+    {"text", text_backend, text_config_filename, 0x0001, 0},
+    {"xhtml", html_backend, html_config_filename, 0x0002, 0},
+    {"html", html_backend, html_config_filename, 0x0002, 0}, /* same bitfield as the entry above: the run loop skips an entry whose bitfield repeats the previous one */
+    {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
+    {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0}, /* alternative name sharing bitfield 0x0004 */
+    {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0}, /* alternative name sharing bitfield 0x0004 */
+    {"man", man_backend, man_config_filename, 0x0008, 0},
+    {"info", info_backend, info_config_filename, 0x0010, 0},
+    {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001}, /* requests pre-backend bit 0x0001 */
+    {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001}, /* requests pre-backend bit 0x0001 */
 };
 
 int main(int argc, char **argv) {
@@ -31,19 +43,37 @@ int main(int argc, char **argv) {
     int nogo;
     int errs;
     int reportcols;
+    int list_fonts;
+    int input_charset;
     int debug;
-    int backendbits;
+    int backendbits, prebackbits;
     int k, b;
+    paragraph *cfg, *cfg_tail;
+    void *pre_backend_data[16];
+
+    /*
+     * Use the specified locale everywhere. It'll be used for
+     * output of error messages, and as the default character set
+     * for input files if one is not explicitly specified.
+     * 
+     * However, we need to use standard numeric formatting for
+     * output of things like PDF.
+     */
+    setlocale(LC_ALL, "");
+    setlocale(LC_NUMERIC, "C");
 
     /*
      * Set up initial (default) parameters.
      */
-    infiles = mknewa(char *, argc);
+    infiles = snewn(argc, char *);
     nfiles = 0;
     nogo = errs = FALSE;
     reportcols = 0;
+    list_fonts = 0;
+    input_charset = CS_ASCII;
     debug = 0;
     backendbits = 0;
+    cfg = cfg_tail = NULL;
 
     if (argc == 1) {
        usage();
@@ -55,7 +85,7 @@ int main(int argc, char **argv) {
      */
     while (--argc) {
        char *p = *++argv;
-       if (*p == '-') {
+       if (*p == '-' && p[1]) {
            /*
             * An option.
             */
@@ -81,10 +111,32 @@ int main(int argc, char **argv) {
                        for (k = 0; k < (int)lenof(backends); k++)
                            if (!strcmp(opt+1, backends[k].name)) {
                                backendbits |= backends[k].bitfield;
+                               if (val) {
+                                   paragraph *p = backends[k].filename(val);
+                                   assert(p);
+                                   if (cfg_tail)
+                                       cfg_tail->next = p;
+                                   else
+                                       cfg = p;
+                                   while (p->next)
+                                       p = p->next;
+                                   cfg_tail = p;
+                               }
                                break;
                            }
                        if (k < (int)lenof(backends)) {
                            /* do nothing */;
+                       } else if (!strcmp(opt, "-input-charset")) {
+                           if (!val) {
+                               errs = TRUE, error(err_optnoarg, opt);
+                           } else {
+                               int charset = charset_from_localenc(val);
+                               if (charset == CS_NONE) {
+                                   errs = TRUE, error(err_cmdcharset, val);
+                               } else {
+                                   input_charset = charset;
+                               }
+                           }
                        } else if (!strcmp(opt, "-help")) {
                            help();
                            nogo = TRUE;
@@ -95,6 +147,11 @@ int main(int argc, char **argv) {
                                   !strcmp(opt, "-license")) {
                            licence();
                            nogo = TRUE;
+                       } else if (!strcmp(opt, "-list-charsets")) {
+                           listcharsets();
+                           nogo = TRUE;
+                       } else if (!strcmp(opt, "-list-fonts")) {
+                           list_fonts = TRUE;
                        } else if (!strcmp(opt, "-precise")) {
                            reportcols = 1;
                        } else {
@@ -132,8 +189,7 @@ int main(int argc, char **argv) {
                        break;
                    }
                    break;
-#if 0
-                 case 'o':
+                 case 'C':
                    /*
                     * Option requiring parameter.
                     */
@@ -150,13 +206,48 @@ int main(int argc, char **argv) {
                     * Now c is the option and p is the parameter.
                     */
                    switch (c) {
-                     case 'o':
-                       outfile = p;
+                     case 'C':
+                       /*
+                        * -C means we split our argument up into
+                        * colon-separated chunks and assemble them
+                        * into a config paragraph.
+                        */
+                       {
+                           char *s = dupstr(p), *q, *r;
+                           paragraph *para;
+
+                           para = cmdline_cfg_new();
+
+                           q = r = s;
+                           while (*q) {
+                               if (*q == ':') {
+                                   *r = '\0';
+                                   /* XXX ad-hoc diagnostic */
+                                   if (!strcmp(s, "input-charset"))
+                                       error(err_futileopt, "Cinput-charset",
+                                             "; use --input-charset");
+                                   cmdline_cfg_add(para, s);
+                                   r = s;
+                               } else {
+                                   if (*q == '\\' && q[1])
+                                       q++;
+                                   *r++ = *q;
+                               }
+                               q++;
+                           }
+                           *r = '\0';
+                           cmdline_cfg_add(para, s);
+
+                           if (cfg_tail)
+                               cfg_tail->next = para;
+                           else
+                               cfg = para;
+                           cfg_tail = para;
+                       }
                        break;
                    }
                    p = NULL;          /* prevent continued processing */
                    break;
-#endif
                  default:
                    /*
                     * Unrecognised option.
@@ -173,7 +264,10 @@ int main(int argc, char **argv) {
            /*
             * A non-option argument.
             */
-           infiles[nfiles++] = p;
+           if (!strcmp(p, "-"))
+               infiles[nfiles++] = NULL;   /* special case: read stdin */
+           else
+               infiles[nfiles++] = p;
        }
     }
 
@@ -185,7 +279,7 @@ int main(int argc, char **argv) {
     /*
      * Do the work.
      */
-    if (nfiles == 0) {
+    if (nfiles == 0 && !list_fonts) {
        error(err_noinput);
        usage();
        exit(EXIT_FAILURE);
@@ -205,16 +299,34 @@ int main(int argc, char **argv) {
        in.pushback = NULL;
        in.reportcols = reportcols;
        in.stack = NULL;
+       in.defcharset = input_charset;
 
        idx = make_index();
 
        sourceform = read_input(&in, idx);
+       if (list_fonts) {
+           listfonts();
+           exit(EXIT_SUCCESS);
+       }
        if (!sourceform)
            exit(EXIT_FAILURE);
 
-       sfree(in.pushback);
+       /*
+        * Append the config directives acquired from the command
+        * line.
+        */
+       {
+           paragraph *end;
 
-       mark_attr_ends(sourceform);
+           end = sourceform;
+           while (end && end->next)
+               end = end->next;
+           assert(end);
+
+           end->next = cfg;
+       }
+
+       sfree(in.pushback);
 
        sfree(infiles);
 
@@ -226,10 +338,24 @@ int main(int argc, char **argv) {
 
        for (p = sourceform; p; p = p->next)
            if (p->type == para_IM)
-               index_merge(idx, TRUE, p->keyword, p->words);
+               index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
 
        build_index(idx);
 
+       /*
+        * Set up attr_First / attr_Last / attr_Always, in the main
+        * document and in the index entries.
+        */
+       for (p = sourceform; p; p = p->next)
+           mark_attr_ends(p->words);
+       {
+           int i;
+           indexentry *entry;
+
+           for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
+               mark_attr_ends(entry->text);
+       }
+
        if (debug) {
            index_debug(idx);
            dbg_prtkws(keywords);
@@ -237,13 +363,39 @@ int main(int argc, char **argv) {
        }
 
        /*
+        * Select and run the pre-backends.
+        */
+       prebackbits = 0;
+       for (k = 0; k < (int)lenof(backends); k++)
+           if (backendbits == 0 || (backendbits & backends[k].bitfield))
+               prebackbits |= backends[k].prebackend_bitfield;
+       for (k = 0; k < (int)lenof(pre_backends); k++)
+           if (prebackbits & pre_backends[k].bitfield) {
+               assert(k < (int)lenof(pre_backend_data));
+               pre_backend_data[k] =
+                   pre_backends[k].func(sourceform, keywords, idx);
+           }
+
+       /*
         * Run the selected set of backends.
         */
        for (k = b = 0; k < (int)lenof(backends); k++)
            if (b != backends[k].bitfield) {
                b = backends[k].bitfield;
-               if (backendbits == 0 || (backendbits & b))
-                   backends[k].func(sourceform, keywords, idx);
+               if (backendbits == 0 || (backendbits & b)) {
+                   void *pbd = NULL;
+                   int pbb = backends[k].prebackend_bitfield;
+                   int m;
+
+                   for (m = 0; m < (int)lenof(pre_backends); m++)
+                       if (pbb & pre_backends[m].bitfield) {
+                           assert(m < (int)lenof(pre_backend_data));
+                           pbd = pre_backend_data[m];
+                           break;
+                       }
+                           
+                   backends[k].func(sourceform, keywords, idx, pbd);
+               }
            }
 
        free_para_list(sourceform);
@@ -318,6 +470,8 @@ static void dbg_prtwordlist(int level, word *w) {
            printf("\"");
        } else
            printf("(no text)");
+       if (w->breaks)
+           printf(" [breaks]");
        if (w->alt) {
            printf(" alt = {\n");
            dbg_prtwordlist(level+1, w->alt);