Add an error check for correct formatting in Deflate uncompressed
[sgt/halibut] / main.c
diff --git a/main.c b/main.c
index 531d122..dae6581 100644 (file)
--- a/main.c
+++ b/main.c
@@ -2,6 +2,8 @@
  * main.c: command line parsing and top level
  */
 
+#include <assert.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "halibut.h"
@@ -10,24 +12,68 @@ static void dbg_prtsource(paragraph *sourceform);
 static void dbg_prtwordlist(int level, word *w);
 static void dbg_prtkws(keywordlist *kws);
 
+static const struct pre_backend {   /* table of preprocessing passes shared between output backends */
+    void *(*func)(paragraph *, keywordlist *, indexdata *);   /* main() stores the result in pre_backend_data[] and hands it to dependent backends */
+    int bitfield;   /* this entry's bit, matched against backends[].prebackend_bitfield */
+} pre_backends[] = {
+    {paper_pre_backend, 0x0001}   /* requested by the "ps" and "pdf" backends */
+};
+
+static const struct backend {   /* table of output backends selectable by long option */
+    char *name;   /* option name; main() compares it against opt+1 */
+    void (*func)(paragraph *, keywordlist *, indexdata *, void *);   /* runs the backend; last argument is the matching pre-backend result, or NULL */
+    paragraph *(*filename)(char *filename);   /* called with the option's value; the returned paragraphs are appended to the command-line config */
+    int bitfield, prebackend_bitfield;   /* own selection bit (aliases share it), then the pre_backends[] bits this backend needs */
+} backends[] = {
+    {"text", text_backend, text_config_filename, 0x0001, 0},
+    {"xhtml", html_backend, html_config_filename, 0x0002, 0},
+    {"html", html_backend, html_config_filename, 0x0002, 0},   /* alias: keep adjacent to "xhtml", main() skips an entry whose bit equals the previous one */
+    {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
+    {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},   /* alias of "hlp" */
+    {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},   /* alias of "hlp" */
+    {"man", man_backend, man_config_filename, 0x0008, 0},
+    {"info", info_backend, info_config_filename, 0x0010, 0},
+    {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},   /* needs pre-backend bit 0x0001 */
+    {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},   /* needs pre-backend bit 0x0001 */
+};
+
 int main(int argc, char **argv) {
     char **infiles;
-    char *outfile;
     int nfiles;
     int nogo;
     int errs;
     int reportcols;
+    int list_fonts;
+    int input_charset;
     int debug;
+    int backendbits, prebackbits;
+    int k, b;
+    paragraph *cfg, *cfg_tail;
+    void *pre_backend_data[16];
+
+    /*
+     * Use the specified locale everywhere. It'll be used for
+     * output of error messages, and as the default character set
+     * for input files if one is not explicitly specified.
+     * 
+     * However, we need to use standard numeric formatting for
+     * output of things like PDF.
+     */
+    setlocale(LC_ALL, "");
+    setlocale(LC_NUMERIC, "C");
 
     /*
      * Set up initial (default) parameters.
      */
-    infiles = mknewa(char *, argc);
-    outfile = NULL;
+    infiles = snewn(argc, char *);
     nfiles = 0;
     nogo = errs = FALSE;
     reportcols = 0;
+    list_fonts = 0;
+    input_charset = CS_ASCII;
     debug = 0;
+    backendbits = 0;
+    cfg = cfg_tail = NULL;
 
     if (argc == 1) {
        usage();
@@ -39,7 +85,7 @@ int main(int argc, char **argv) {
      */
     while (--argc) {
        char *p = *++argv;
-       if (*p == '-') {
+       if (*p == '-' && p[1]) {
            /*
             * An option.
             */
@@ -60,7 +106,38 @@ int main(int argc, char **argv) {
                            val = p;
                        } else
                            val = NULL;
-                       if (!strcmp(opt, "-help")) {
+
+                       assert(opt[0] == '-');
+                       for (k = 0; k < (int)lenof(backends); k++)
+                           if (!strcmp(opt+1, backends[k].name)) {
+                               backendbits |= backends[k].bitfield;
+                               if (val) {
+                                   paragraph *p = backends[k].filename(val);
+                                   assert(p);
+                                   if (cfg_tail)
+                                       cfg_tail->next = p;
+                                   else
+                                       cfg = p;
+                                   while (p->next)
+                                       p = p->next;
+                                   cfg_tail = p;
+                               }
+                               break;
+                           }
+                       if (k < (int)lenof(backends)) {
+                           /* do nothing */;
+                       } else if (!strcmp(opt, "-input-charset")) {
+                           if (!val) {
+                               errs = TRUE, error(err_optnoarg, opt);
+                           } else {
+                               int charset = charset_from_localenc(val);
+                               if (charset == CS_NONE) {
+                                   errs = TRUE, error(err_cmdcharset, val);
+                               } else {
+                                   input_charset = charset;
+                               }
+                           }
+                       } else if (!strcmp(opt, "-help")) {
                            help();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-version")) {
@@ -70,11 +147,11 @@ int main(int argc, char **argv) {
                                   !strcmp(opt, "-license")) {
                            licence();
                            nogo = TRUE;
-                       } else if (!strcmp(opt, "-output")) {
-                           if (!val)
-                               errs = TRUE, error(err_optnoarg, opt);
-                           else
-                               outfile = val;
+                       } else if (!strcmp(opt, "-list-charsets")) {
+                           listcharsets();
+                           nogo = TRUE;
+                       } else if (!strcmp(opt, "-list-fonts")) {
+                           list_fonts = TRUE;
                        } else if (!strcmp(opt, "-precise")) {
                            reportcols = 1;
                        } else {
@@ -112,7 +189,7 @@ int main(int argc, char **argv) {
                        break;
                    }
                    break;
-                 case 'o':
+                 case 'C':
                    /*
                     * Option requiring parameter.
                     */
@@ -129,8 +206,44 @@ int main(int argc, char **argv) {
                     * Now c is the option and p is the parameter.
                     */
                    switch (c) {
-                     case 'o':
-                       outfile = p;
+                     case 'C':
+                       /*
+                        * -C means we split our argument up into
+                        * colon-separated chunks and assemble them
+                        * into a config paragraph.
+                        */
+                       {
+                           char *s = dupstr(p), *q, *r;
+                           paragraph *para;
+
+                           para = cmdline_cfg_new();
+
+                           q = r = s;
+                           while (*q) {
+                               if (*q == ':') {
+                                   *r = '\0';
+                                   /* XXX ad-hoc diagnostic */
+                                   if (!strcmp(s, "input-charset"))
+                                       error(err_futileopt, "Cinput-charset",
+                                             "; use --input-charset");
+                                   cmdline_cfg_add(para, s);
+                                   r = s;
+                               } else {
+                                   if (*q == '\\' && q[1])
+                                       q++;
+                                   *r++ = *q;
+                               }
+                               q++;
+                           }
+                           *r = '\0';
+                           cmdline_cfg_add(para, s);
+
+                           if (cfg_tail)
+                               cfg_tail->next = para;
+                           else
+                               cfg = para;
+                           cfg_tail = para;
+                       }
                        break;
                    }
                    p = NULL;          /* prevent continued processing */
@@ -151,7 +264,10 @@ int main(int argc, char **argv) {
            /*
             * A non-option argument.
             */
-           infiles[nfiles++] = p;
+           if (!strcmp(p, "-"))
+               infiles[nfiles++] = NULL;   /* special case: read stdin */
+           else
+               infiles[nfiles++] = p;
        }
     }
 
@@ -163,7 +279,7 @@ int main(int argc, char **argv) {
     /*
      * Do the work.
      */
-    if (nfiles == 0) {
+    if (nfiles == 0 && !list_fonts) {
        error(err_noinput);
        usage();
        exit(EXIT_FAILURE);
@@ -183,16 +299,34 @@ int main(int argc, char **argv) {
        in.pushback = NULL;
        in.reportcols = reportcols;
        in.stack = NULL;
+       in.defcharset = input_charset;
 
        idx = make_index();
 
        sourceform = read_input(&in, idx);
+       if (list_fonts) {
+           listfonts();
+           exit(EXIT_SUCCESS);
+       }
        if (!sourceform)
            exit(EXIT_FAILURE);
 
-       sfree(in.pushback);
+       /*
+        * Append the config directives acquired from the command
+        * line.
+        */
+       {
+           paragraph *end;
+
+           end = sourceform;
+           while (end && end->next)
+               end = end->next;
+           assert(end);
+
+           end->next = cfg;
+       }
 
-       mark_attr_ends(sourceform);
+       sfree(in.pushback);
 
        sfree(infiles);
 
@@ -204,20 +338,65 @@ int main(int argc, char **argv) {
 
        for (p = sourceform; p; p = p->next)
            if (p->type == para_IM)
-               index_merge(idx, TRUE, p->keyword, p->words);
+               index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
 
        build_index(idx);
 
+       /*
+        * Set up attr_First / attr_Last / attr_Always, in the main
+        * document and in the index entries.
+        */
+       for (p = sourceform; p; p = p->next)
+           mark_attr_ends(p->words);
+       {
+           int i;
+           indexentry *entry;
+
+           for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
+               mark_attr_ends(entry->text);
+       }
+
        if (debug) {
            index_debug(idx);
            dbg_prtkws(keywords);
            dbg_prtsource(sourceform);
        }
 
-       text_backend(sourceform, keywords, idx);
-       xhtml_backend(sourceform, keywords, idx);
-       whlp_backend(sourceform, keywords, idx);
-       man_backend(sourceform, keywords, idx);
+       /*
+        * Select and run the pre-backends.
+        */
+       prebackbits = 0;
+       for (k = 0; k < (int)lenof(backends); k++)
+           if (backendbits == 0 || (backendbits & backends[k].bitfield))
+               prebackbits |= backends[k].prebackend_bitfield;
+       for (k = 0; k < (int)lenof(pre_backends); k++)
+           if (prebackbits & pre_backends[k].bitfield) {
+               assert(k < (int)lenof(pre_backend_data));
+               pre_backend_data[k] =
+                   pre_backends[k].func(sourceform, keywords, idx);
+           }
+
+       /*
+        * Run the selected set of backends.
+        */
+       for (k = b = 0; k < (int)lenof(backends); k++)
+           if (b != backends[k].bitfield) {
+               b = backends[k].bitfield;
+               if (backendbits == 0 || (backendbits & b)) {
+                   void *pbd = NULL;
+                   int pbb = backends[k].prebackend_bitfield;
+                   int m;
+
+                   for (m = 0; m < (int)lenof(pre_backends); m++)
+                       if (pbb & pre_backends[m].bitfield) {
+                           assert(m < (int)lenof(pre_backend_data));
+                           pbd = pre_backend_data[m];
+                           break;
+                       }
+                           
+                   backends[k].func(sourceform, keywords, idx, pbd);
+               }
+           }
 
        free_para_list(sourceform);
        free_keywords(keywords);
@@ -291,6 +470,8 @@ static void dbg_prtwordlist(int level, word *w) {
            printf("\"");
        } else
            printf("(no text)");
+       if (w->breaks)
+           printf(" [breaks]");
        if (w->alt) {
            printf(" alt = {\n");
            dbg_prtwordlist(level+1, w->alt);