Enhance the text backend to support configurable quote characters,
[sgt/halibut] / bk_whlp.c
index a21ac04..d11fd22 100644 (file)
--- a/bk_whlp.c
+++ b/bk_whlp.c
@@ -1,12 +1,10 @@
 /*
  * Windows Help backend for Halibut
- * 
- * TODO:
- *  - allow user to specify section contexts.
  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <ctype.h>
 #include <assert.h>
 
 #include "halibut.h"
@@ -28,6 +26,8 @@ enum {
     FONT_NORMAL,
     FONT_EMPH,
     FONT_CODE,
+    FONT_ITAL_CODE,
+    FONT_BOLD_CODE,
     FONT_TITLE,
     FONT_TITLE_EMPH,
     FONT_TITLE_CODE,
@@ -35,15 +35,21 @@ enum {
 };
 
 static void whlp_rdaddwc(rdstringc *rs, word *text);
-static int whlp_convert(wchar_t *s, char **result, int hard_spaces);
+static int whlp_convert(wchar_t *s, int maxlen,
+                       char **result, int hard_spaces);
 static void whlp_mkparagraph(struct bk_whlp_state *state,
                             int font, word *text, int subsidiary);
 static void whlp_navmenu(struct bk_whlp_state *state, paragraph *p);
 static void whlp_contents_write(struct bk_whlp_state *state,
                                int level, char *text, WHLP_TOPIC topic);
     
+paragraph *whlp_config_filename(char *filename)
+{   /* Passes filename to cmdline_cfg_simple under the "winhelp-filename" keyword. */
+    return cmdline_cfg_simple("winhelp-filename", filename, NULL);
+}
+
 void whlp_backend(paragraph *sourceform, keywordlist *keywords,
-                 indexdata *idx) {
+                 indexdata *idx, void *unused) {
     WHLP h;
     char *filename, *cntname;
     paragraph *p, *lastsect;
@@ -52,12 +58,9 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
     int i;
     int nesting;
     indexentry *ie;
+    int done_contents_topic = FALSE;
 
-    filename = "output.hlp";          /* FIXME: configurability */
-    cntname = "output.cnt";           /* corresponding contents file */
-
-    state.cntfp = fopen(cntname, "wb");
-    state.cnt_last_level = -1; state.cnt_workaround = 0;
+    IGNORE(unused);
 
     h = state.h = whlp_new();
     state.keywords = keywords;
@@ -73,6 +76,10 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
                     WHLP_FONT_ITALIC, 0, 0, 0);
     whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
                     0, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
+                    WHLP_FONT_ITALIC, 0, 0, 0);
+    whlp_create_font(h, "Courier New", WHLP_FONTFAM_FIXED, 24,
+                    WHLP_FONT_BOLD, 0, 0, 0);
     whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
                     WHLP_FONT_BOLD, 0, 0, 0);
     whlp_create_font(h, "Arial", WHLP_FONTFAM_SERIF, 30,
@@ -84,22 +91,52 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
 
     /*
      * Loop over the source form finding out whether the user has
-     * specified particular help topic names for anything.
+     * specified particular help topic names for anything. Also
+     * pick out the output file name at this stage.
      */
+    filename = dupstr("output.hlp");
     for (p = sourceform; p; p = p->next) {
        p->private_data = NULL;
        if (p->type == para_Config && p->parent) {
            if (!ustricmp(p->keyword, L"winhelp-topic")) {
                char *topicname;
-               whlp_convert(uadv(p->keyword), &topicname, 0);
+               whlp_convert(uadv(p->keyword), 0, &topicname, 0);
                /* Store the topic name in the private_data field of the
                 * containing section. */
                p->parent->private_data = topicname;
+           } else if (!ustricmp(p->keyword, L"winhelp-filename")) {
+               sfree(filename);
+               filename = dupstr(adv(p->origkeyword));
            }
        }
     }
 
     /*
+     * Ensure the output file name has a .hlp extension. This is
+     * required since we must create the .cnt file in parallel with
+     * it.
+     */
+    {
+       int len = strlen(filename);
+       if (len < 4 || filename[len-4] != '.' ||   /* case-insensitive ".hlp" test */
+           tolower((unsigned char)filename[len-3]) != 'h' ||
+           tolower((unsigned char)filename[len-2]) != 'l' ||
+           tolower((unsigned char)filename[len-1]) != 'p') {
+           char *newf;
+           newf = mknewa(char, len + 5);   /* len + ".hlp" (4) + NUL (1) */
+           sprintf(newf, "%s.hlp", filename);
+           sfree(filename);
+           filename = newf;
+           len = strlen(newf);
+       }
+       cntname = mknewa(char, len+1);      /* ".cnt" replaces ".hlp": same length */
+       sprintf(cntname, "%.*s.cnt", len-4, filename);
+    }
+
+    state.cntfp = fopen(cntname, "wb");
+    state.cnt_last_level = -1; state.cnt_workaround = 0;
+
+    /*
      * Loop over the source form registering WHLP_TOPICs for
      * everything.
      */
@@ -137,8 +174,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
     whlp_prepare(h);
 
     /* ------------------------------------------------------------------
-     * Do the contents page, containing title, preamble and
-     * copyright.
+     * Begin the contents page.
      */
 
     whlp_begin_topic(h, contents_topic, "Contents", "DB(\"btn_up\")", NULL);
@@ -167,32 +203,13 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
     }
 
     /*
-     * Next comes the preamble, which just goes into the ordinary
-     * scrolling region.
-     */
-    for (p = sourceform; p; p = p->next) {
-       if (p->type == para_Preamble) {
-           whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
-           whlp_begin_para(h, WHLP_PARA_SCROLL);
-           whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
-           whlp_end_para(h);
-       }
-    }
-
-    /*
-     * The copyright goes to two places, again: into the contents
-     * page and also into the system section.
+     * Put the copyright into the system section.
      */
     {
        rdstringc rs = {0, 0, NULL};
        for (p = sourceform; p; p = p->next) {
-           if (p->type == para_Copyright) {
-               whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
-               whlp_begin_para(h, WHLP_PARA_SCROLL);
-               whlp_mkparagraph(&state, FONT_NORMAL, p->words, FALSE);
-               whlp_end_para(h);
+           if (p->type == para_Copyright)
                whlp_rdaddwc(&rs, p->words);
-           }
        }
        if (rs.text) {
            whlp_copyright(h, rs.text);
@@ -200,17 +217,6 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
        }
     }
 
-    /*
-     * Now do the primary navigation menu.
-     */
-    for (p = sourceform; p; p = p->next) {
-       if (p->type == para_Chapter ||
-           p->type == para_Appendix ||
-           p->type == para_UnnumberedChapter)
-           whlp_navmenu(&state, p);
-    }
-
-    state.curr_topic = contents_topic;
     lastsect = NULL;
 
     /* ------------------------------------------------------------------
@@ -227,16 +233,16 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
       case para_BR:
       case para_Biblio:                       /* only touch BiblioCited */
       case para_VersionID:
-      case para_Copyright:
-      case para_Preamble:
       case para_NoCite:
       case para_Title:
        break;
 
       case para_LcontPush:
+      case para_QuotePush:
        nesting++;
        break;
       case para_LcontPop:
+      case para_QuotePop:
        assert(nesting > 0);
        nesting--;
        break;
@@ -249,6 +255,28 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
       case para_UnnumberedChapter:
       case para_Heading:
       case para_Subsect:
+
+       if (!done_contents_topic) {
+           paragraph *p;
+
+           /*
+            * If this is the first section title we've seen, then
+            * we're currently still in the contents topic. We
+            * should therefore finish up the contents page by
+            * writing a nav menu.
+            */
+           for (p = sourceform; p; p = p->next) {
+               if (p->type == para_Chapter ||
+                   p->type == para_Appendix ||
+                   p->type == para_UnnumberedChapter)
+                   whlp_navmenu(&state, p);
+           }
+
+           state.curr_topic = contents_topic;
+
+           done_contents_topic = TRUE;
+       }
+
        if (lastsect && lastsect->child) {
            paragraph *q;
            /*
@@ -353,6 +381,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
        break;
 
       case para_Normal:
+      case para_Copyright:
       case para_DescribedThing:
       case para_Description:
       case para_BiblioCited:
@@ -396,14 +425,41 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
         */
        {
            word *w;
+           wchar_t *t, *e;
            char *c;
-           for (w = p->words; w; w = w->next) {
+
+           for (w = p->words; w; w = w->next) if (w->type == word_WeakCode) {
+               t = w->text;
+               if (w->next && w->next->type == word_Emph) {
+                   w = w->next;
+                   e = w->text;
+               } else
+                   e = NULL;
+
                if (!w->next)
                    whlp_para_attr(h, WHLP_PARA_SPACEBELOW, 12);
+
                whlp_para_attr(h, WHLP_PARA_LEFTINDENT, 72*nesting);
                whlp_begin_para(h, WHLP_PARA_SCROLL);
+               while (e && *e && *t) {
+                   int n;
+                   int ec = *e;
+
+                   for (n = 0; t[n] && e[n] && e[n] == ec; n++);
+                   if (ec == 'i')
+                       whlp_set_font(h, FONT_ITAL_CODE);
+                   else if (ec == 'b')
+                       whlp_set_font(h, FONT_BOLD_CODE);
+                   else
+                       whlp_set_font(h, FONT_CODE);
+                   whlp_convert(t, n, &c, FALSE);
+                   whlp_text(h, c);
+                   sfree(c);
+                   t += n;
+                   e += n;
+               }
                whlp_set_font(h, FONT_CODE);
-               whlp_convert(w->text, &c, FALSE);
+               whlp_convert(t, 0, &c, FALSE);
                whlp_text(h, c);
                sfree(c);
                whlp_end_para(h);
@@ -422,6 +478,9 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
     for (i = 0; (ie = index234(idx->entries, i)) != NULL; i++) {
        sfree(ie->backend_data);
     }
+
+    sfree(filename);
+    sfree(cntname);
 }
 
 static void whlp_contents_write(struct bk_whlp_state *state,
@@ -546,7 +605,7 @@ static void whlp_mkparagraph(struct bk_whlp_state *state,
            whlp_set_font(state->h, newfont);
        }
        if (removeattr(text->type) == word_Normal) {
-           if (whlp_convert(text->text, &c, TRUE))
+           if (whlp_convert(text->text, 0, &c, TRUE) || !text->alt)
                whlp_text(state->h, c);
            else
                whlp_mkparagraph(state, deffont, text->alt, FALSE);
@@ -589,7 +648,7 @@ static void whlp_rdaddwc(rdstringc *rs, word *text) {
        assert(text->type != word_CodeQuote &&
               text->type != word_WkCodeQuote);
        if (removeattr(text->type) == word_Normal) {
-           if (whlp_convert(text->text, &c, FALSE))
+           if (whlp_convert(text->text, 0, &c, FALSE) || !text->alt)
                rdaddsc(rs, c);
            else
                whlp_rdaddwc(rs, text->alt);
@@ -615,43 +674,51 @@ static void whlp_rdaddwc(rdstringc *rs, word *text) {
  * characters are OK but `result' is non-NULL, a result _will_
  * still be generated!
  */
-static int whlp_convert(wchar_t *s, char **result, int hard_spaces) {
+static int whlp_convert(wchar_t *s, int maxlen,
+                       char **result, int hard_spaces) {
+    wchar_t *s2;
+    char *ret;
+    int ok;
+
     /*
-     * FIXME. Currently this is ISO8859-1 only.
+     * Enforce maxlen.
      */
-    int doing = (result != 0);
-    int ok = TRUE;
-    char *p = NULL;
-    int plen = 0, psize = 0;
-
-    for (; *s; s++) {
-       wchar_t c = *s;
-       char outc;
-
-       if ((c >= 32 && c <= 126) ||
-           (c >= 160 && c <= 255)) {
-           /* Char is OK. */
-           if (c == 32 && hard_spaces)
-               outc = '\240';
-           else
-               outc = (char)c;
-       } else {
-           /* Char is not OK. */
-           ok = FALSE;
-           outc = 0xBF;               /* approximate the good old DEC `uh?' */
-       }
-       if (doing) {
-           if (plen >= psize) {
-               psize = plen + 256;
-               p = resize(p, psize);
-           }
-           p[plen++] = outc;
-       }
-    }
-    if (doing) {
-       p = resize(p, plen+1);
-       p[plen] = '\0';
-       *result = p;
+    if (maxlen > 0 && ustrlen(s) > maxlen) {
+       s2 = mknewa(wchar_t, maxlen+1);
+       memcpy(s2, s, maxlen * sizeof(wchar_t));
+       s2[maxlen] = L'\0';
+       s = s2;
+    } else
+       s2 = NULL;
+
+    /*
+     * We currently only support Win1252 in Windows Help files,
+     * because I don't know how to fiddle the character set
+     * designation in the |SYSTEM file to indicate anything else.
+     */
+
+    ret = utoa_careful_dup(s, CS_CP1252);
+    if (!ret) {
+       ok = FALSE;
+       ret = utoa_dup(s, CS_CP1252);
+    } else
+       ok = TRUE;
+
+    /*
+     * Enforce hard_spaces.
+     */
+    if (hard_spaces) {
+       char *p;
+
+       for (p = ret; *p; p++)
+           if (*p == ' ')
+               *p = '\240';
     }
+
+    if (s2)
+       sfree(s2);
+
+    if (result) *result = ret; else sfree(ret);  /* NULL result: check only */
+
     return ok;
 }