Charset support for the info backend (\cfg{info-charset}). (This

author simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Wed, 21 Apr 2004 18:07:26 +0000 (18:07 +0000)

committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Wed, 21 Apr 2004 18:07:26 +0000 (18:07 +0000)
author simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Wed, 21 Apr 2004 18:07:26 +0000 (18:07 +0000)
committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Wed, 21 Apr 2004 18:07:26 +0000 (18:07 +0000)
diff --git a/bk_info.c b/bk_info.c

index fc1b292..f7efbf5 100644 (file)
--- a/bk_info.c
+++ b/bk_info.c
@@ -25,6 +25,17 @@
   *  - might be helpful to diagnose duplicate node names!
   */
  
+/*
+ * FIXME:
+ * 
+ *  - alignment in the index is broken when a non-representable
+ *    character appears with no alternative. More generally, I
+ *    fear, this is the fault of the info_rdadd* functions failing
+ *    to return correct width figures in this circumstance (so it
+ *    will affect list paragraph prefixes and paragraph wrapping as
+ *    well).
+ */
+
  #include <stdio.h>
  #include <stdlib.h>
  #include <assert.h>
@@ -33,39 +44,53 @@
  typedef struct {
      char *filename;
      int maxfilesize;
+    int charset;
  } infoconfig;
  
+typedef struct {
+    rdstringc output;
+    int charset;
+    charset_state state;
+    int wcmode;
+} info_data;
+#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
+static const info_data empty_info_data = EMPTY_INFO_DATA;
+
  typedef struct node_tag node;
  struct node_tag {
      node *listnext;
      node *up, *prev, *next, *lastchild;
      int pos, started_menu, filenum;
      char *name;
-    rdstringc text;
+    info_data text;
  };
  
  typedef struct {
      char *text;
+    int length;
      int nnodes, nodesize;
      node **nodes;
  } info_idx;
  
-static int info_convert(wchar_t *, char **);
+static int info_rdadd(info_data *, wchar_t);
+static int info_rdadds(info_data *, wchar_t const *);
+static int info_rdaddc(info_data *, char);
+static int info_rdaddsc(info_data *, char const *);
  
-static void info_heading(rdstringc *, word *, word *, int);
-static void info_rule(rdstringc *, int, int);
-static void info_para(rdstringc *, word *, char *, word *, keywordlist *,
+static void info_heading(info_data *, word *, word *, int);
+static void info_rule(info_data *, int, int);
+static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
                       int, int, int);
-static void info_codepara(rdstringc *, word *, int, int);
-static void info_versionid(rdstringc *, word *);
-static void info_menu_item(rdstringc *, node *, paragraph *);
+static void info_codepara(info_data *, word *, int, int);
+static void info_versionid(info_data *, word *);
+static void info_menu_item(info_data *, node *, paragraph *);
  static word *info_transform_wordlist(word *, keywordlist *);
  static int info_check_index(word *, node *, indexdata *);
  
-static void info_rdaddwc(rdstringc *, word *, word *, int);
+static int info_rdaddwc(info_data *, word *, word *, int);
  
-static node *info_node_new(char *name);
-static char *info_node_name(paragraph *p);
+static node *info_node_new(char *name, int charset);
+static char *info_node_name(paragraph *p, int charset);
  
  static infoconfig info_configure(paragraph *source) {
      infoconfig ret;
@@ -75,12 +100,17 @@ static infoconfig info_configure(paragraph *source) {
       */
      ret.filename = dupstr("output.info");
      ret.maxfilesize = 64 << 10;
+    ret.charset = CS_ASCII;
  
      for (; source; source = source->next) {
         if (source->type == para_Config) {
             if (!ustricmp(source->keyword, L"info-filename")) {
                 sfree(ret.filename);
                 ret.filename = dupstr(adv(source->origkeyword));
+           } else if (!ustricmp(source->keyword, L"info-charset")) {
+               char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
+               ret.charset = charset_from_localenc(csname);
+               sfree(csname);
             } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
                 ret.maxfilesize = utoi(uadv(source->keyword));
             }
@@ -101,18 +131,18 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
      infoconfig conf;
      word *prefix, *body, *wp;
      word spaceword;
-    char *prefixextra;
+    wchar_t *prefixextra;
      int nesting, nestindent;
      int indentb, indenta;
      int filepos;
      int has_index;
-    rdstringc intro_text = { 0, 0, NULL };
+    info_data intro_text = EMPTY_INFO_DATA;
      node *topnode, *currnode;
      word bullet;
      FILE *fp;
  
      /*
-     * FIXME
+     * FIXME: possibly configurability?
       */
      int width = 70, listindentbefore = 1, listindentafter = 3;
      int indent_code = 2, index_width = 40;
@@ -124,7 +154,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
      /*
       * Go through and create a node for each section.
       */
-    topnode = info_node_new("Top");
+    topnode = info_node_new("Top", conf.charset);
      currnode = topnode;
      for (p = sourceform; p; p = p->next) switch (p->type) {
         /*
@@ -139,8 +169,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
             node *newnode, *upnode;
             char *nodename;
  
-           nodename = info_node_name(p);
-           newnode = info_node_new(nodename);
+           nodename = info_node_name(p, conf.charset);
+           newnode = info_node_new(nodename, conf.charset);
             sfree(nodename);
  
             p->private_data = newnode;
@@ -170,14 +200,16 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
  
         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
             info_idx *ii = mknew(info_idx);
-           rdstringc rs = { 0, 0, NULL };
+           info_data id = EMPTY_INFO_DATA;
+
+           id.charset = conf.charset;
  
             ii->nnodes = ii->nodesize = 0;
             ii->nodes = NULL;
  
-           info_rdaddwc(&rs, entry->text, NULL, FALSE);
+           ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE);
  
-           ii->text = rs.text;
+           ii->text = id.output.text;
  
             entry->backend_data = ii;
         }
@@ -189,11 +221,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
       * good place to put the copyright notice and the version IDs. 
       * Also, Info directory entries are expected to go here.
       */
+    intro_text.charset = conf.charset;
  
-    rdaddsc(&intro_text,
+    info_rdaddsc(&intro_text,
             "This Info file generated by Halibut, ");
-    rdaddsc(&intro_text, version);
-    rdaddsc(&intro_text, "\n\n");
+    info_rdaddsc(&intro_text, version);
+    info_rdaddsc(&intro_text, "\n\n");
  
      for (p = sourceform; p; p = p->next)
         if (p->type == para_Config &&
@@ -211,33 +244,27 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
                 continue;
             }
  
-           rdaddsc(&intro_text, "INFO-DIR-SECTION ");
-           s = utoa_dup(section, CS_FIXME);
-           rdaddsc(&intro_text, s);
-           sfree(s);
-           rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
-           s = utoa_dup(shortname, CS_FIXME);
-           rdaddsc(&intro_text, s);
-           sfree(s);
-           rdaddsc(&intro_text, ": (");
+           info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
+           info_rdadds(&intro_text, section);
+           info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
+           info_rdadds(&intro_text, shortname);
+           info_rdaddsc(&intro_text, ": (");
             s = dupstr(conf.filename);
             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
                 s[strlen(s)-5] = '\0';
-           rdaddsc(&intro_text, s);
+           info_rdaddsc(&intro_text, s);
             sfree(s);
-           rdaddsc(&intro_text, ")");
+           info_rdaddsc(&intro_text, ")");
             if (*kw) {
                 keyword *kwl = kw_lookup(keywords, kw);
                 if (kwl && kwl->para->private_data) {
                     node *n = (node *)kwl->para->private_data;
-                   rdaddsc(&intro_text, n->name);
+                   info_rdaddsc(&intro_text, n->name);
                 }
             }
-           rdaddsc(&intro_text, ".   ");
-           s = utoa_dup(longname, CS_FIXME);
-           rdaddsc(&intro_text, s);
-           sfree(s);
-           rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
+           info_rdaddsc(&intro_text, ".   ");
+           info_rdadds(&intro_text, longname);
+           info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
         }
  
      for (p = sourceform; p; p = p->next)
@@ -249,8 +276,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
         if (p->type == para_VersionID)
             info_versionid(&intro_text, p->words);
  
-    if (intro_text.text[intro_text.pos-1] != '\n')
-       rdaddc(&intro_text, '\n');
+    if (intro_text.output.text[intro_text.output.pos-1] != '\n')
+       info_rdaddc(&intro_text, '\n');
  
      /* Do the title */
      for (p = sourceform; p; p = p->next)
@@ -306,7 +333,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
         assert(currnode->up);
  
         if (!currnode->up->started_menu) {
-           rdaddsc(&currnode->up->text, "* Menu:\n\n");
+           info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
             currnode->up->started_menu = TRUE;
         }
         info_menu_item(&currnode->up->text, currnode, p);
@@ -339,7 +366,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
             indenta = listindentafter;
         } else if (p->type == para_NumberedList) {
             prefix = p->kwtext;
-           prefixextra = ".";         /* FIXME: configurability */
+           prefixextra = L".";        /* FIXME: configurability */
             indentb = listindentbefore;
             indenta = listindentafter;
         } else if (p->type == para_Description) {
@@ -388,14 +415,14 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
         int i, j, k;
         indexentry *entry;
  
-       newnode = info_node_new("Index");
+       newnode = info_node_new("Index", conf.charset);
         newnode->up = topnode;
  
         currnode->next = newnode;
         newnode->prev = currnode;
         currnode->listnext = newnode;
  
-       rdaddsc(&newnode->text, "Index\n-----\n\n");
+       info_rdaddsc(&newnode->text, "Index\n-----\n\n");
  
         info_menu_item(&topnode->text, newnode, NULL);
  
@@ -403,7 +430,6 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
             info_idx *ii = (info_idx *)entry->backend_data;
  
             for (j = 0; j < ii->nnodes; j++) {
-               int pos0 = newnode->text.pos;
                 /*
                  * When we have multiple references for a single
                  * index term, we only display the actual term on
@@ -411,12 +437,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
                  * really are the same.
                  */
                 if (j == 0)
-                   rdaddsc(&newnode->text, ii->text);
-               for (k = newnode->text.pos - pos0; k < index_width; k++)
-                   rdaddc(&newnode->text, ' ');
-               rdaddsc(&newnode->text, "   *Note ");
-               rdaddsc(&newnode->text, ii->nodes[j]->name);
-               rdaddsc(&newnode->text, "::\n");
+                   info_rdaddsc(&newnode->text, ii->text);
+               for (k = (j ? 0 : ii->length); k < index_width; k++)
+                   info_rdaddc(&newnode->text, ' ');
+               info_rdaddsc(&newnode->text, "   *Note ");
+               info_rdaddsc(&newnode->text, ii->nodes[j]->name);
+               info_rdaddsc(&newnode->text, "::\n");
             }
         }
      }
@@ -426,31 +452,31 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
       * and the node line at the top.
       */
      for (currnode = topnode; currnode; currnode = currnode->listnext) {
-       char *origtext = currnode->text.text;
-       currnode->text.text = NULL;
-       currnode->text.pos = currnode->text.size = 0;
-       rdaddsc(&currnode->text, "\037\nFile: ");
-       rdaddsc(&currnode->text, conf.filename);
-       rdaddsc(&currnode->text, ",  Node: ");
-       rdaddsc(&currnode->text, currnode->name);
+       char *origtext = currnode->text.output.text;
+       currnode->text = empty_info_data;
+       currnode->text.charset = conf.charset;
+       info_rdaddsc(&currnode->text, "\037\nFile: ");
+       info_rdaddsc(&currnode->text, conf.filename);
+       info_rdaddsc(&currnode->text, ",  Node: ");
+       info_rdaddsc(&currnode->text, currnode->name);
         if (currnode->prev) {
-           rdaddsc(&currnode->text, ",  Prev: ");
-           rdaddsc(&currnode->text, currnode->prev->name);
+           info_rdaddsc(&currnode->text, ",  Prev: ");
+           info_rdaddsc(&currnode->text, currnode->prev->name);
         }
-       rdaddsc(&currnode->text, ",  Up: ");
-       rdaddsc(&currnode->text, (currnode->up ?
-                                 currnode->up->name : "(dir)"));
+       info_rdaddsc(&currnode->text, ",  Up: ");
+       info_rdaddsc(&currnode->text, (currnode->up ?
+                                      currnode->up->name : "(dir)"));
         if (currnode->next) {
-           rdaddsc(&currnode->text, ",  Next: ");
-           rdaddsc(&currnode->text, currnode->next->name);
+           info_rdaddsc(&currnode->text, ",  Next: ");
+           info_rdaddsc(&currnode->text, currnode->next->name);
         }
-       rdaddsc(&currnode->text, "\n\n");
-       rdaddsc(&currnode->text, origtext);
+       info_rdaddsc(&currnode->text, "\n\n");
+       info_rdaddsc(&currnode->text, origtext);
         /*
          * Just make _absolutely_ sure we end with a newline.
          */
-       if (currnode->text.text[currnode->text.pos-1] != '\n')
-           rdaddc(&currnode->text, '\n');
+       if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
+           info_rdaddc(&currnode->text, '\n');
  
         sfree(origtext);
      }    
@@ -458,25 +484,25 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
      /*
       * Compute the offsets for the tag table.
       */
-    filepos = intro_text.pos;
+    filepos = intro_text.output.pos;
      for (currnode = topnode; currnode; currnode = currnode->listnext) {
         currnode->pos = filepos;
-       filepos += currnode->text.pos;
+       filepos += currnode->text.output.pos;
      }
  
      /*
       * Split into sub-files.
       */
      if (conf.maxfilesize > 0) {
-       int currfilesize = intro_text.pos, currfilenum = 1;
+       int currfilesize = intro_text.output.pos, currfilenum = 1;
         for (currnode = topnode; currnode; currnode = currnode->listnext) {
-           if (currfilesize > intro_text.pos &&
-               currfilesize + currnode->text.pos > conf.maxfilesize) {
+           if (currfilesize > intro_text.output.pos &&
+               currfilesize + currnode->text.output.pos > conf.maxfilesize) {
                 currfilenum++;
-               currfilesize = intro_text.pos;
+               currfilesize = intro_text.output.pos;
             }
             currnode->filenum = currfilenum;
-           currfilesize += currnode->text.pos;
+           currfilesize += currnode->text.output.pos;
         }
      }
  
@@ -488,10 +514,10 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
         error(err_cantopenw, conf.filename);
         return;
      }
-    fputs(intro_text.text, fp);
+    fputs(intro_text.output.text, fp);
      if (conf.maxfilesize == 0) {
         for (currnode = topnode; currnode; currnode = currnode->listnext)
-           fputs(currnode->text.text, fp);
+           fputs(currnode->text.output.text, fp);
      } else {
         int filenum = 0;
         fprintf(fp, "\037\nIndirect:\n");
@@ -533,9 +559,9 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
                     return;
                 }
                 sfree(fname);
-               fputs(intro_text.text, fp);
+               fputs(intro_text.output.text, fp);
             }
-           fputs(currnode->text.text, fp);
+           fputs(currnode->text.output.text, fp);
         }
  
         if (fp)
@@ -585,55 +611,6 @@ static int info_check_index(word *w, node *n, indexdata *idx)
      return ret;
  }
  
-/*
- * Convert a wide string into a string of chars. If `result' is
- * non-NULL, mallocs the resulting string and stores a pointer to
- * it in `*result'. If `result' is NULL, merely checks whether all
- * characters in the string are feasible for the output character
- * set.
- *
- * Return is nonzero if all characters are OK. If not all
- * characters are OK but `result' is non-NULL, a result _will_
- * still be generated!
- */
-static int info_convert(wchar_t *s, char **result) {
-    /*
-     * FIXME. Currently this is ISO8859-1 only.
-     */
-    int doing = (result != 0);
-    int ok = TRUE;
-    char *p = NULL;
-    int plen = 0, psize = 0;
-
-    for (; *s; s++) {
-       wchar_t c = *s;
-       char outc;
-
-       if ((c >= 32 && c <= 126) ||
-           (c >= 160 && c <= 255)) {
-           /* Char is OK. */
-           outc = (char)c;
-       } else {
-           /* Char is not OK. */
-           ok = FALSE;
-           outc = 0xBF;               /* approximate the good old DEC `uh?' */
-       }
-       if (doing) {
-           if (plen >= psize) {
-               psize = plen + 256;
-               p = resize(p, psize);
-           }
-           p[plen++] = outc;
-       }
-    }
-    if (doing) {
-       p = resize(p, plen+1);
-       p[plen] = '\0';
-       *result = p;
-    }
-    return ok;
-}
-
  static word *info_transform_wordlist(word *words, keywordlist *keywords)
  {
      word *ret = dup_word_list(words);
@@ -691,8 +668,8 @@ static word *info_transform_wordlist(word *words, keywordlist *keywords)
      return ret;
  }
  
-static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) {
-    char *c;
+static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs) {
+    int ret = 0;
  
      for (; words && words != end; words = words->next) switch (words->type) {
        case word_HyperLink:
@@ -718,56 +695,61 @@ static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) {
         if (towordstyle(words->type) == word_Emph &&
             (attraux(words->aux) == attr_First ||
              attraux(words->aux) == attr_Only))
-           rdaddc(rs, '_');           /* FIXME: configurability */
+           ret += info_rdadd(id, L'_');      /* FIXME: configurability */
         else if (towordstyle(words->type) == word_Code &&
                  (attraux(words->aux) == attr_First ||
                   attraux(words->aux) == attr_Only))
-           rdaddc(rs, '`');           /* FIXME: configurability */
+           ret += info_rdadd(id, L'`');      /* FIXME: configurability */
         if (removeattr(words->type) == word_Normal) {
-           if (info_convert(words->text, &c) || !words->alt)
-               rdaddsc(rs, c);
+           if (cvt_ok(id->charset, words->text) || !words->alt)
+               ret += info_rdadds(id, words->text);
             else
-               info_rdaddwc(rs, words->alt, NULL, FALSE);
-           sfree(c);
+               ret += info_rdaddwc(id, words->alt, NULL, FALSE);
         } else if (removeattr(words->type) == word_WhiteSpace) {
-           rdaddc(rs, ' ');
+           ret += info_rdadd(id, L' ');
         } else if (removeattr(words->type) == word_Quote) {
-           rdaddc(rs, quoteaux(words->aux) == quote_Open ? '`' : '\'');
+           ret += info_rdadd(id, quoteaux(words->aux) == quote_Open ? L'`' : L'\'');
                                        /* FIXME: configurability */
         }
         if (towordstyle(words->type) == word_Emph &&
             (attraux(words->aux) == attr_Last ||
              attraux(words->aux) == attr_Only))
-           rdaddc(rs, '_');           /* FIXME: configurability */
+           ret += info_rdadd(id, L'_');     /* FIXME: configurability */
         else if (towordstyle(words->type) == word_Code &&
                  (attraux(words->aux) == attr_Last ||
                   attraux(words->aux) == attr_Only))
-           rdaddc(rs, '\'');          /* FIXME: configurability */
+           ret += info_rdadd(id, L'\'');     /* FIXME: configurability */
         break;
  
        case word_UpperXref:
        case word_LowerXref:
         if (xrefs && words->private_data) {
-           rdaddsc(rs, "*Note ");
-           rdaddsc(rs, ((node *)words->private_data)->name);
-           rdaddsc(rs, "::");
+           /*
+            * This bit is structural and so must be done in char
+            * rather than wchar_t.
+            */
+           ret += info_rdaddsc(id, "*Note ");
+           ret += info_rdaddsc(id, ((node *)words->private_data)->name);
+           ret += info_rdaddsc(id, "::");
         }
         break;
      }
+
+    return ret;
  }
  
-static int info_width_internal(word *words, int xrefs);
+static int info_width_internal(word *words, int xrefs, int charset);
  
-static int info_width_internal_list(word *words, int xrefs) {
+static int info_width_internal_list(word *words, int xrefs, int charset) {
      int w = 0;
      while (words) {
-       w += info_width_internal(words, xrefs);
+       w += info_width_internal(words, xrefs, charset);
         words = words->next;
      }
      return w;
  }
  
-static int info_width_internal(word *words, int xrefs) {
+static int info_width_internal(word *words, int xrefs, int charset) {
      switch (words->type) {
        case word_HyperLink:
        case word_HyperEnd:
@@ -784,9 +766,9 @@ static int info_width_internal(word *words, int xrefs) {
                  ? (attraux(words->aux) == attr_Only ? 2 :
                     attraux(words->aux) == attr_Always ? 0 : 1)
                  : 0) +
-               (info_convert(words->text, NULL) || !words->alt ?
+               (cvt_ok(charset, words->text) || !words->alt ?
                  ustrlen(words->text) :
-                info_width_internal_list(words->alt, xrefs)));
+                info_width_internal_list(words->alt, xrefs, charset)));
  
        case word_WhiteSpace:
        case word_EmphSpace:
@@ -817,70 +799,54 @@ static int info_width_internal(word *words, int xrefs) {
  
  static int info_width_noxrefs(void *ctx, word *words)
  {
-    IGNORE(ctx);
-    return info_width_internal(words, FALSE);
+    return info_width_internal(words, FALSE, *(int *)ctx);
  }
  static int info_width_xrefs(void *ctx, word *words)
  {
-    IGNORE(ctx);
-    return info_width_internal(words, TRUE);
+    return info_width_internal(words, TRUE, *(int *)ctx);
  }
  
-static void info_heading(rdstringc *text, word *tprefix,
+static void info_heading(info_data *text, word *tprefix,
                          word *words, int width) {
-    rdstringc t = { 0, 0, NULL };
-    int margin, length;
+    int length;
      int firstlinewidth, wrapwidth;
-    int i;
      wrappedline *wrapping, *p;
  
+    length = 0;
      if (tprefix) {
-       info_rdaddwc(&t, tprefix, NULL, FALSE);
-       rdaddsc(&t, ": ");             /* FIXME: configurability */
+       length += info_rdaddwc(text, tprefix, NULL, FALSE);
+       length += info_rdadds(text, L": ");/* FIXME: configurability */
      }
-    margin = length = (t.text ? strlen(t.text) : 0);
  
-    margin = 0;
-    firstlinewidth = width - length;
      wrapwidth = width;
+    firstlinewidth = width - length;
  
      wrapping = wrap_para(words, firstlinewidth, wrapwidth,
-                        info_width_noxrefs, NULL, 0);
+                        info_width_noxrefs, &text->charset, 0);
      for (p = wrapping; p; p = p->next) {
-       info_rdaddwc(&t, p->begin, p->end, FALSE);
-       length = (t.text ? strlen(t.text) : 0);
-       for (i = 0; i < margin; i++)
-           rdaddc(text, ' ');
-       rdaddsc(text, t.text);
-       rdaddc(text, '\n');
-       for (i = 0; i < margin; i++)
-           rdaddc(text, ' ');
+       length += info_rdaddwc(text, p->begin, p->end, FALSE);
+       info_rdadd(text, L'\n');
         while (length--)
-           rdaddc(text, '-');
-       rdaddc(text, '\n');
-       margin = 0;
-       sfree(t.text);
-       t = empty_rdstringc;
+           info_rdadd(text, L'-');  /* FIXME: configurability */
+       info_rdadd(text, L'\n');
+       length = 0;
      }
      wrap_free(wrapping);
-    rdaddc(text, '\n');
-
-    sfree(t.text);
+    info_rdadd(text, L'\n');
  }
  
-static void info_rule(rdstringc *text, int indent, int width) {
-    while (indent--) rdaddc(text, ' ');
-    while (width--) rdaddc(text, '-');
-    rdaddc(text, '\n');
-    rdaddc(text, '\n');
+static void info_rule(info_data *text, int indent, int width) {
+    while (indent--) info_rdadd(text, L' ');
+    while (width--) info_rdadd(text, L'-');
+    info_rdadd(text, L'\n');
+    info_rdadd(text, L'\n');
  }
  
-static void info_para(rdstringc *text, word *prefix, char *prefixextra,
+static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
                       word *input, keywordlist *keywords,
                       int indent, int extraindent, int width) {
      wrappedline *wrapping, *p;
      word *words;
-    rdstringc pfx = { 0, 0, NULL };
      int e;
      int i;
      int firstlinewidth = width;
@@ -888,75 +854,69 @@ static void info_para(rdstringc *text, word *prefix, char *prefixextra,
      words = info_transform_wordlist(input, keywords);
  
      if (prefix) {
-       info_rdaddwc(&pfx, prefix, NULL, FALSE);
-       if (prefixextra)
-           rdaddsc(&pfx, prefixextra);
         for (i = 0; i < indent; i++)
-           rdaddc(text, ' ');
-       rdaddsc(text, pfx.text);
+           info_rdadd(text, L' ');
+       e = info_rdaddwc(text, prefix, NULL, FALSE);
+       if (prefixextra)
+           e += info_rdadds(text, prefixextra);
         /* If the prefix is too long, shorten the first line to fit. */
-       e = extraindent - strlen(pfx.text);
+       e = extraindent - e;
         if (e < 0) {
             firstlinewidth += e;       /* this decreases it, since e < 0 */
             if (firstlinewidth < 0) {
                 e = indent + extraindent;
                 firstlinewidth = width;
-               rdaddc(text, '\n');
+               info_rdadd(text, L'\n');
             } else
                 e = 0;
         }
-       sfree(pfx.text);
      } else
         e = indent + extraindent;
  
      wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
-                        NULL, 0);
+                        &text->charset, 0);
      for (p = wrapping; p; p = p->next) {
         for (i = 0; i < e; i++)
-           rdaddc(text, ' ');
+           info_rdadd(text, L' ');
         info_rdaddwc(text, p->begin, p->end, TRUE);
-       rdaddc(text, '\n');
+       info_rdadd(text, L'\n');
         e = indent + extraindent;
      }
      wrap_free(wrapping);
-    rdaddc(text, '\n');
+    info_rdadd(text, L'\n');
  
      free_word_list(words);
  }
  
-static void info_codepara(rdstringc *text, word *words,
+static void info_codepara(info_data *text, word *words,
                           int indent, int width) {
      int i;
  
      for (; words; words = words->next) if (words->type == word_WeakCode) {
-       char *c;
-       info_convert(words->text, &c);
-       if (strlen(c) > (size_t)width) {
+       for (i = 0; i < indent; i++)
+           info_rdadd(text, L' ');
+       if (info_rdadds(text, words->text) > width) {
             /* FIXME: warn */
         }
-       for (i = 0; i < indent; i++)
-           rdaddc(text, ' ');
-       rdaddsc(text, c);
-       rdaddc(text, '\n');
-       sfree(c);
+       info_rdadd(text, L'\n');
      }
  
-    rdaddc(text, '\n');
+    info_rdadd(text, L'\n');
  }
  
-static void info_versionid(rdstringc *text, word *words) {
-    rdaddc(text, '[');                /* FIXME: configurability */
+static void info_versionid(info_data *text, word *words) {
+    info_rdadd(text, L'[');                   /* FIXME: configurability */
      info_rdaddwc(text, words, NULL, FALSE);
-    rdaddsc(text, "]\n");
+    info_rdadds(text, L"]\n");
  }
  
-static node *info_node_new(char *name)
+static node *info_node_new(char *name, int charset)
  {
      node *n;
  
      n = mknew(node);
-    n->text.text = NULL;
-    n->text.pos = n->text.size = 0;
+    n->text = empty_info_data;
+    n->text.charset = charset;
      n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
      n->name = dupstr(name);
      n->started_menu = FALSE;
@@ -964,17 +924,20 @@ static node *info_node_new(char *name)
      return n;
  }
  
-static char *info_node_name(paragraph *par)
+static char *info_node_name(paragraph *par, int charset)
  {
-    rdstringc rsc = { 0, 0, NULL };
+    info_data id = EMPTY_INFO_DATA;
      char *p, *q;
-    info_rdaddwc(&rsc, par->kwtext ? par->kwtext : par->words, NULL, FALSE);
+
+    id.charset = charset;
+    info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words, NULL, FALSE);
+    info_rdaddsc(&id, NULL);
  
      /*
       * We cannot have commas or colons in a node name. Remove any
       * that we find, with a warning.
       */
-    p = q = rsc.text;
+    p = q = id.output.text;
      while (*p) {
         if (*p == ':' || *p == ',') {
             error(err_infonodechar, &par->fpos, *p);
@@ -985,10 +948,10 @@ static char *info_node_name(paragraph *par)
      }
      *p = '\0';
  
-    return rsc.text;
+    return id.output.text;
  }
  
-static void info_menu_item(rdstringc *text, node *n, paragraph *p)
+static void info_menu_item(info_data *text, node *n, paragraph *p)
  {
      /*
       * FIXME: Depending on how we're doing node names in this info
@@ -1000,14 +963,95 @@ static void info_menu_item(rdstringc *text, node *n, paragraph *p)
       * 
       *   * Chapter number: Node name.
       * 
-     * 
+     * This function mostly works in char rather than wchar_t,
+     * because a menu item is a structural component.
       */
-    rdaddsc(text, "* ");
-    rdaddsc(text, n->name);
-    rdaddsc(text, "::");
+    info_rdaddsc(text, "* ");
+    info_rdaddsc(text, n->name);
+    info_rdaddsc(text, "::");
      if (p) {
-       rdaddc(text, ' ');
+       info_rdaddc(text, ' ');
         info_rdaddwc(text, p->words, NULL, FALSE);
      }
-    rdaddc(text, '\n');
+    info_rdaddc(text, '\n');
+}
+
+/*
+ * These functions implement my wrapper on the rdadd* calls which
+ * allows me to switch arbitrarily between literal octet-string
+ * text and charset-translated Unicode. (Because no matter what
+ * character set I write the actual text in, I expect info readers
+ * to treat node names and file names literally and to expect
+ * keywords like `*Note' in their canonical form, so I have to take
+ * steps to ensure that those structural elements of the file
+ * aren't messed with.)
+ */
+static int info_rdadds(info_data *d, wchar_t const *wcs)
+{
+    if (!d->wcmode) {
+       d->state = charset_init_state;
+       d->wcmode = TRUE;
+    }
+
+    if (wcs) {
+       char buf[256];
+       int len, origlen, ret;
+
+       origlen = len = ustrlen(wcs);
+       while (len > 0) {
+           int prevlen = len;
+
+           ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
+                                      d->charset, &d->state, NULL);
+
+           assert(len < prevlen);
+
+           if (ret > 0) {
+               buf[ret] = '\0';
+               rdaddsc(&d->output, buf);
+           }
+       }
+
+       return origlen;
+    } else
+       return 0;
+}
+
+static int info_rdaddsc(info_data *d, char const *cs)
+{
+    if (d->wcmode) {
+       char buf[256];
+       int ret;
+
+       ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
+                                  d->charset, &d->state, NULL);
+       if (ret > 0) {
+           buf[ret] = '\0';
+           rdaddsc(&d->output, buf);
+       }
+
+       d->wcmode = FALSE;
+    }
+
+    if (cs) {
+       rdaddsc(&d->output, cs);
+       return strlen(cs);
+    } else
+       return 0;
+}
+
+static int info_rdadd(info_data *d, wchar_t wc)
+{
+    wchar_t wcs[2];
+    wcs[0] = wc;
+    wcs[1] = L'\0';
+    return info_rdadds(d, wcs);
+}
+
+static int info_rdaddc(info_data *d, char c)
+{
+    char cs[2];
+    cs[0] = c;
+    cs[1] = '\0';
+    return info_rdaddsc(d, cs);
  }
diff --git a/bk_text.c b/bk_text.c

index d551741..373de74 100644 (file)
--- a/bk_text.c
+++ b/bk_text.c
@@ -360,22 +360,6 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
      sfree(conf.filename);
  }
  
-static int text_ok(int charset, const wchar_t *s)
-{
-    char buf[256];
-    charset_state state = CHARSET_INIT_STATE;
-    int err, len = ustrlen(s);
-
-    err = 0;
-    while (len > 0) {
-       (void)charset_from_unicode(&s, &len, buf, lenof(buf),
-                                  charset, &state, &err);
-       if (err)
-           return FALSE;
-    }
-    return TRUE;
-}
-
  static void text_output(textfile *tf, const wchar_t *s)
  {
      char buf[256];
@@ -441,7 +425,7 @@ static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
                   attraux(text->aux) == attr_Only))
             rdadd(rs, L'`');           /* FIXME: configurability */
         if (removeattr(text->type) == word_Normal) {
-           if (text_ok(charset, text->text) || !text->alt)
+           if (cvt_ok(charset, text->text) || !text->alt)
                 rdadds(rs, text->text);
             else
                 text_rdaddw(charset, rs, text->alt, NULL);
@@ -495,7 +479,7 @@ static int text_width(void *ctx, word *text) {
                  ? (attraux(text->aux) == attr_Only ? 2 :
                     attraux(text->aux) == attr_Always ? 0 : 1)
                  : 0) +
-               (text_ok(charset, text->text) || !text->alt ?
+               (cvt_ok(charset, text->text) || !text->alt ?
                  ustrlen(text->text) :
                  text_width_list(ctx, text->alt)));
  
diff --git a/halibut.h b/halibut.h

index 94493d4..33780ea 100644 (file)
--- a/halibut.h
+++ b/halibut.h
@@ -294,6 +294,7 @@ int utob(wchar_t *);
  int uisdigit(wchar_t);
  wchar_t *ustrlow(wchar_t *s);
  wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
+int cvt_ok(int charset, const wchar_t *s);
  
  /*
   * help.c
diff --git a/ustring.c b/ustring.c

index 169a377..11a022c 100644 (file)
--- a/ustring.c
+++ b/ustring.c
@@ -314,3 +314,23 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
      sfree(text);
      return wblk;
  }
+
+/*
+ * Determine whether a Unicode string can be translated into a
+ * given charset without any missing characters.
+ */
+int cvt_ok(int charset, const wchar_t *s)
+{
+    char buf[256];
+    charset_state state = CHARSET_INIT_STATE;
+    int err, len = ustrlen(s);
+
+    err = 0;
+    while (len > 0) {
+       (void)charset_from_unicode(&s, &len, buf, lenof(buf),
+                                  charset, &state, &err);
+       if (err)
+           return FALSE;
+    }
+    return TRUE;
+}
author	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Wed, 21 Apr 2004 18:07:26 +0000 (18:07 +0000)
committer	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Wed, 21 Apr 2004 18:07:26 +0000 (18:07 +0000)
bk_info.c		patch \| blob \| blame \| history
bk_text.c		patch \| blob \| blame \| history
halibut.h		patch \| blob \| blame \| history
ustring.c		patch \| blob \| blame \| history