From 91f93b94744447a088ce435e50500a9598cb5466 Mon Sep 17 00:00:00 2001
From: simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Date: Wed, 21 Apr 2004 18:07:26 +0000
Subject: [PATCH] Charset support for the info backend (\cfg{info-charset}).
 (This checkin touches other files because a function in bk_text.c turned out
 to be of more general use so I moved it out into ustring.c.)

git-svn-id: svn://svn.tartarus.org/sgt/halibut@4111 cda61777-01e9-0310-a592-d414129be87e
---
 bk_info.c | 502 ++++++++++++++++++++++++++++++++++----------------------------
 bk_text.c |  20 +--
 halibut.h |   1 +
 ustring.c |  20 +++
 4 files changed, 296 insertions(+), 247 deletions(-)

diff --git a/bk_info.c b/bk_info.c
index fc1b292..f7efbf5 100644
--- a/bk_info.c
+++ b/bk_info.c
@@ -25,6 +25,17 @@
  *  - might be helpful to diagnose duplicate node names!
  */
 
+/*
+ * FIXME:
+ * 
+ *  - alignment in the index is broken when a non-representable
+ *    character appears with no alternative. More generally, I
+ *    fear, this is the fault of the info_rdadd* functions failing
+ *    to return correct width figures in this circumstance (so it
+ *    will affect list paragraph prefixes and paragraph wrapping as
+ *    well).
+ */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
@@ -33,39 +44,53 @@
 typedef struct {
     char *filename;
     int maxfilesize;
+    int charset;
 } infoconfig;
 
+typedef struct {
+    rdstringc output;		       /* bytes accumulated so far */
+    int charset;		       /* charset given to charset_from_unicode */
+    charset_state state;	       /* converter state kept between calls */
+    int wcmode;			       /* TRUE after translated text, FALSE after literal */
+} info_data;
+#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
+static const info_data empty_info_data = EMPTY_INFO_DATA;
+
 typedef struct node_tag node;
 struct node_tag {
     node *listnext;
     node *up, *prev, *next, *lastchild;
     int pos, started_menu, filenum;
     char *name;
-    rdstringc text;
+    info_data text;
 };
 
 typedef struct {
     char *text;
+    int length;
     int nnodes, nodesize;
     node **nodes;
 } info_idx;
 
-static int info_convert(wchar_t *, char **);
+static int info_rdadd(info_data *, wchar_t);
+static int info_rdadds(info_data *, wchar_t const *);
+static int info_rdaddc(info_data *, char);
+static int info_rdaddsc(info_data *, char const *);
 
-static void info_heading(rdstringc *, word *, word *, int);
-static void info_rule(rdstringc *, int, int);
-static void info_para(rdstringc *, word *, char *, word *, keywordlist *,
+static void info_heading(info_data *, word *, word *, int);
+static void info_rule(info_data *, int, int);
+static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
 		      int, int, int);
-static void info_codepara(rdstringc *, word *, int, int);
-static void info_versionid(rdstringc *, word *);
-static void info_menu_item(rdstringc *, node *, paragraph *);
+static void info_codepara(info_data *, word *, int, int);
+static void info_versionid(info_data *, word *);
+static void info_menu_item(info_data *, node *, paragraph *);
 static word *info_transform_wordlist(word *, keywordlist *);
 static int info_check_index(word *, node *, indexdata *);
 
-static void info_rdaddwc(rdstringc *, word *, word *, int);
+static int info_rdaddwc(info_data *, word *, word *, int);
 
-static node *info_node_new(char *name);
-static char *info_node_name(paragraph *p);
+static node *info_node_new(char *name, int charset);
+static char *info_node_name(paragraph *p, int charset);
 
 static infoconfig info_configure(paragraph *source) {
     infoconfig ret;
@@ -75,12 +100,17 @@ static infoconfig info_configure(paragraph *source) {
      */
     ret.filename = dupstr("output.info");
     ret.maxfilesize = 64 << 10;
+    ret.charset = CS_ASCII;
 
     for (; source; source = source->next) {
 	if (source->type == para_Config) {
 	    if (!ustricmp(source->keyword, L"info-filename")) {
 		sfree(ret.filename);
 		ret.filename = dupstr(adv(source->origkeyword));
+	    } else if (!ustricmp(source->keyword, L"info-charset")) {
+		char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
+		ret.charset = charset_from_localenc(csname);
+		sfree(csname);
 	    } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
 		ret.maxfilesize = utoi(uadv(source->keyword));
 	    }
@@ -101,18 +131,18 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
     infoconfig conf;
     word *prefix, *body, *wp;
     word spaceword;
-    char *prefixextra;
+    wchar_t *prefixextra;
     int nesting, nestindent;
     int indentb, indenta;
     int filepos;
     int has_index;
-    rdstringc intro_text = { 0, 0, NULL };
+    info_data intro_text = EMPTY_INFO_DATA;
     node *topnode, *currnode;
     word bullet;
     FILE *fp;
 
     /*
-     * FIXME
+     * FIXME: possibly configurability?
      */
     int width = 70, listindentbefore = 1, listindentafter = 3;
     int indent_code = 2, index_width = 40;
@@ -124,7 +154,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
     /*
      * Go through and create a node for each section.
      */
-    topnode = info_node_new("Top");
+    topnode = info_node_new("Top", conf.charset);
     currnode = topnode;
     for (p = sourceform; p; p = p->next) switch (p->type) {
 	/*
@@ -139,8 +169,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	    node *newnode, *upnode;
 	    char *nodename;
 
-	    nodename = info_node_name(p);
-	    newnode = info_node_new(nodename);
+	    nodename = info_node_name(p, conf.charset);
+	    newnode = info_node_new(nodename, conf.charset);
 	    sfree(nodename);
 
 	    p->private_data = newnode;
@@ -170,14 +200,16 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 
 	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 	    info_idx *ii = mknew(info_idx);
-	    rdstringc rs = { 0, 0, NULL };
+	    info_data id = EMPTY_INFO_DATA;
+
+	    id.charset = conf.charset;
 
 	    ii->nnodes = ii->nodesize = 0;
 	    ii->nodes = NULL;
 
-	    info_rdaddwc(&rs, entry->text, NULL, FALSE);
+	    ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE);
 
-	    ii->text = rs.text;
+	    ii->text = id.output.text;
 
 	    entry->backend_data = ii;
 	}
@@ -189,11 +221,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
      * good place to put the copyright notice and the version IDs. 
      * Also, Info directory entries are expected to go here.
      */
+    intro_text.charset = conf.charset;
 
-    rdaddsc(&intro_text,
+    info_rdaddsc(&intro_text,
 	    "This Info file generated by Halibut, ");
-    rdaddsc(&intro_text, version);
-    rdaddsc(&intro_text, "\n\n");
+    info_rdaddsc(&intro_text, version);
+    info_rdaddsc(&intro_text, "\n\n");
 
     for (p = sourceform; p; p = p->next)
 	if (p->type == para_Config &&
@@ -211,33 +244,27 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 		continue;
 	    }
 
-	    rdaddsc(&intro_text, "INFO-DIR-SECTION ");
-	    s = utoa_dup(section, CS_FIXME);
-	    rdaddsc(&intro_text, s);
-	    sfree(s);
-	    rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
-	    s = utoa_dup(shortname, CS_FIXME);
-	    rdaddsc(&intro_text, s);
-	    sfree(s);
-	    rdaddsc(&intro_text, ": (");
+	    info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
+	    info_rdadds(&intro_text, section);
+	    info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
+	    info_rdadds(&intro_text, shortname);
+	    info_rdaddsc(&intro_text, ": (");
 	    s = dupstr(conf.filename);
 	    if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 		s[strlen(s)-5] = '\0';
-	    rdaddsc(&intro_text, s);
+	    info_rdaddsc(&intro_text, s);
 	    sfree(s);
-	    rdaddsc(&intro_text, ")");
+	    info_rdaddsc(&intro_text, ")");
 	    if (*kw) {
 		keyword *kwl = kw_lookup(keywords, kw);
 		if (kwl && kwl->para->private_data) {
 		    node *n = (node *)kwl->para->private_data;
-		    rdaddsc(&intro_text, n->name);
+		    info_rdaddsc(&intro_text, n->name);
 		}
 	    }
-	    rdaddsc(&intro_text, ".   ");
-	    s = utoa_dup(longname, CS_FIXME);
-	    rdaddsc(&intro_text, s);
-	    sfree(s);
-	    rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
+	    info_rdaddsc(&intro_text, ".   ");
+	    info_rdadds(&intro_text, longname);
+	    info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 	}
 
     for (p = sourceform; p; p = p->next)
@@ -249,8 +276,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	if (p->type == para_VersionID)
 	    info_versionid(&intro_text, p->words);
 
-    if (intro_text.text[intro_text.pos-1] != '\n')
-	rdaddc(&intro_text, '\n');
+    if (intro_text.output.text[intro_text.output.pos-1] != '\n')
+	info_rdaddc(&intro_text, '\n');
 
     /* Do the title */
     for (p = sourceform; p; p = p->next)
@@ -306,7 +333,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	assert(currnode->up);
 
 	if (!currnode->up->started_menu) {
-	    rdaddsc(&currnode->up->text, "* Menu:\n\n");
+	    info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 	    currnode->up->started_menu = TRUE;
 	}
 	info_menu_item(&currnode->up->text, currnode, p);
@@ -339,7 +366,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	    indenta = listindentafter;
 	} else if (p->type == para_NumberedList) {
 	    prefix = p->kwtext;
-	    prefixextra = ".";	       /* FIXME: configurability */
+	    prefixextra = L".";	       /* FIXME: configurability */
 	    indentb = listindentbefore;
 	    indenta = listindentafter;
 	} else if (p->type == para_Description) {
@@ -388,14 +415,14 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	int i, j, k;
 	indexentry *entry;
 
-	newnode = info_node_new("Index");
+	newnode = info_node_new("Index", conf.charset);
 	newnode->up = topnode;
 
 	currnode->next = newnode;
 	newnode->prev = currnode;
 	currnode->listnext = newnode;
 
-	rdaddsc(&newnode->text, "Index\n-----\n\n");
+	info_rdaddsc(&newnode->text, "Index\n-----\n\n");
 
 	info_menu_item(&topnode->text, newnode, NULL);
 
@@ -403,7 +430,6 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	    info_idx *ii = (info_idx *)entry->backend_data;
 
 	    for (j = 0; j < ii->nnodes; j++) {
-		int pos0 = newnode->text.pos;
 		/*
 		 * When we have multiple references for a single
 		 * index term, we only display the actual term on
@@ -411,12 +437,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 		 * really are the same.
 		 */
 		if (j == 0)
-		    rdaddsc(&newnode->text, ii->text);
-		for (k = newnode->text.pos - pos0; k < index_width; k++)
-		    rdaddc(&newnode->text, ' ');
-		rdaddsc(&newnode->text, "   *Note ");
-		rdaddsc(&newnode->text, ii->nodes[j]->name);
-		rdaddsc(&newnode->text, "::\n");
+		    info_rdaddsc(&newnode->text, ii->text);
+		for (k = (j ? 0 : ii->length); k < index_width; k++)
+		    info_rdaddc(&newnode->text, ' ');
+		info_rdaddsc(&newnode->text, "   *Note ");
+		info_rdaddsc(&newnode->text, ii->nodes[j]->name);
+		info_rdaddsc(&newnode->text, "::\n");
 	    }
 	}
     }
@@ -426,31 +452,31 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
      * and the node line at the top.
      */
     for (currnode = topnode; currnode; currnode = currnode->listnext) {
-	char *origtext = currnode->text.text;
-	currnode->text.text = NULL;
-	currnode->text.pos = currnode->text.size = 0;
-	rdaddsc(&currnode->text, "\037\nFile: ");
-	rdaddsc(&currnode->text, conf.filename);
-	rdaddsc(&currnode->text, ",  Node: ");
-	rdaddsc(&currnode->text, currnode->name);
+	char *origtext = currnode->text.output.text;
+	currnode->text = empty_info_data;
+	currnode->text.charset = conf.charset;
+	info_rdaddsc(&currnode->text, "\037\nFile: ");
+	info_rdaddsc(&currnode->text, conf.filename);
+	info_rdaddsc(&currnode->text, ",  Node: ");
+	info_rdaddsc(&currnode->text, currnode->name);
 	if (currnode->prev) {
-	    rdaddsc(&currnode->text, ",  Prev: ");
-	    rdaddsc(&currnode->text, currnode->prev->name);
+	    info_rdaddsc(&currnode->text, ",  Prev: ");
+	    info_rdaddsc(&currnode->text, currnode->prev->name);
 	}
-	rdaddsc(&currnode->text, ",  Up: ");
-	rdaddsc(&currnode->text, (currnode->up ?
-				  currnode->up->name : "(dir)"));
+	info_rdaddsc(&currnode->text, ",  Up: ");
+	info_rdaddsc(&currnode->text, (currnode->up ?
+				       currnode->up->name : "(dir)"));
 	if (currnode->next) {
-	    rdaddsc(&currnode->text, ",  Next: ");
-	    rdaddsc(&currnode->text, currnode->next->name);
+	    info_rdaddsc(&currnode->text, ",  Next: ");
+	    info_rdaddsc(&currnode->text, currnode->next->name);
 	}
-	rdaddsc(&currnode->text, "\n\n");
-	rdaddsc(&currnode->text, origtext);
+	info_rdaddsc(&currnode->text, "\n\n");
+	info_rdaddsc(&currnode->text, origtext);
 	/*
 	 * Just make _absolutely_ sure we end with a newline.
 	 */
-	if (currnode->text.text[currnode->text.pos-1] != '\n')
-	    rdaddc(&currnode->text, '\n');
+	if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
+	    info_rdaddc(&currnode->text, '\n');
 
 	sfree(origtext);
     }    
@@ -458,25 +484,25 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
     /*
      * Compute the offsets for the tag table.
      */
-    filepos = intro_text.pos;
+    filepos = intro_text.output.pos;
     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 	currnode->pos = filepos;
-	filepos += currnode->text.pos;
+	filepos += currnode->text.output.pos;
     }
 
     /*
      * Split into sub-files.
      */
     if (conf.maxfilesize > 0) {
-	int currfilesize = intro_text.pos, currfilenum = 1;
+	int currfilesize = intro_text.output.pos, currfilenum = 1;
 	for (currnode = topnode; currnode; currnode = currnode->listnext) {
-	    if (currfilesize > intro_text.pos &&
-		currfilesize + currnode->text.pos > conf.maxfilesize) {
+	    if (currfilesize > intro_text.output.pos &&
+		currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 		currfilenum++;
-		currfilesize = intro_text.pos;
+		currfilesize = intro_text.output.pos;
 	    }
 	    currnode->filenum = currfilenum;
-	    currfilesize += currnode->text.pos;
+	    currfilesize += currnode->text.output.pos;
 	}
     }
 
@@ -488,10 +514,10 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	error(err_cantopenw, conf.filename);
 	return;
     }
-    fputs(intro_text.text, fp);
+    fputs(intro_text.output.text, fp);
     if (conf.maxfilesize == 0) {
 	for (currnode = topnode; currnode; currnode = currnode->listnext)
-	    fputs(currnode->text.text, fp);
+	    fputs(currnode->text.output.text, fp);
     } else {
 	int filenum = 0;
 	fprintf(fp, "\037\nIndirect:\n");
@@ -533,9 +559,9 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 		    return;
 		}
 		sfree(fname);
-		fputs(intro_text.text, fp);
+		fputs(intro_text.output.text, fp);
 	    }
-	    fputs(currnode->text.text, fp);
+	    fputs(currnode->text.output.text, fp);
 	}
 
 	if (fp)
@@ -585,55 +611,6 @@ static int info_check_index(word *w, node *n, indexdata *idx)
     return ret;
 }
 
-/*
- * Convert a wide string into a string of chars. If `result' is
- * non-NULL, mallocs the resulting string and stores a pointer to
- * it in `*result'. If `result' is NULL, merely checks whether all
- * characters in the string are feasible for the output character
- * set.
- *
- * Return is nonzero if all characters are OK. If not all
- * characters are OK but `result' is non-NULL, a result _will_
- * still be generated!
- */
-static int info_convert(wchar_t *s, char **result) {
-    /*
-     * FIXME. Currently this is ISO8859-1 only.
-     */
-    int doing = (result != 0);
-    int ok = TRUE;
-    char *p = NULL;
-    int plen = 0, psize = 0;
-
-    for (; *s; s++) {
-	wchar_t c = *s;
-	char outc;
-
-	if ((c >= 32 && c <= 126) ||
-	    (c >= 160 && c <= 255)) {
-	    /* Char is OK. */
-	    outc = (char)c;
-	} else {
-	    /* Char is not OK. */
-	    ok = FALSE;
-	    outc = 0xBF;	       /* approximate the good old DEC `uh?' */
-	}
-	if (doing) {
-	    if (plen >= psize) {
-		psize = plen + 256;
-		p = resize(p, psize);
-	    }
-	    p[plen++] = outc;
-	}
-    }
-    if (doing) {
-	p = resize(p, plen+1);
-	p[plen] = '\0';
-	*result = p;
-    }
-    return ok;
-}
-
 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 {
     word *ret = dup_word_list(words);
@@ -691,8 +668,8 @@ static word *info_transform_wordlist(word *words, keywordlist *keywords)
     return ret;
 }
 
-static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) {
-    char *c;
+static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs) {
+    int ret = 0;
 
     for (; words && words != end; words = words->next) switch (words->type) {
       case word_HyperLink:
@@ -718,56 +695,61 @@ static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) {
 	if (towordstyle(words->type) == word_Emph &&
 	    (attraux(words->aux) == attr_First ||
 	     attraux(words->aux) == attr_Only))
-	    rdaddc(rs, '_');	       /* FIXME: configurability */
+	    ret += info_rdadd(id, L'_');      /* FIXME: configurability */
 	else if (towordstyle(words->type) == word_Code &&
 		 (attraux(words->aux) == attr_First ||
 		  attraux(words->aux) == attr_Only))
-	    rdaddc(rs, '`');	       /* FIXME: configurability */
+	    ret += info_rdadd(id, L'`');      /* FIXME: configurability */
 	if (removeattr(words->type) == word_Normal) {
-	    if (info_convert(words->text, &c) || !words->alt)
-		rdaddsc(rs, c);
+	    if (cvt_ok(id->charset, words->text) || !words->alt)
+		ret += info_rdadds(id, words->text);
 	    else
-		info_rdaddwc(rs, words->alt, NULL, FALSE);
-	    sfree(c);
+		ret += info_rdaddwc(id, words->alt, NULL, FALSE);
 	} else if (removeattr(words->type) == word_WhiteSpace) {
-	    rdaddc(rs, ' ');
+	    ret += info_rdadd(id, L' ');
 	} else if (removeattr(words->type) == word_Quote) {
-	    rdaddc(rs, quoteaux(words->aux) == quote_Open ? '`' : '\'');
+	    ret += info_rdadd(id, quoteaux(words->aux) == quote_Open ? L'`' : L'\'');
 				       /* FIXME: configurability */
 	}
 	if (towordstyle(words->type) == word_Emph &&
 	    (attraux(words->aux) == attr_Last ||
 	     attraux(words->aux) == attr_Only))
-	    rdaddc(rs, '_');	       /* FIXME: configurability */
+	    ret += info_rdadd(id, L'_');     /* FIXME: configurability */
 	else if (towordstyle(words->type) == word_Code &&
 		 (attraux(words->aux) == attr_Last ||
 		  attraux(words->aux) == attr_Only))
-	    rdaddc(rs, '\'');	       /* FIXME: configurability */
+	    ret += info_rdadd(id, L'\'');     /* FIXME: configurability */
 	break;
 
       case word_UpperXref:
       case word_LowerXref:
 	if (xrefs && words->private_data) {
-	    rdaddsc(rs, "*Note ");
-	    rdaddsc(rs, ((node *)words->private_data)->name);
-	    rdaddsc(rs, "::");
+	    /*
+	     * This bit is structural and so must be done in char
+	     * rather than wchar_t.
+	     */
+	    ret += info_rdaddsc(id, "*Note ");
+	    ret += info_rdaddsc(id, ((node *)words->private_data)->name);
+	    ret += info_rdaddsc(id, "::");
 	}
 	break;
     }
+
+    return ret;
 }
 
-static int info_width_internal(word *words, int xrefs);
+static int info_width_internal(word *words, int xrefs, int charset);
 
-static int info_width_internal_list(word *words, int xrefs) {
+static int info_width_internal_list(word *words, int xrefs, int charset) {
     int w = 0;
     while (words) {
-	w += info_width_internal(words, xrefs);
+	w += info_width_internal(words, xrefs, charset);
 	words = words->next;
     }
     return w;
 }
 
-static int info_width_internal(word *words, int xrefs) {
+static int info_width_internal(word *words, int xrefs, int charset) {
     switch (words->type) {
       case word_HyperLink:
       case word_HyperEnd:
@@ -784,9 +766,9 @@ static int info_width_internal(word *words, int xrefs) {
 		 ? (attraux(words->aux) == attr_Only ? 2 :
 		    attraux(words->aux) == attr_Always ? 0 : 1)
 		 : 0) +
-		(info_convert(words->text, NULL) || !words->alt ?
+		(cvt_ok(charset, words->text) || !words->alt ?
 		 ustrlen(words->text) :
-		 info_width_internal_list(words->alt, xrefs)));
+		 info_width_internal_list(words->alt, xrefs, charset)));
 
       case word_WhiteSpace:
       case word_EmphSpace:
@@ -817,70 +799,54 @@ static int info_width_internal(word *words, int xrefs) {
 
 static int info_width_noxrefs(void *ctx, word *words)
 {
-    IGNORE(ctx);
-    return info_width_internal(words, FALSE);
+    return info_width_internal(words, FALSE, *(int *)ctx);
 }
 static int info_width_xrefs(void *ctx, word *words)
 {
-    IGNORE(ctx);
-    return info_width_internal(words, TRUE);
+    return info_width_internal(words, TRUE, *(int *)ctx);
 }
 
-static void info_heading(rdstringc *text, word *tprefix,
+static void info_heading(info_data *text, word *tprefix,
 			 word *words, int width) {
-    rdstringc t = { 0, 0, NULL };
-    int margin, length;
+    int length;
     int firstlinewidth, wrapwidth;
-    int i;
     wrappedline *wrapping, *p;
 
+    length = 0;
     if (tprefix) {
-	info_rdaddwc(&t, tprefix, NULL, FALSE);
-	rdaddsc(&t, ": ");	       /* FIXME: configurability */
+	length += info_rdaddwc(text, tprefix, NULL, FALSE);
+	length += info_rdadds(text, L": ");/* FIXME: configurability */
     }
-    margin = length = (t.text ? strlen(t.text) : 0);
 
-    margin = 0;
-    firstlinewidth = width - length;
     wrapwidth = width;
+    firstlinewidth = width - length;
 
     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
-			 info_width_noxrefs, NULL, 0);
+			 info_width_noxrefs, &text->charset, 0);
     for (p = wrapping; p; p = p->next) {
-	info_rdaddwc(&t, p->begin, p->end, FALSE);
-	length = (t.text ? strlen(t.text) : 0);
-	for (i = 0; i < margin; i++)
-	    rdaddc(text, ' ');
-	rdaddsc(text, t.text);
-	rdaddc(text, '\n');
-	for (i = 0; i < margin; i++)
-	    rdaddc(text, ' ');
+	length += info_rdaddwc(text, p->begin, p->end, FALSE);
+	info_rdadd(text, L'\n');
 	while (length--)
-	    rdaddc(text, '-');
-	rdaddc(text, '\n');
-	margin = 0;
-	sfree(t.text);
-	t = empty_rdstringc;
+	    info_rdadd(text, L'-');  /* FIXME: configurability */
+	info_rdadd(text, L'\n');
+	length = 0;
     }
     wrap_free(wrapping);
-    rdaddc(text, '\n');
-
-    sfree(t.text);
+    info_rdadd(text, L'\n');
 }
 
-static void info_rule(rdstringc *text, int indent, int width) {
-    while (indent--) rdaddc(text, ' ');
-    while (width--) rdaddc(text, '-');
-    rdaddc(text, '\n');
-    rdaddc(text, '\n');
+static void info_rule(info_data *text, int indent, int width) {
+    while (indent--) info_rdadd(text, L' ');
+    while (width--) info_rdadd(text, L'-');
+    info_rdadd(text, L'\n');
+    info_rdadd(text, L'\n');
 }
 
-static void info_para(rdstringc *text, word *prefix, char *prefixextra,
+static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 		      word *input, keywordlist *keywords,
 		      int indent, int extraindent, int width) {
     wrappedline *wrapping, *p;
     word *words;
-    rdstringc pfx = { 0, 0, NULL };
     int e;
     int i;
     int firstlinewidth = width;
@@ -888,75 +854,69 @@ static void info_para(rdstringc *text, word *prefix, char *prefixextra,
     words = info_transform_wordlist(input, keywords);
 
     if (prefix) {
-	info_rdaddwc(&pfx, prefix, NULL, FALSE);
-	if (prefixextra)
-	    rdaddsc(&pfx, prefixextra);
 	for (i = 0; i < indent; i++)
-	    rdaddc(text, ' ');
-	rdaddsc(text, pfx.text);
+	    info_rdadd(text, L' ');
+	e = info_rdaddwc(text, prefix, NULL, FALSE);
+	if (prefixextra)
+	    e += info_rdadds(text, prefixextra);
 	/* If the prefix is too long, shorten the first line to fit. */
-	e = extraindent - strlen(pfx.text);
+	e = extraindent - e;
 	if (e < 0) {
 	    firstlinewidth += e;       /* this decreases it, since e < 0 */
 	    if (firstlinewidth < 0) {
 		e = indent + extraindent;
 		firstlinewidth = width;
-		rdaddc(text, '\n');
+		info_rdadd(text, L'\n');
 	    } else
 		e = 0;
 	}
-	sfree(pfx.text);
     } else
 	e = indent + extraindent;
 
     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
-			 NULL, 0);
+			 &text->charset, 0);
     for (p = wrapping; p; p = p->next) {
 	for (i = 0; i < e; i++)
-	    rdaddc(text, ' ');
+	    info_rdadd(text, L' ');
 	info_rdaddwc(text, p->begin, p->end, TRUE);
-	rdaddc(text, '\n');
+	info_rdadd(text, L'\n');
 	e = indent + extraindent;
     }
     wrap_free(wrapping);
-    rdaddc(text, '\n');
+    info_rdadd(text, L'\n');
 
     free_word_list(words);
 }
 
-static void info_codepara(rdstringc *text, word *words,
+static void info_codepara(info_data *text, word *words,
 			  int indent, int width) {
     int i;
 
     for (; words; words = words->next) if (words->type == word_WeakCode) {
-	char *c;
-	info_convert(words->text, &c);
-	if (strlen(c) > (size_t)width) {
+	for (i = 0; i < indent; i++)
+	    info_rdadd(text, L' ');
+	if (info_rdadds(text, words->text) > width) {
 	    /* FIXME: warn */
 	}
-	for (i = 0; i < indent; i++)
-	    rdaddc(text, ' ');
-	rdaddsc(text, c);
-	rdaddc(text, '\n');
-	sfree(c);
+	info_rdadd(text, L'\n');
     }
 
-    rdaddc(text, '\n');
+    info_rdadd(text, L'\n');
 }
 
-static void info_versionid(rdstringc *text, word *words) {
-    rdaddc(text, '[');		       /* FIXME: configurability */
+static void info_versionid(info_data *text, word *words) {
+    info_rdadd(text, L'[');		       /* FIXME: configurability */
     info_rdaddwc(text, words, NULL, FALSE);
-    rdaddsc(text, "]\n");
+    info_rdadds(text, L"]\n");
 }
 
-static node *info_node_new(char *name)
+static node *info_node_new(char *name, int charset)
 {
     node *n;
 
     n = mknew(node);
-    n->text.text = NULL;
-    n->text.pos = n->text.size = 0;
+    n->text = empty_info_data;
+    n->text.charset = charset;
     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
     n->name = dupstr(name);
     n->started_menu = FALSE;
@@ -964,17 +924,20 @@ static node *info_node_new(char *name)
     return n;
 }
 
-static char *info_node_name(paragraph *par)
+static char *info_node_name(paragraph *par, int charset)
 {
-    rdstringc rsc = { 0, 0, NULL };
+    info_data id = EMPTY_INFO_DATA;
     char *p, *q;
-    info_rdaddwc(&rsc, par->kwtext ? par->kwtext : par->words, NULL, FALSE);
+
+    id.charset = charset;
+    info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words, NULL, FALSE);
+    info_rdaddsc(&id, NULL);
 
     /*
      * We cannot have commas or colons in a node name. Remove any
      * that we find, with a warning.
      */
-    p = q = rsc.text;
+    p = q = id.output.text;
     while (*p) {
 	if (*p == ':' || *p == ',') {
 	    error(err_infonodechar, &par->fpos, *p);
@@ -985,10 +948,10 @@ static char *info_node_name(paragraph *par)
     }
     *p = '\0';
 
-    return rsc.text;
+    return id.output.text;
 }
 
-static void info_menu_item(rdstringc *text, node *n, paragraph *p)
+static void info_menu_item(info_data *text, node *n, paragraph *p)
 {
     /*
      * FIXME: Depending on how we're doing node names in this info
@@ -1000,14 +963,95 @@ static void info_menu_item(rdstringc *text, node *n, paragraph *p)
      * 
      *   * Chapter number: Node name.
      * 
-     * 
+     * This function mostly works in char rather than wchar_t,
+     * because a menu item is a structural component.
      */
-    rdaddsc(text, "* ");
-    rdaddsc(text, n->name);
-    rdaddsc(text, "::");
+    info_rdaddsc(text, "* ");
+    info_rdaddsc(text, n->name);
+    info_rdaddsc(text, "::");
     if (p) {
-	rdaddc(text, ' ');
+	info_rdaddc(text, ' ');
 	info_rdaddwc(text, p->words, NULL, FALSE);
     }
-    rdaddc(text, '\n');
+    info_rdaddc(text, '\n');
+}
+
+/*
+ * These functions implement my wrapper on the rdadd* calls which
+ * allows me to switch arbitrarily between literal octet-string
+ * text and charset-translated Unicode. (Because no matter what
+ * character set I write the actual text in, I expect info readers
+ * to treat node names and file names literally and to expect
+ * keywords like `*Note' in their canonical form, so I have to take
+ * steps to ensure that those structural elements of the file
+ * aren't messed with.)
+ */
+static int info_rdadds(info_data *d, wchar_t const *wcs)
+{
+    /* Append wcs translated into d->charset; returns its length in wchar_t. */
+    if (!d->wcmode) {
+	d->state = charset_init_state;   /* entering translated mode */
+	d->wcmode = TRUE;
+    }
+
+    if (wcs) {
+	char buf[256];
+	int len, origlen, ret;
+
+	origlen = len = ustrlen(wcs);
+	while (len > 0) {
+	    int prevlen = len;
+
+	    /* Keep one byte of buf spare for the NUL written below. */
+	    ret = charset_from_unicode(&wcs, &len, buf, lenof(buf) - 1,
+				       d->charset, &d->state, NULL);
+	    assert(len < prevlen);     /* each pass must consume input */
+	    if (ret > 0) {
+		buf[ret] = '\0';
+		rdaddsc(&d->output, buf);
+	    }
+	}
+
+	return origlen;
+    } else
+	return 0;
+}
+
+static int info_rdaddsc(info_data *d, char const *cs)
+{
+    /* Append cs verbatim (untranslated); returns strlen(cs), or 0 if NULL. */
+    if (d->wcmode) {
+	char buf[256];
+	int ret;
+	/* NULL input: emit any pending converter output; leave room for NUL. */
+	ret = charset_from_unicode(NULL, 0, buf, lenof(buf) - 1,
+				   d->charset, &d->state, NULL);
+	if (ret > 0) {
+	    buf[ret] = '\0';
+	    rdaddsc(&d->output, buf);
+	}
+	d->wcmode = FALSE;	       /* now in literal mode */
+    }
+
+    if (cs) {
+	rdaddsc(&d->output, cs);
+	return strlen(cs);
+    } else
+	return 0;
+}
+
+static int info_rdadd(info_data *d, wchar_t wc)
+{
+    /* Append one wide character by wrapping it as a one-element string. */
+    wchar_t single[2] = { L'\0', L'\0' };
+    single[0] = wc;
+    return info_rdadds(d, single);
+}
+
+static int info_rdaddc(info_data *d, char c)
+{
+    /* Append one literal byte by wrapping it as a one-character string. */
+    char single[2] = { '\0', '\0' };
+    single[0] = c;
+    return info_rdaddsc(d, single);
+}
diff --git a/bk_text.c b/bk_text.c
index d551741..373de74 100644
--- a/bk_text.c
+++ b/bk_text.c
@@ -360,22 +360,6 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
     sfree(conf.filename);
 }
 
-static int text_ok(int charset, const wchar_t *s)
-{
-    char buf[256];
-    charset_state state = CHARSET_INIT_STATE;
-    int err, len = ustrlen(s);
-
-    err = 0;
-    while (len > 0) {
-	(void)charset_from_unicode(&s, &len, buf, lenof(buf),
-				   charset, &state, &err);
-	if (err)
-	    return FALSE;
-    }
-    return TRUE;
-}
-
 static void text_output(textfile *tf, const wchar_t *s)
 {
     char buf[256];
@@ -441,7 +425,7 @@ static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
 		  attraux(text->aux) == attr_Only))
 	    rdadd(rs, L'`');	       /* FIXME: configurability */
 	if (removeattr(text->type) == word_Normal) {
-	    if (text_ok(charset, text->text) || !text->alt)
+	    if (cvt_ok(charset, text->text) || !text->alt)
 		rdadds(rs, text->text);
 	    else
 		text_rdaddw(charset, rs, text->alt, NULL);
@@ -495,7 +479,7 @@ static int text_width(void *ctx, word *text) {
 		 ? (attraux(text->aux) == attr_Only ? 2 :
 		    attraux(text->aux) == attr_Always ? 0 : 1)
 		 : 0) +
-		(text_ok(charset, text->text) || !text->alt ?
+		(cvt_ok(charset, text->text) || !text->alt ?
 		 ustrlen(text->text) :
 		 text_width_list(ctx, text->alt)));
 
diff --git a/halibut.h b/halibut.h
index 94493d4..33780ea 100644
--- a/halibut.h
+++ b/halibut.h
@@ -294,6 +294,7 @@ int utob(wchar_t *);
 int uisdigit(wchar_t);
 wchar_t *ustrlow(wchar_t *s);
 wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
+int cvt_ok(int charset, const wchar_t *s);
 
 /*
  * help.c
diff --git a/ustring.c b/ustring.c
index 169a377..11a022c 100644
--- a/ustring.c
+++ b/ustring.c
@@ -314,3 +314,23 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
     sfree(text);
     return wblk;
 }
+
+/*
+ * Determine whether a Unicode string can be translated into a
+ * given charset without any missing characters.
+ */
+int cvt_ok(int charset, const wchar_t *s)
+{
+    charset_state cs_state = CHARSET_INIT_STATE;
+    char scratch[256];		       /* converted bytes are discarded */
+    int remaining = ustrlen(s);
+    int failed = 0;
+
+    /* Convert chunk by chunk, stopping once the converter flags an error. */
+    while (remaining > 0 && !failed) {
+	(void)charset_from_unicode(&s, &remaining, scratch, lenof(scratch),
+				   charset, &cs_state, &failed);
+    }
+
+    return failed ? FALSE : TRUE;
+}
-- 
2.11.0