From 91f93b94744447a088ce435e50500a9598cb5466 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 21 Apr 2004 18:07:26 +0000 Subject: [PATCH] Charset support for the info backend (\cfg{info-charset}). (This checkin touches other files because a function in bk_text.c turned out to be of more general use so I moved it out into ustring.c.) git-svn-id: svn://svn.tartarus.org/sgt/halibut@4111 cda61777-01e9-0310-a592-d414129be87e --- bk_info.c | 502 ++++++++++++++++++++++++++++++++++---------------------------- bk_text.c | 20 +-- halibut.h | 1 + ustring.c | 20 +++ 4 files changed, 296 insertions(+), 247 deletions(-) diff --git a/bk_info.c b/bk_info.c index fc1b292..f7efbf5 100644 --- a/bk_info.c +++ b/bk_info.c @@ -25,6 +25,17 @@ * - might be helpful to diagnose duplicate node names! */ +/* + * FIXME: + * + * - alignment in the index is broken when a non-representable + * character appears with no alternative. More generally, I + * fear, this is the fault of the info_rdadd* functions failing + * to return correct width figures in this circumstance (so it + * will affect list paragraph prefixes and paragraph wrapping as + * well). + */ + #include #include #include @@ -33,39 +44,53 @@ typedef struct { char *filename; int maxfilesize; + int charset; } infoconfig; +typedef struct { + rdstringc output; + int charset; + charset_state state; + int wcmode; +} info_data; +#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE } +static const info_data empty_info_data = EMPTY_INFO_DATA; + typedef struct node_tag node; struct node_tag { node *listnext; node *up, *prev, *next, *lastchild; int pos, started_menu, filenum; char *name; - rdstringc text; + info_data text; }; typedef struct { char *text; + int length; int nnodes, nodesize; node **nodes; } info_idx; -static int info_convert(wchar_t *, char **); +static int info_rdadd(info_data *, wchar_t); +static int info_rdadds(info_data *, wchar_t const *); +static int info_rdaddc(info_data *, char); +static int info_rdaddsc(info_data *, char const *); -static void info_heading(rdstringc *, word *, word *, int); -static void info_rule(rdstringc *, int, int); -static void info_para(rdstringc *, word *, char *, word *, keywordlist *, +static void info_heading(info_data *, word *, word *, int); +static void info_rule(info_data *, int, int); +static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *, int, int, int); -static void info_codepara(rdstringc *, word *, int, int); -static void info_versionid(rdstringc *, word *); -static void info_menu_item(rdstringc *, node *, paragraph *); +static void info_codepara(info_data *, word *, int, int); +static void info_versionid(info_data *, word *); +static void info_menu_item(info_data *, node *, paragraph *); static word *info_transform_wordlist(word *, keywordlist *); static int info_check_index(word *, node *, indexdata *); -static void info_rdaddwc(rdstringc *, word *, word *, int); +static int info_rdaddwc(info_data *, word *, word *, int); -static node *info_node_new(char *name); -static char *info_node_name(paragraph *p); +static node *info_node_new(char *name, int charset); +static char *info_node_name(paragraph *p, int charset); static infoconfig info_configure(paragraph *source) { infoconfig ret; @@ -75,12 +100,17 @@ static infoconfig info_configure(paragraph *source) { */ ret.filename = dupstr("output.info"); ret.maxfilesize = 64 << 10; + ret.charset = CS_ASCII; for (; source; source = source->next) { if (source->type == para_Config) { if (!ustricmp(source->keyword, L"info-filename")) { sfree(ret.filename); ret.filename = dupstr(adv(source->origkeyword)); + } else if (!ustricmp(source->keyword, L"info-charset")) { + char *csname = utoa_dup(uadv(source->keyword), CS_ASCII); + ret.charset = charset_from_localenc(csname); + sfree(csname); } else if (!ustricmp(source->keyword, L"info-max-file-size")) { ret.maxfilesize = utoi(uadv(source->keyword)); } @@ -101,18 +131,18 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, infoconfig conf; word *prefix, *body, *wp; word spaceword; - char *prefixextra; + wchar_t *prefixextra; int nesting, nestindent; int indentb, indenta; int filepos; int has_index; - rdstringc intro_text = { 0, 0, NULL }; + info_data intro_text = EMPTY_INFO_DATA; node *topnode, *currnode; word bullet; FILE *fp; /* - * FIXME + * FIXME: possibly configurability? */ int width = 70, listindentbefore = 1, listindentafter = 3; int indent_code = 2, index_width = 40; @@ -124,7 +154,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, /* * Go through and create a node for each section. */ - topnode = info_node_new("Top"); + topnode = info_node_new("Top", conf.charset); currnode = topnode; for (p = sourceform; p; p = p->next) switch (p->type) { /* @@ -139,8 +169,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, node *newnode, *upnode; char *nodename; - nodename = info_node_name(p); - newnode = info_node_new(nodename); + nodename = info_node_name(p, conf.charset); + newnode = info_node_new(nodename, conf.charset); sfree(nodename); p->private_data = newnode; @@ -170,14 +200,16 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { info_idx *ii = mknew(info_idx); - rdstringc rs = { 0, 0, NULL }; + info_data id = EMPTY_INFO_DATA; + + id.charset = conf.charset; ii->nnodes = ii->nodesize = 0; ii->nodes = NULL; - info_rdaddwc(&rs, entry->text, NULL, FALSE); + ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE); - ii->text = rs.text; + ii->text = id.output.text; entry->backend_data = ii; } @@ -189,11 +221,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, * good place to put the copyright notice and the version IDs. * Also, Info directory entries are expected to go here. */ + intro_text.charset = conf.charset; - rdaddsc(&intro_text, + info_rdaddsc(&intro_text, "This Info file generated by Halibut, "); - rdaddsc(&intro_text, version); - rdaddsc(&intro_text, "\n\n"); + info_rdaddsc(&intro_text, version); + info_rdaddsc(&intro_text, "\n\n"); for (p = sourceform; p; p = p->next) if (p->type == para_Config && @@ -211,33 +244,27 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, continue; } - rdaddsc(&intro_text, "INFO-DIR-SECTION "); - s = utoa_dup(section, CS_FIXME); - rdaddsc(&intro_text, s); - sfree(s); - rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* "); - s = utoa_dup(shortname, CS_FIXME); - rdaddsc(&intro_text, s); - sfree(s); - rdaddsc(&intro_text, ": ("); + info_rdaddsc(&intro_text, "INFO-DIR-SECTION "); + info_rdadds(&intro_text, section); + info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* "); + info_rdadds(&intro_text, shortname); + info_rdaddsc(&intro_text, ": ("); s = dupstr(conf.filename); if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info")) s[strlen(s)-5] = '\0'; - rdaddsc(&intro_text, s); + info_rdaddsc(&intro_text, s); sfree(s); - rdaddsc(&intro_text, ")"); + info_rdaddsc(&intro_text, ")"); if (*kw) { keyword *kwl = kw_lookup(keywords, kw); if (kwl && kwl->para->private_data) { node *n = (node *)kwl->para->private_data; - rdaddsc(&intro_text, n->name); + info_rdaddsc(&intro_text, n->name); } } - rdaddsc(&intro_text, ". "); - s = utoa_dup(longname, CS_FIXME); - rdaddsc(&intro_text, s); - sfree(s); - rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n"); + info_rdaddsc(&intro_text, ". "); + info_rdadds(&intro_text, longname); + info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n"); } for (p = sourceform; p; p = p->next) @@ -249,8 +276,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, if (p->type == para_VersionID) info_versionid(&intro_text, p->words); - if (intro_text.text[intro_text.pos-1] != '\n') - rdaddc(&intro_text, '\n'); + if (intro_text.output.text[intro_text.output.pos-1] != '\n') + info_rdaddc(&intro_text, '\n'); /* Do the title */ for (p = sourceform; p; p = p->next) @@ -306,7 +333,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, assert(currnode->up); if (!currnode->up->started_menu) { - rdaddsc(&currnode->up->text, "* Menu:\n\n"); + info_rdaddsc(&currnode->up->text, "* Menu:\n\n"); currnode->up->started_menu = TRUE; } info_menu_item(&currnode->up->text, currnode, p); @@ -339,7 +366,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, indenta = listindentafter; } else if (p->type == para_NumberedList) { prefix = p->kwtext; - prefixextra = "."; /* FIXME: configurability */ + prefixextra = L"."; /* FIXME: configurability */ indentb = listindentbefore; indenta = listindentafter; } else if (p->type == para_Description) { @@ -388,14 +415,14 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, int i, j, k; indexentry *entry; - newnode = info_node_new("Index"); + newnode = info_node_new("Index", conf.charset); newnode->up = topnode; currnode->next = newnode; newnode->prev = currnode; currnode->listnext = newnode; - rdaddsc(&newnode->text, "Index\n-----\n\n"); + info_rdaddsc(&newnode->text, "Index\n-----\n\n"); info_menu_item(&topnode->text, newnode, NULL); @@ -403,7 +430,6 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, info_idx *ii = (info_idx *)entry->backend_data; for (j = 0; j < ii->nnodes; j++) { - int pos0 = newnode->text.pos; /* * When we have multiple references for a single * index term, we only display the actual term on @@ -411,12 +437,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, * really are the same. */ if (j == 0) - rdaddsc(&newnode->text, ii->text); - for (k = newnode->text.pos - pos0; k < index_width; k++) - rdaddc(&newnode->text, ' '); - rdaddsc(&newnode->text, " *Note "); - rdaddsc(&newnode->text, ii->nodes[j]->name); - rdaddsc(&newnode->text, "::\n"); + info_rdaddsc(&newnode->text, ii->text); + for (k = (j ? 0 : ii->length); k < index_width; k++) + info_rdaddc(&newnode->text, ' '); + info_rdaddsc(&newnode->text, " *Note "); + info_rdaddsc(&newnode->text, ii->nodes[j]->name); + info_rdaddsc(&newnode->text, "::\n"); } } } @@ -426,31 +452,31 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, * and the node line at the top. */ for (currnode = topnode; currnode; currnode = currnode->listnext) { - char *origtext = currnode->text.text; - currnode->text.text = NULL; - currnode->text.pos = currnode->text.size = 0; - rdaddsc(&currnode->text, "\037\nFile: "); - rdaddsc(&currnode->text, conf.filename); - rdaddsc(&currnode->text, ", Node: "); - rdaddsc(&currnode->text, currnode->name); + char *origtext = currnode->text.output.text; + currnode->text = empty_info_data; + currnode->text.charset = conf.charset; + info_rdaddsc(&currnode->text, "\037\nFile: "); + info_rdaddsc(&currnode->text, conf.filename); + info_rdaddsc(&currnode->text, ", Node: "); + info_rdaddsc(&currnode->text, currnode->name); if (currnode->prev) { - rdaddsc(&currnode->text, ", Prev: "); - rdaddsc(&currnode->text, currnode->prev->name); + info_rdaddsc(&currnode->text, ", Prev: "); + info_rdaddsc(&currnode->text, currnode->prev->name); } - rdaddsc(&currnode->text, ", Up: "); - rdaddsc(&currnode->text, (currnode->up ? - currnode->up->name : "(dir)")); + info_rdaddsc(&currnode->text, ", Up: "); + info_rdaddsc(&currnode->text, (currnode->up ? + currnode->up->name : "(dir)")); if (currnode->next) { - rdaddsc(&currnode->text, ", Next: "); - rdaddsc(&currnode->text, currnode->next->name); + info_rdaddsc(&currnode->text, ", Next: "); + info_rdaddsc(&currnode->text, currnode->next->name); } - rdaddsc(&currnode->text, "\n\n"); - rdaddsc(&currnode->text, origtext); + info_rdaddsc(&currnode->text, "\n\n"); + info_rdaddsc(&currnode->text, origtext); /* * Just make _absolutely_ sure we end with a newline. */ - if (currnode->text.text[currnode->text.pos-1] != '\n') - rdaddc(&currnode->text, '\n'); + if (currnode->text.output.text[currnode->text.output.pos-1] != '\n') + info_rdaddc(&currnode->text, '\n'); sfree(origtext); } @@ -458,25 +484,25 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, /* * Compute the offsets for the tag table. */ - filepos = intro_text.pos; + filepos = intro_text.output.pos; for (currnode = topnode; currnode; currnode = currnode->listnext) { currnode->pos = filepos; - filepos += currnode->text.pos; + filepos += currnode->text.output.pos; } /* * Split into sub-files. */ if (conf.maxfilesize > 0) { - int currfilesize = intro_text.pos, currfilenum = 1; + int currfilesize = intro_text.output.pos, currfilenum = 1; for (currnode = topnode; currnode; currnode = currnode->listnext) { - if (currfilesize > intro_text.pos && - currfilesize + currnode->text.pos > conf.maxfilesize) { + if (currfilesize > intro_text.output.pos && + currfilesize + currnode->text.output.pos > conf.maxfilesize) { currfilenum++; - currfilesize = intro_text.pos; + currfilesize = intro_text.output.pos; } currnode->filenum = currfilenum; - currfilesize += currnode->text.pos; + currfilesize += currnode->text.output.pos; } } @@ -488,10 +514,10 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, error(err_cantopenw, conf.filename); return; } - fputs(intro_text.text, fp); + fputs(intro_text.output.text, fp); if (conf.maxfilesize == 0) { for (currnode = topnode; currnode; currnode = currnode->listnext) - fputs(currnode->text.text, fp); + fputs(currnode->text.output.text, fp); } else { int filenum = 0; fprintf(fp, "\037\nIndirect:\n"); @@ -533,9 +559,9 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, return; } sfree(fname); - fputs(intro_text.text, fp); + fputs(intro_text.output.text, fp); } - fputs(currnode->text.text, fp); + fputs(currnode->text.output.text, fp); } if (fp) @@ -585,55 +611,6 @@ static int info_check_index(word *w, node *n, indexdata *idx) return ret; } -/* - * Convert a wide string into a string of chars. If `result' is - * non-NULL, mallocs the resulting string and stores a pointer to - * it in `*result'. If `result' is NULL, merely checks whether all - * characters in the string are feasible for the output character - * set. - * - * Return is nonzero if all characters are OK. If not all - * characters are OK but `result' is non-NULL, a result _will_ - * still be generated! - */ -static int info_convert(wchar_t *s, char **result) { - /* - * FIXME. Currently this is ISO8859-1 only. - */ - int doing = (result != 0); - int ok = TRUE; - char *p = NULL; - int plen = 0, psize = 0; - - for (; *s; s++) { - wchar_t c = *s; - char outc; - - if ((c >= 32 && c <= 126) || - (c >= 160 && c <= 255)) { - /* Char is OK. */ - outc = (char)c; - } else { - /* Char is not OK. */ - ok = FALSE; - outc = 0xBF; /* approximate the good old DEC `uh?' */ - } - if (doing) { - if (plen >= psize) { - psize = plen + 256; - p = resize(p, psize); - } - p[plen++] = outc; - } - } - if (doing) { - p = resize(p, plen+1); - p[plen] = '\0'; - *result = p; - } - return ok; -} - static word *info_transform_wordlist(word *words, keywordlist *keywords) { word *ret = dup_word_list(words); @@ -691,8 +668,8 @@ static word *info_transform_wordlist(word *words, keywordlist *keywords) return ret; } -static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) { - char *c; +static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs) { + int ret = 0; for (; words && words != end; words = words->next) switch (words->type) { case word_HyperLink: @@ -718,56 +695,61 @@ static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) { if (towordstyle(words->type) == word_Emph && (attraux(words->aux) == attr_First || attraux(words->aux) == attr_Only)) - rdaddc(rs, '_'); /* FIXME: configurability */ + ret += info_rdadd(id, L'_'); /* FIXME: configurability */ else if (towordstyle(words->type) == word_Code && (attraux(words->aux) == attr_First || attraux(words->aux) == attr_Only)) - rdaddc(rs, '`'); /* FIXME: configurability */ + ret += info_rdadd(id, L'`'); /* FIXME: configurability */ if (removeattr(words->type) == word_Normal) { - if (info_convert(words->text, &c) || !words->alt) - rdaddsc(rs, c); + if (cvt_ok(id->charset, words->text) || !words->alt) + ret += info_rdadds(id, words->text); else - info_rdaddwc(rs, words->alt, NULL, FALSE); - sfree(c); + ret += info_rdaddwc(id, words->alt, NULL, FALSE); } else if (removeattr(words->type) == word_WhiteSpace) { - rdaddc(rs, ' '); + ret += info_rdadd(id, L' '); } else if (removeattr(words->type) == word_Quote) { - rdaddc(rs, quoteaux(words->aux) == quote_Open ? '`' : '\''); + ret += info_rdadd(id, quoteaux(words->aux) == quote_Open ? L'`' : L'\''); /* FIXME: configurability */ } if (towordstyle(words->type) == word_Emph && (attraux(words->aux) == attr_Last || attraux(words->aux) == attr_Only)) - rdaddc(rs, '_'); /* FIXME: configurability */ + ret += info_rdadd(id, L'_'); /* FIXME: configurability */ else if (towordstyle(words->type) == word_Code && (attraux(words->aux) == attr_Last || attraux(words->aux) == attr_Only)) - rdaddc(rs, '\''); /* FIXME: configurability */ + ret += info_rdadd(id, L'\''); /* FIXME: configurability */ break; case word_UpperXref: case word_LowerXref: if (xrefs && words->private_data) { - rdaddsc(rs, "*Note "); - rdaddsc(rs, ((node *)words->private_data)->name); - rdaddsc(rs, "::"); + /* + * This bit is structural and so must be done in char + * rather than wchar_t. + */ + ret += info_rdaddsc(id, "*Note "); + ret += info_rdaddsc(id, ((node *)words->private_data)->name); + ret += info_rdaddsc(id, "::"); } break; } + + return ret; } -static int info_width_internal(word *words, int xrefs); +static int info_width_internal(word *words, int xrefs, int charset); -static int info_width_internal_list(word *words, int xrefs) { +static int info_width_internal_list(word *words, int xrefs, int charset) { int w = 0; while (words) { - w += info_width_internal(words, xrefs); + w += info_width_internal(words, xrefs, charset); words = words->next; } return w; } -static int info_width_internal(word *words, int xrefs) { +static int info_width_internal(word *words, int xrefs, int charset) { switch (words->type) { case word_HyperLink: case word_HyperEnd: @@ -784,9 +766,9 @@ static int info_width_internal(word *words, int xrefs) { ? (attraux(words->aux) == attr_Only ? 2 : attraux(words->aux) == attr_Always ? 0 : 1) : 0) + - (info_convert(words->text, NULL) || !words->alt ? + (cvt_ok(charset, words->text) || !words->alt ? ustrlen(words->text) : - info_width_internal_list(words->alt, xrefs))); + info_width_internal_list(words->alt, xrefs, charset))); case word_WhiteSpace: case word_EmphSpace: @@ -817,70 +799,54 @@ static int info_width_internal(word *words, int xrefs) { static int info_width_noxrefs(void *ctx, word *words) { - IGNORE(ctx); - return info_width_internal(words, FALSE); + return info_width_internal(words, FALSE, *(int *)ctx); } static int info_width_xrefs(void *ctx, word *words) { - IGNORE(ctx); - return info_width_internal(words, TRUE); + return info_width_internal(words, TRUE, *(int *)ctx); } -static void info_heading(rdstringc *text, word *tprefix, +static void info_heading(info_data *text, word *tprefix, word *words, int width) { - rdstringc t = { 0, 0, NULL }; - int margin, length; + int length; int firstlinewidth, wrapwidth; - int i; wrappedline *wrapping, *p; + length = 0; if (tprefix) { - info_rdaddwc(&t, tprefix, NULL, FALSE); - rdaddsc(&t, ": "); /* FIXME: configurability */ + length += info_rdaddwc(text, tprefix, NULL, FALSE); + length += info_rdadds(text, L": ");/* FIXME: configurability */ } - margin = length = (t.text ? strlen(t.text) : 0); - margin = 0; - firstlinewidth = width - length; wrapwidth = width; + firstlinewidth = width - length; wrapping = wrap_para(words, firstlinewidth, wrapwidth, - info_width_noxrefs, NULL, 0); + info_width_noxrefs, &text->charset, 0); for (p = wrapping; p; p = p->next) { - info_rdaddwc(&t, p->begin, p->end, FALSE); - length = (t.text ? strlen(t.text) : 0); - for (i = 0; i < margin; i++) - rdaddc(text, ' '); - rdaddsc(text, t.text); - rdaddc(text, '\n'); - for (i = 0; i < margin; i++) - rdaddc(text, ' '); + length += info_rdaddwc(text, p->begin, p->end, FALSE); + info_rdadd(text, L'\n'); while (length--) - rdaddc(text, '-'); - rdaddc(text, '\n'); - margin = 0; - sfree(t.text); - t = empty_rdstringc; + info_rdadd(text, L'-'); /* FIXME: configurability */ + info_rdadd(text, L'\n'); + length = 0; } wrap_free(wrapping); - rdaddc(text, '\n'); - - sfree(t.text); + info_rdadd(text, L'\n'); } -static void info_rule(rdstringc *text, int indent, int width) { - while (indent--) rdaddc(text, ' '); - while (width--) rdaddc(text, '-'); - rdaddc(text, '\n'); - rdaddc(text, '\n'); +static void info_rule(info_data *text, int indent, int width) { + while (indent--) info_rdadd(text, L' '); + while (width--) info_rdadd(text, L'-'); + info_rdadd(text, L'\n'); + info_rdadd(text, L'\n'); } -static void info_para(rdstringc *text, word *prefix, char *prefixextra, +static void info_para(info_data *text, word *prefix, wchar_t *prefixextra, word *input, keywordlist *keywords, int indent, int extraindent, int width) { wrappedline *wrapping, *p; word *words; - rdstringc pfx = { 0, 0, NULL }; int e; int i; int firstlinewidth = width; @@ -888,75 +854,69 @@ static void info_para(rdstringc *text, word *prefix, char *prefixextra, words = info_transform_wordlist(input, keywords); if (prefix) { - info_rdaddwc(&pfx, prefix, NULL, FALSE); - if (prefixextra) - rdaddsc(&pfx, prefixextra); for (i = 0; i < indent; i++) - rdaddc(text, ' '); - rdaddsc(text, pfx.text); + info_rdadd(text, L' '); + e = info_rdaddwc(text, prefix, NULL, FALSE); + if (prefixextra) + e += info_rdadds(text, prefixextra); /* If the prefix is too long, shorten the first line to fit. */ - e = extraindent - strlen(pfx.text); + e = extraindent - e; if (e < 0) { firstlinewidth += e; /* this decreases it, since e < 0 */ if (firstlinewidth < 0) { e = indent + extraindent; firstlinewidth = width; - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); } else e = 0; } - sfree(pfx.text); } else e = indent + extraindent; wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs, - NULL, 0); + &text->charset, 0); for (p = wrapping; p; p = p->next) { for (i = 0; i < e; i++) - rdaddc(text, ' '); + info_rdadd(text, L' '); info_rdaddwc(text, p->begin, p->end, TRUE); - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); e = indent + extraindent; } wrap_free(wrapping); - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); free_word_list(words); } -static void info_codepara(rdstringc *text, word *words, +static void info_codepara(info_data *text, word *words, int indent, int width) { int i; for (; words; words = words->next) if (words->type == word_WeakCode) { - char *c; - info_convert(words->text, &c); - if (strlen(c) > (size_t)width) { + for (i = 0; i < indent; i++) + info_rdadd(text, L' '); + if (info_rdadds(text, words->text) > width) { /* FIXME: warn */ } - for (i = 0; i < indent; i++) - rdaddc(text, ' '); - rdaddsc(text, c); - rdaddc(text, '\n'); - sfree(c); + info_rdadd(text, L'\n'); } - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); } -static void info_versionid(rdstringc *text, word *words) { - rdaddc(text, '['); /* FIXME: configurability */ +static void info_versionid(info_data *text, word *words) { + info_rdadd(text, L'['); /* FIXME: configurability */ info_rdaddwc(text, words, NULL, FALSE); - rdaddsc(text, "]\n"); + info_rdadds(text, L"]\n"); } -static node *info_node_new(char *name) +static node *info_node_new(char *name, int charset) { node *n; n = mknew(node); - n->text.text = NULL; - n->text.pos = n->text.size = 0; + n->text = empty_info_data; + n->text.charset = charset; n->up = n->next = n->prev = n->lastchild = n->listnext = NULL; n->name = dupstr(name); n->started_menu = FALSE; @@ -964,17 +924,20 @@ static node *info_node_new(char *name) return n; } -static char *info_node_name(paragraph *par) +static char *info_node_name(paragraph *par, int charset) { - rdstringc rsc = { 0, 0, NULL }; + info_data id = EMPTY_INFO_DATA; char *p, *q; - info_rdaddwc(&rsc, par->kwtext ? par->kwtext : par->words, NULL, FALSE); + + id.charset = charset; + info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words, NULL, FALSE); + info_rdaddsc(&id, NULL); /* * We cannot have commas or colons in a node name. Remove any * that we find, with a warning. */ - p = q = rsc.text; + p = q = id.output.text; while (*p) { if (*p == ':' || *p == ',') { error(err_infonodechar, &par->fpos, *p); @@ -985,10 +948,10 @@ static char *info_node_name(paragraph *par) } *p = '\0'; - return rsc.text; + return id.output.text; } -static void info_menu_item(rdstringc *text, node *n, paragraph *p) +static void info_menu_item(info_data *text, node *n, paragraph *p) { /* * FIXME: Depending on how we're doing node names in this info @@ -1000,14 +963,95 @@ static void info_menu_item(rdstringc *text, node *n, paragraph *p) * * * Chapter number: Node name. * - * + * This function mostly works in char rather than wchar_t, + * because a menu item is a structural component. */ - rdaddsc(text, "* "); - rdaddsc(text, n->name); - rdaddsc(text, "::"); + info_rdaddsc(text, "* "); + info_rdaddsc(text, n->name); + info_rdaddsc(text, "::"); if (p) { - rdaddc(text, ' '); + info_rdaddc(text, ' '); info_rdaddwc(text, p->words, NULL, FALSE); } - rdaddc(text, '\n'); + info_rdaddc(text, '\n'); +} + +/* + * These functions implement my wrapper on the rdadd* calls which + * allows me to switch arbitrarily between literal octet-string + * text and charset-translated Unicode. (Because no matter what + * character set I write the actual text in, I expect info readers + * to treat node names and file names literally and to expect + * keywords like `*Note' in their canonical form, so I have to take + * steps to ensure that those structural elements of the file + * aren't messed with.) + */ +static int info_rdadds(info_data *d, wchar_t const *wcs) +{ + if (!d->wcmode) { + d->state = charset_init_state; + d->wcmode = TRUE; + } + + if (wcs) { + char buf[256]; + int len, origlen, ret; + + origlen = len = ustrlen(wcs); + while (len > 0) { + int prevlen = len; + + ret = charset_from_unicode(&wcs, &len, buf, lenof(buf), + d->charset, &d->state, NULL); + + assert(len < prevlen); + + if (ret > 0) { + buf[ret] = '\0'; + rdaddsc(&d->output, buf); + } + } + + return origlen; + } else + return 0; +} + +static int info_rdaddsc(info_data *d, char const *cs) +{ + if (d->wcmode) { + char buf[256]; + int ret; + + ret = charset_from_unicode(NULL, 0, buf, lenof(buf), + d->charset, &d->state, NULL); + if (ret > 0) { + buf[ret] = '\0'; + rdaddsc(&d->output, buf); + } + + d->wcmode = FALSE; + } + + if (cs) { + rdaddsc(&d->output, cs); + return strlen(cs); + } else + return 0; +} + +static int info_rdadd(info_data *d, wchar_t wc) +{ + wchar_t wcs[2]; + wcs[0] = wc; + wcs[1] = L'\0'; + return info_rdadds(d, wcs); +} + +static int info_rdaddc(info_data *d, char c) +{ + char cs[2]; + cs[0] = c; + cs[1] = '\0'; + return info_rdaddsc(d, cs); } diff --git a/bk_text.c b/bk_text.c index d551741..373de74 100644 --- a/bk_text.c +++ b/bk_text.c @@ -360,22 +360,6 @@ void text_backend(paragraph *sourceform, keywordlist *keywords, sfree(conf.filename); } -static int text_ok(int charset, const wchar_t *s) -{ - char buf[256]; - charset_state state = CHARSET_INIT_STATE; - int err, len = ustrlen(s); - - err = 0; - while (len > 0) { - (void)charset_from_unicode(&s, &len, buf, lenof(buf), - charset, &state, &err); - if (err) - return FALSE; - } - return TRUE; -} - static void text_output(textfile *tf, const wchar_t *s) { char buf[256]; @@ -441,7 +425,7 @@ static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) { attraux(text->aux) == attr_Only)) rdadd(rs, L'`'); /* FIXME: configurability */ if (removeattr(text->type) == word_Normal) { - if (text_ok(charset, text->text) || !text->alt) + if (cvt_ok(charset, text->text) || !text->alt) rdadds(rs, text->text); else text_rdaddw(charset, rs, text->alt, NULL); @@ -495,7 +479,7 @@ static int text_width(void *ctx, word *text) { ? (attraux(text->aux) == attr_Only ? 2 : attraux(text->aux) == attr_Always ? 0 : 1) : 0) + - (text_ok(charset, text->text) || !text->alt ? + (cvt_ok(charset, text->text) || !text->alt ? ustrlen(text->text) : text_width_list(ctx, text->alt))); diff --git a/halibut.h b/halibut.h index 94493d4..33780ea 100644 --- a/halibut.h +++ b/halibut.h @@ -294,6 +294,7 @@ int utob(wchar_t *); int uisdigit(wchar_t); wchar_t *ustrlow(wchar_t *s); wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec); +int cvt_ok(int charset, const wchar_t *s); /* * help.c diff --git a/ustring.c b/ustring.c index 169a377..11a022c 100644 --- a/ustring.c +++ b/ustring.c @@ -314,3 +314,23 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { sfree(text); return wblk; } + +/* + * Determine whether a Unicode string can be translated into a + * given charset without any missing characters. + */ +int cvt_ok(int charset, const wchar_t *s) +{ + char buf[256]; + charset_state state = CHARSET_INIT_STATE; + int err, len = ustrlen(s); + + err = 0; + while (len > 0) { + (void)charset_from_unicode(&s, &len, buf, lenof(buf), + charset, &state, &err); + if (err) + return FALSE; + } + return TRUE; +} -- 2.11.0