X-Git-Url: https://git.distorted.org.uk/~mdw/sgt/halibut/blobdiff_plain/b921687e6593e2a473b12e5851a5ff145a6ac3d8..8f664e7e91c918cd13248f6b684580c4dd2cdb31:/bk_info.c diff --git a/bk_info.c b/bk_info.c index 23f8628..d898e61 100644 --- a/bk_info.c +++ b/bk_info.c @@ -1,10 +1,15 @@ /* - * info backend for Halibut + * Info backend for Halibut + * + * The Info file format isn't well-specified, and what specification + * there is is scattered all over the place. Sources include: + * (info), from GNU Texinfo. + * (texinfo), also from GNU Texinfo. + * (Emacs)Misc Help, and (emacs)Info Lookup, from GNU Emacs. + * info.el, from GNU Emacs. * * Possible future work: * - * - configurable indentation, bullets, emphasis, quotes etc? - * * - configurable choice of how to allocate node names? * + possibly a template-like approach, choosing node names to * be the full section title or perhaps the internal keyword? @@ -23,6 +28,26 @@ * * Section 1.2: Nodename. Title of section. * * - might be helpful to diagnose duplicate node names! + * + * - Indices generated by makeinfo use a menu rather than a bunch of + * cross-references, which reduces visual clutter rather. For + * singly-referenced items, it looks like: + * * toner cartridge, replacing: Toner. + * It does a horrid job on multiply-referenced entries, though, + * perhaps because the name before the colon is meant to be unique. + * Info's 'i' command requires the use of a menu -- it fails to + * find any index entries at all with Halibut's current index format. + * + * - The string "*note" is matched case-insensitively, so we could + * make things slightly less ugly by using the lower-case version + * when the user asks for \k. Unfortunately, standalone Info seems + * to match node names case-sensitively, so we can't downcase that. 
+ * + * - The character encoding used in an Info file can be configured using + * an Emacs local variables block at the end, like this: + * Local Variables: + * coding: iso-8859-1 + * End: */ #include <stdio.h> @@ -31,124 +56,264 @@ #include "halibut.h" typedef struct { + wchar_t *underline; +} alignstruct; + +typedef struct { char *filename; int maxfilesize; + int charset; + int listindentbefore, listindentafter; + int indent_code, width, index_width; + alignstruct atitle, achapter, *asect; + int nasect; + wchar_t *bullet, *listsuffix; + wchar_t *startemph, *endemph; + wchar_t *lquote, *rquote; + wchar_t *sectsuffix; + wchar_t *rule; + wchar_t *index_text; } infoconfig; +typedef struct { + rdstringc output; + int charset; + charset_state state; + int wcmode; +} info_data; +#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE } +static const info_data empty_info_data = EMPTY_INFO_DATA; + typedef struct node_tag node; struct node_tag { node *listnext; node *up, *prev, *next, *lastchild; int pos, started_menu, filenum; char *name; - rdstringc text; + info_data text; }; typedef struct { char *text; + int length; int nnodes, nodesize; node **nodes; } info_idx; -static int info_convert(wchar_t *, char **); - -static void info_heading(rdstringc *, word *, word *, int); -static void info_rule(rdstringc *, int, int); -static void info_para(rdstringc *, word *, char *, word *, keywordlist *, - int, int, int); -static void info_codepara(rdstringc *, word *, int, int); -static void info_versionid(rdstringc *, word *); -static void info_menu_item(rdstringc *, node *, paragraph *); +static int info_rdadd(info_data *, wchar_t); +static int info_rdadds(info_data *, wchar_t const *); +static int info_rdaddc(info_data *, char); +static int info_rdaddsc(info_data *, char const *); + +static void info_heading(info_data *, word *, word *, alignstruct, int, + infoconfig *); +static void info_rule(info_data *, int, int, infoconfig *); +static void info_para(info_data *, word *, 
wchar_t *, word *, keywordlist *, + int, int, int, infoconfig *); +static void info_codepara(info_data *, word *, int, int); +static void info_versionid(info_data *, word *, infoconfig *); +static void info_menu_item(info_data *, node *, paragraph *, infoconfig *); static word *info_transform_wordlist(word *, keywordlist *); static int info_check_index(word *, node *, indexdata *); -static void info_rdaddwc(rdstringc *, word *, word *, int); +static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *); -static node *info_node_new(char *name); -static char *info_node_name(paragraph *p); +static node *info_node_new(char *name, int charset); +static char *info_node_name_for_para(paragraph *p, infoconfig *); +static char *info_node_name_for_text(wchar_t *text, infoconfig *); static infoconfig info_configure(paragraph *source) { infoconfig ret; + paragraph *p; + int n; /* * Defaults. */ ret.filename = dupstr("output.info"); ret.maxfilesize = 64 << 10; + ret.charset = CS_ASCII; + ret.width = 70; + ret.listindentbefore = 1; + ret.listindentafter = 3; + ret.indent_code = 2; + ret.index_width = 40; + ret.listsuffix = L"."; + ret.bullet = L"\x2022\0-\0\0"; + ret.rule = L"\x2500\0-\0\0"; + ret.startemph = L"_\0_\0\0"; + ret.endemph = uadv(ret.startemph); + ret.lquote = L"\x2018\0\x2019\0`\0'\0\0"; + ret.rquote = uadv(ret.lquote); + ret.sectsuffix = L": "; + /* + * Default underline characters are chosen to match those recognised by + * Info-fontify-node. + */ + ret.atitle.underline = L"*\0\0"; + ret.achapter.underline = L"=\0\0"; + ret.nasect = 2; + ret.asect = snewn(ret.nasect, alignstruct); + ret.asect[0].underline = L"-\0\0"; + ret.asect[1].underline = L".\0\0"; + ret.index_text = L"Index"; + + /* + * Two-pass configuration so that we can pick up global config + * (e.g. `quotes') before having it overridden by specific + * config (`info-quotes'), irrespective of the order in which + * they occur. 
+ */ + for (p = source; p; p = p->next) { + if (p->type == para_Config) { + if (!ustricmp(p->keyword, L"quotes")) { + if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) { + ret.lquote = uadv(p->keyword); + ret.rquote = uadv(ret.lquote); + } + } else if (!ustricmp(p->keyword, L"index")) { + ret.index_text = uadv(p->keyword); + } + } + } - for (; source; source = source->next) { - if (source->type == para_Config) { - if (!ustricmp(source->keyword, L"info-filename")) { + for (p = source; p; p = p->next) { + if (p->type == para_Config) { + if (!ustricmp(p->keyword, L"info-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"info-max-file-size")) { - ret.maxfilesize = utoi(uadv(source->keyword)); + ret.filename = dupstr(adv(p->origkeyword)); + } else if (!ustricmp(p->keyword, L"info-charset")) { + ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-max-file-size")) { + ret.maxfilesize = utoi(uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-width")) { + ret.width = utoi(uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-indent-code")) { + ret.indent_code = utoi(uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-index-width")) { + ret.index_width = utoi(uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-list-indent")) { + ret.listindentbefore = utoi(uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-listitem-indent")) { + ret.listindentafter = utoi(uadv(p->keyword)); + } else if (!ustricmp(p->keyword, L"info-section-suffix")) { + ret.sectsuffix = uadv(p->keyword); + } else if (!ustricmp(p->keyword, L"info-underline")) { + ret.atitle.underline = ret.achapter.underline = + uadv(p->keyword); + for (n = 0; n < ret.nasect; n++) + ret.asect[n].underline = ret.atitle.underline; + } else if (!ustricmp(p->keyword, L"info-chapter-underline")) { + ret.achapter.underline = uadv(p->keyword); + } else if 
(!ustricmp(p->keyword, L"info-section-underline")) { + wchar_t *q = uadv(p->keyword); + int n = 0; + if (uisdigit(*q)) { + n = utoi(q); + q = uadv(q); + } + if (n >= ret.nasect) { + int i; + ret.asect = sresize(ret.asect, n+1, alignstruct); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect-1]; + ret.nasect = n+1; + } + ret.asect[n].underline = q; + } else if (!ustricmp(p->keyword, L"text-title-underline")) { + ret.atitle.underline = uadv(p->keyword); + } else if (!ustricmp(p->keyword, L"info-bullet")) { + ret.bullet = uadv(p->keyword); + } else if (!ustricmp(p->keyword, L"info-rule")) { + ret.rule = uadv(p->keyword); + } else if (!ustricmp(p->keyword, L"info-list-suffix")) { + ret.listsuffix = uadv(p->keyword); + } else if (!ustricmp(p->keyword, L"info-emphasis")) { + if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) { + ret.startemph = uadv(p->keyword); + ret.endemph = uadv(ret.startemph); + } + } else if (!ustricmp(p->keyword, L"info-quotes")) { + if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) { + ret.lquote = uadv(p->keyword); + ret.rquote = uadv(ret.lquote); + } } } } + /* + * Now process fallbacks on quote characters, underlines, the + * rule character, the emphasis characters, and bullets. 
+ */ + while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) && + (!cvt_ok(ret.charset, ret.lquote) || + !cvt_ok(ret.charset, ret.rquote))) { + ret.lquote = uadv(ret.rquote); + ret.rquote = uadv(ret.lquote); + } + + while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) && + (!cvt_ok(ret.charset, ret.startemph) || + !cvt_ok(ret.charset, ret.endemph))) { + ret.startemph = uadv(ret.endemph); + ret.endemph = uadv(ret.startemph); + } + + while (*ret.atitle.underline && *uadv(ret.atitle.underline) && + !cvt_ok(ret.charset, ret.atitle.underline)) + ret.atitle.underline = uadv(ret.atitle.underline); + + while (*ret.achapter.underline && *uadv(ret.achapter.underline) && + !cvt_ok(ret.charset, ret.achapter.underline)) + ret.achapter.underline = uadv(ret.achapter.underline); + + for (n = 0; n < ret.nasect; n++) { + while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) && + !cvt_ok(ret.charset, ret.asect[n].underline)) + ret.asect[n].underline = uadv(ret.asect[n].underline); + } + + while (*ret.bullet && *uadv(ret.bullet) && + !cvt_ok(ret.charset, ret.bullet)) + ret.bullet = uadv(ret.bullet); + + while (*ret.rule && *uadv(ret.rule) && + !cvt_ok(ret.charset, ret.rule)) + ret.rule = uadv(ret.rule); + return ret; } paragraph *info_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = ""; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"info-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"info-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("info-filename", filename, NULL); } void info_backend(paragraph *sourceform, keywordlist *keywords, - indexdata *idx) { + indexdata *idx, void *unused) { 
paragraph *p; infoconfig conf; word *prefix, *body, *wp; word spaceword; - char *prefixextra; + wchar_t *prefixextra; int nesting, nestindent; int indentb, indenta; int filepos; int has_index; - rdstringc intro_text = { 0, 0, NULL }; + info_data intro_text = EMPTY_INFO_DATA; node *topnode, *currnode; word bullet; FILE *fp; - /* - * FIXME - */ - int width = 70, listindentbefore = 1, listindentafter = 3; - int indent_code = 2, index_width = 40; - - IGNORE(keywords); /* we don't happen to need this */ - IGNORE(idx); /* or this */ + IGNORE(unused); conf = info_configure(sourceform); /* * Go through and create a node for each section. */ - topnode = info_node_new("Top"); + topnode = info_node_new("Top", conf.charset); currnode = topnode; for (p = sourceform; p; p = p->next) switch (p->type) { /* @@ -163,8 +328,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, node *newnode, *upnode; char *nodename; - nodename = info_node_name(p); - newnode = info_node_new(nodename); + nodename = info_node_name_for_para(p, &conf); + newnode = info_node_new(nodename, conf.charset); sfree(nodename); p->private_data = newnode; @@ -183,6 +348,9 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, currnode = newnode; } break; + default: + p->private_data = NULL; + break; } /* @@ -193,15 +361,17 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, indexentry *entry; for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { - info_idx *ii = mknew(info_idx); - rdstringc rs = { 0, 0, NULL }; + info_idx *ii = snew(info_idx); + info_data id = EMPTY_INFO_DATA; + + id.charset = conf.charset; ii->nnodes = ii->nodesize = 0; ii->nodes = NULL; - info_rdaddwc(&rs, entry->text, NULL, FALSE); + ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf); - ii->text = rs.text; + ii->text = id.output.text; entry->backend_data = ii; } @@ -213,11 +383,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, * good place to put the copyright 
notice and the version IDs. * Also, Info directory entries are expected to go here. */ + intro_text.charset = conf.charset; - rdaddsc(&intro_text, + info_rdaddsc(&intro_text, "This Info file generated by Halibut, "); - rdaddsc(&intro_text, version); - rdaddsc(&intro_text, "\n\n"); + info_rdaddsc(&intro_text, version); + info_rdaddsc(&intro_text, "\n\n"); for (p = sourceform; p; p = p->next) if (p->type == para_Config && @@ -226,62 +397,57 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, char *s; section = uadv(p->keyword); - shortname = *section ? uadv(section) : NULL; - longname = *shortname ? uadv(shortname) : NULL; - kw = *longname ? uadv(longname) : NULL; + shortname = *section ? uadv(section) : L""; + longname = *shortname ? uadv(shortname) : L""; + kw = *longname ? uadv(longname) : L""; if (!*longname) { - error(err_infodirentry, &p->fpos); + error(err_cfginsufarg, &p->fpos, p->origkeyword, 3); continue; } - rdaddsc(&intro_text, "INFO-DIR-SECTION "); - s = utoa_dup(section); - rdaddsc(&intro_text, s); - sfree(s); - rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* "); - s = utoa_dup(shortname); - rdaddsc(&intro_text, s); - sfree(s); - rdaddsc(&intro_text, ": ("); + info_rdaddsc(&intro_text, "INFO-DIR-SECTION "); + info_rdadds(&intro_text, section); + info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* "); + info_rdadds(&intro_text, shortname); + info_rdaddsc(&intro_text, ": ("); s = dupstr(conf.filename); if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info")) s[strlen(s)-5] = '\0'; - rdaddsc(&intro_text, s); + info_rdaddsc(&intro_text, s); sfree(s); - rdaddsc(&intro_text, ")"); + info_rdaddsc(&intro_text, ")"); if (*kw) { keyword *kwl = kw_lookup(keywords, kw); if (kwl && kwl->para->private_data) { node *n = (node *)kwl->para->private_data; - rdaddsc(&intro_text, n->name); + info_rdaddsc(&intro_text, n->name); } } - rdaddsc(&intro_text, ". 
"); - s = utoa_dup(longname); - rdaddsc(&intro_text, s); - sfree(s); - rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n"); + info_rdaddsc(&intro_text, ". "); + info_rdadds(&intro_text, longname); + info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n"); } for (p = sourceform; p; p = p->next) if (p->type == para_Copyright) info_para(&intro_text, NULL, NULL, p->words, keywords, - 0, 0, width); + 0, 0, conf.width, &conf); for (p = sourceform; p; p = p->next) if (p->type == para_VersionID) - info_versionid(&intro_text, p->words); + info_versionid(&intro_text, p->words, &conf); - if (intro_text.text[intro_text.pos-1] != '\n') - rdaddc(&intro_text, '\n'); + if (intro_text.output.text[intro_text.output.pos-1] != '\n') + info_rdaddc(&intro_text, '\n'); /* Do the title */ for (p = sourceform; p; p = p->next) if (p->type == para_Title) - info_heading(&topnode->text, NULL, p->words, width); + info_heading(&topnode->text, NULL, p->words, + conf.atitle, conf.width, &conf); - nestindent = listindentbefore + listindentafter; + nestindent = conf.listindentbefore + conf.listindentafter; nesting = 0; currnode = topnode; @@ -330,18 +496,25 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, assert(currnode->up); if (!currnode->up->started_menu) { - rdaddsc(&currnode->up->text, "* Menu:\n\n"); + info_rdaddsc(&currnode->up->text, "* Menu:\n\n"); currnode->up->started_menu = TRUE; } - info_menu_item(&currnode->up->text, currnode, p); + info_menu_item(&currnode->up->text, currnode, p, &conf); has_index |= info_check_index(p->words, currnode, idx); - info_heading(&currnode->text, p->kwtext, p->words, width); + if (p->type == para_Chapter || p->type == para_Appendix || + p->type == para_UnnumberedChapter) + info_heading(&currnode->text, p->kwtext, p->words, + conf.achapter, conf.width, &conf); + else + info_heading(&currnode->text, p->kwtext, p->words, + conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux], + conf.width, &conf); nesting = 0; break; case para_Rule: - 
info_rule(&currnode->text, nesting, width - nesting); + info_rule(&currnode->text, nesting, conf.width - nesting, &conf); break; case para_Normal: @@ -356,21 +529,21 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, bullet.next = NULL; bullet.alt = NULL; bullet.type = word_Normal; - bullet.text = L"-"; /* FIXME: configurability */ + bullet.text = conf.bullet; prefix = &bullet; prefixextra = NULL; - indentb = listindentbefore; - indenta = listindentafter; + indentb = conf.listindentbefore; + indenta = conf.listindentafter; } else if (p->type == para_NumberedList) { prefix = p->kwtext; - prefixextra = "."; /* FIXME: configurability */ - indentb = listindentbefore; - indenta = listindentafter; + prefixextra = conf.listsuffix; + indentb = conf.listindentbefore; + indenta = conf.listindentafter; } else if (p->type == para_Description) { prefix = NULL; prefixextra = NULL; - indentb = listindentbefore; - indenta = listindentafter; + indentb = conf.listindentbefore; + indenta = conf.listindentafter; } else { prefix = NULL; prefixextra = NULL; @@ -390,7 +563,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, } info_para(&currnode->text, prefix, prefixextra, body, keywords, nesting + indentb, indenta, - width - nesting - indentb - indenta); + conf.width - nesting - indentb - indenta, &conf); if (wp) { wp->next = NULL; free_word_list(body); @@ -399,8 +572,8 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, case para_Code: info_codepara(&currnode->text, p->words, - nesting + indent_code, - width - nesting - 2 * indent_code); + nesting + conf.indent_code, + conf.width - nesting - 2 * conf.indent_code); break; } @@ -411,23 +584,32 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, node *newnode; int i, j, k; indexentry *entry; + char *nodename; + + nodename = info_node_name_for_text(conf.index_text, &conf); + newnode = info_node_new(nodename, conf.charset); + sfree(nodename); - newnode = info_node_new("Index"); 
newnode->up = topnode; currnode->next = newnode; newnode->prev = currnode; currnode->listnext = newnode; - rdaddsc(&newnode->text, "Index\n-----\n\n"); + k = info_rdadds(&newnode->text, conf.index_text); + info_rdaddsc(&newnode->text, "\n"); + while (k > 0) { + info_rdadds(&newnode->text, conf.achapter.underline); + k -= ustrwid(conf.achapter.underline, conf.charset); + } + info_rdaddsc(&newnode->text, "\n\n"); - info_menu_item(&topnode->text, newnode, NULL); + info_menu_item(&topnode->text, newnode, NULL, &conf); for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { info_idx *ii = (info_idx *)entry->backend_data; for (j = 0; j < ii->nnodes; j++) { - int pos0 = newnode->text.pos; /* * When we have multiple references for a single * index term, we only display the actual term on @@ -435,12 +617,12 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, * really are the same. */ if (j == 0) - rdaddsc(&newnode->text, ii->text); - for (k = newnode->text.pos - pos0; k < index_width; k++) - rdaddc(&newnode->text, ' '); - rdaddsc(&newnode->text, " *Note "); - rdaddsc(&newnode->text, ii->nodes[j]->name); - rdaddsc(&newnode->text, "::\n"); + info_rdaddsc(&newnode->text, ii->text); + for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++) + info_rdaddc(&newnode->text, ' '); + info_rdaddsc(&newnode->text, " *Note "); + info_rdaddsc(&newnode->text, ii->nodes[j]->name); + info_rdaddsc(&newnode->text, "::\n"); } } } @@ -450,31 +632,31 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, * and the node line at the top. 
*/ for (currnode = topnode; currnode; currnode = currnode->listnext) { - char *origtext = currnode->text.text; - currnode->text.text = NULL; - currnode->text.pos = currnode->text.size = 0; - rdaddsc(&currnode->text, "\037\nFile: "); - rdaddsc(&currnode->text, conf.filename); - rdaddsc(&currnode->text, ", Node: "); - rdaddsc(&currnode->text, currnode->name); + char *origtext = currnode->text.output.text; + currnode->text = empty_info_data; + currnode->text.charset = conf.charset; + info_rdaddsc(&currnode->text, "\037\nFile: "); + info_rdaddsc(&currnode->text, conf.filename); + info_rdaddsc(&currnode->text, ", Node: "); + info_rdaddsc(&currnode->text, currnode->name); if (currnode->prev) { - rdaddsc(&currnode->text, ", Prev: "); - rdaddsc(&currnode->text, currnode->prev->name); + info_rdaddsc(&currnode->text, ", Prev: "); + info_rdaddsc(&currnode->text, currnode->prev->name); } - rdaddsc(&currnode->text, ", Up: "); - rdaddsc(&currnode->text, (currnode->up ? - currnode->up->name : "(dir)")); + info_rdaddsc(&currnode->text, ", Up: "); + info_rdaddsc(&currnode->text, (currnode->up ? + currnode->up->name : "(dir)")); if (currnode->next) { - rdaddsc(&currnode->text, ", Next: "); - rdaddsc(&currnode->text, currnode->next->name); + info_rdaddsc(&currnode->text, ", Next: "); + info_rdaddsc(&currnode->text, currnode->next->name); } - rdaddsc(&currnode->text, "\n\n"); - rdaddsc(&currnode->text, origtext); + info_rdaddsc(&currnode->text, "\n\n"); + info_rdaddsc(&currnode->text, origtext); /* * Just make _absolutely_ sure we end with a newline. */ - if (currnode->text.text[currnode->text.pos-1] != '\n') - rdaddc(&currnode->text, '\n'); + if (currnode->text.output.text[currnode->text.output.pos-1] != '\n') + info_rdaddc(&currnode->text, '\n'); sfree(origtext); } @@ -482,25 +664,25 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, /* * Compute the offsets for the tag table. 
*/ - filepos = intro_text.pos; + filepos = intro_text.output.pos; for (currnode = topnode; currnode; currnode = currnode->listnext) { currnode->pos = filepos; - filepos += currnode->text.pos; + filepos += currnode->text.output.pos; } /* * Split into sub-files. */ if (conf.maxfilesize > 0) { - int currfilesize = intro_text.pos, currfilenum = 1; + int currfilesize = intro_text.output.pos, currfilenum = 1; for (currnode = topnode; currnode; currnode = currnode->listnext) { - if (currfilesize > intro_text.pos && - currfilesize + currnode->text.pos > conf.maxfilesize) { + if (currfilesize > intro_text.output.pos && + currfilesize + currnode->text.output.pos > conf.maxfilesize) { currfilenum++; - currfilesize = intro_text.pos; + currfilesize = intro_text.output.pos; } currnode->filenum = currfilenum; - currfilesize += currnode->text.pos; + currfilesize += currnode->text.output.pos; } } @@ -512,10 +694,10 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, error(err_cantopenw, conf.filename); return; } - fputs(intro_text.text, fp); + fputs(intro_text.output.text, fp); if (conf.maxfilesize == 0) { for (currnode = topnode; currnode; currnode = currnode->listnext) - fputs(currnode->text.text, fp); + fputs(currnode->text.output.text, fp); } else { int filenum = 0; fprintf(fp, "\037\nIndirect:\n"); @@ -549,7 +731,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, if (fp) fclose(fp); - fname = mknewa(char, strlen(conf.filename) + 40); + fname = snewn(strlen(conf.filename) + 40, char); sprintf(fname, "%s-%d", conf.filename, filenum); fp = fopen(fname, "w"); if (!fp) { @@ -557,9 +739,9 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, return; } sfree(fname); - fputs(intro_text.text, fp); + fputs(intro_text.output.text, fp); } - fputs(currnode->text.text, fp); + fputs(currnode->text.output.text, fp); } if (fp) @@ -596,7 +778,7 @@ static int info_check_index(word *w, node *n, indexdata *idx) if (ii->nnodes >= ii->nodesize) { 
ii->nodesize += 32; - ii->nodes = resize(ii->nodes, ii->nodesize); + ii->nodes = sresize(ii->nodes, ii->nodesize, node *); } ii->nodes[ii->nnodes++] = n; @@ -609,55 +791,6 @@ static int info_check_index(word *w, node *n, indexdata *idx) return ret; } -/* - * Convert a wide string into a string of chars. If `result' is - * non-NULL, mallocs the resulting string and stores a pointer to - * it in `*result'. If `result' is NULL, merely checks whether all - * characters in the string are feasible for the output character - * set. - * - * Return is nonzero if all characters are OK. If not all - * characters are OK but `result' is non-NULL, a result _will_ - * still be generated! - */ -static int info_convert(wchar_t *s, char **result) { - /* - * FIXME. Currently this is ISO8859-1 only. - */ - int doing = (result != 0); - int ok = TRUE; - char *p = NULL; - int plen = 0, psize = 0; - - for (; *s; s++) { - wchar_t c = *s; - char outc; - - if ((c >= 32 && c <= 126) || - (c >= 160 && c <= 255)) { - /* Char is OK. */ - outc = (char)c; - } else { - /* Char is not OK. */ - ok = FALSE; - outc = 0xBF; /* approximate the good old DEC `uh?' */ - } - if (doing) { - if (plen >= psize) { - psize = plen + 256; - p = resize(p, psize); - } - p[plen++] = outc; - } - } - if (doing) { - p = resize(p, plen+1); - p[plen] = '\0'; - *result = p; - } - return ok; -} - static word *info_transform_wordlist(word *words, keywordlist *keywords) { word *ret = dup_word_list(words); @@ -675,7 +808,7 @@ static word *info_transform_wordlist(word *words, keywordlist *keywords) * In Info, we do nothing special for xrefs to * numbered list items or bibliography entries. 
*/ - break; + continue; } else { /* * An xref to a different section has its text @@ -715,8 +848,9 @@ static word *info_transform_wordlist(word *words, keywordlist *keywords) return ret; } -static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) { - char *c; +static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs, + infoconfig *cfg) { + int ret = 0; for (; words && words != end; words = words->next) switch (words->type) { case word_HyperLink: @@ -742,56 +876,64 @@ static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) { if (towordstyle(words->type) == word_Emph && (attraux(words->aux) == attr_First || attraux(words->aux) == attr_Only)) - rdaddc(rs, '_'); /* FIXME: configurability */ + ret += info_rdadds(id, cfg->startemph); else if (towordstyle(words->type) == word_Code && (attraux(words->aux) == attr_First || attraux(words->aux) == attr_Only)) - rdaddc(rs, '`'); /* FIXME: configurability */ + ret += info_rdadds(id, cfg->lquote); if (removeattr(words->type) == word_Normal) { - if (info_convert(words->text, &c)) - rdaddsc(rs, c); + if (cvt_ok(id->charset, words->text) || !words->alt) + ret += info_rdadds(id, words->text); else - info_rdaddwc(rs, words->alt, NULL, FALSE); - sfree(c); + ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg); } else if (removeattr(words->type) == word_WhiteSpace) { - rdaddc(rs, ' '); + ret += info_rdadd(id, L' '); } else if (removeattr(words->type) == word_Quote) { - rdaddc(rs, quoteaux(words->aux) == quote_Open ? '`' : '\''); - /* FIXME: configurability */ + ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ? 
+ cfg->lquote : cfg->rquote); } if (towordstyle(words->type) == word_Emph && (attraux(words->aux) == attr_Last || attraux(words->aux) == attr_Only)) - rdaddc(rs, '_'); /* FIXME: configurability */ + ret += info_rdadds(id, cfg->endemph); else if (towordstyle(words->type) == word_Code && (attraux(words->aux) == attr_Last || attraux(words->aux) == attr_Only)) - rdaddc(rs, '\''); /* FIXME: configurability */ + ret += info_rdadds(id, cfg->rquote); break; case word_UpperXref: case word_LowerXref: if (xrefs && words->private_data) { - rdaddsc(rs, "*Note "); - rdaddsc(rs, ((node *)words->private_data)->name); - rdaddsc(rs, "::"); + /* + * This bit is structural and so must be done in char + * rather than wchar_t. + */ + ret += info_rdaddsc(id, "*Note "); + ret += info_rdaddsc(id, ((node *)words->private_data)->name); + ret += info_rdaddsc(id, "::"); } break; } + + return ret; } -static int info_width_internal(word *words, int xrefs); +static int info_width_internal(word *words, int xrefs, infoconfig *cfg); -static int info_width_internal_list(word *words, int xrefs) { +static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) { int w = 0; while (words) { - w += info_width_internal(words, xrefs); + w += info_width_internal(words, xrefs, cfg); words = words->next; } return w; } -static int info_width_internal(word *words, int xrefs) { +static int info_width_internal(word *words, int xrefs, infoconfig *cfg) { + int wid; + int attr; + switch (words->type) { case word_HyperLink: case word_HyperEnd: @@ -799,18 +941,44 @@ static int info_width_internal(word *words, int xrefs) { case word_IndexRef: return 0; + case word_UpperXref: + case word_LowerXref: + if (xrefs && words->private_data) { + /* "*Note " plus "::" comes to 8 characters */ + return 8 + strwid(((node *)words->private_data)->name, + cfg->charset); + } else + return 0; + } + + assert(words->type < word_internal_endattrs); + + wid = 0; + attr = towordstyle(words->type); + + if (attr == word_Emph || 
attr == word_Code) { + if (attraux(words->aux) == attr_Only || + attraux(words->aux) == attr_First) + wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote, + cfg->charset); + } + if (attr == word_Emph || attr == word_Code) { + if (attraux(words->aux) == attr_Only || + attraux(words->aux) == attr_Last) + wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote, + cfg->charset); + } + + switch (words->type) { case word_Normal: case word_Emph: case word_Code: case word_WeakCode: - return (((words->type == word_Emph || - words->type == word_Code) - ? (attraux(words->aux) == attr_Only ? 2 : - attraux(words->aux) == attr_Always ? 0 : 1) - : 0) + - (info_convert(words->text, NULL) ? - ustrlen(words->text) : - info_width_internal_list(words->alt, xrefs))); + if (cvt_ok(cfg->charset, words->text) || !words->alt) + wid += ustrwid(words->text, cfg->charset); + else + wid += info_width_internal_list(words->alt, xrefs, cfg); + return wid; case word_WhiteSpace: case word_EmphSpace: @@ -822,86 +990,76 @@ static int info_width_internal(word *words, int xrefs) { case word_WkCodeQuote: assert(words->type != word_CodeQuote && words->type != word_WkCodeQuote); - return (((towordstyle(words->type) == word_Emph || - towordstyle(words->type) == word_Code) - ? (attraux(words->aux) == attr_Only ? 2 : - attraux(words->aux) == attr_Always ? 
0 : 1) - : 0) + 1); - - case word_UpperXref: - case word_LowerXref: - if (xrefs && words->private_data) { - /* "*Note " plus "::" comes to 8 characters */ - return 8 + strlen(((node *)words->private_data)->name); - } - break; + if (removeattr(words->type) == word_Quote) { + if (quoteaux(words->aux) == quote_Open) + wid += ustrwid(cfg->lquote, cfg->charset); + else + wid += ustrwid(cfg->rquote, cfg->charset); + } else + wid++; /* space */ } - return 0; /* should never happen */ + return wid; } -static int info_width_noxrefs(word *words) +static int info_width_noxrefs(void *ctx, word *words) { - return info_width_internal(words, FALSE); + return info_width_internal(words, FALSE, (infoconfig *)ctx); } -static int info_width_xrefs(word *words) +static int info_width_xrefs(void *ctx, word *words) { - return info_width_internal(words, TRUE); + return info_width_internal(words, TRUE, (infoconfig *)ctx); } -static void info_heading(rdstringc *text, word *tprefix, - word *words, int width) { - rdstringc t = { 0, 0, NULL }; - int margin, length; +static void info_heading(info_data *text, word *tprefix, + word *words, alignstruct align, + int width, infoconfig *cfg) { + int length; int firstlinewidth, wrapwidth; - int i; wrappedline *wrapping, *p; + length = 0; if (tprefix) { - info_rdaddwc(&t, tprefix, NULL, FALSE); - rdaddsc(&t, ": "); /* FIXME: configurability */ + length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg); + length += info_rdadds(text, cfg->sectsuffix); } - margin = length = (t.text ? strlen(t.text) : 0); - margin = 0; - firstlinewidth = width - length; wrapwidth = width; + firstlinewidth = width - length; - wrapping = wrap_para(words, firstlinewidth, wrapwidth, info_width_noxrefs); + wrapping = wrap_para(words, firstlinewidth, wrapwidth, + info_width_noxrefs, cfg, 0); for (p = wrapping; p; p = p->next) { - info_rdaddwc(&t, p->begin, p->end, FALSE); - length = (t.text ? 
strlen(t.text) : 0); - for (i = 0; i < margin; i++) - rdaddc(text, ' '); - rdaddsc(text, t.text); - rdaddc(text, '\n'); - for (i = 0; i < margin; i++) - rdaddc(text, ' '); - while (length--) - rdaddc(text, '-'); - rdaddc(text, '\n'); - margin = 0; - sfree(t.text); - t = empty_rdstringc; + length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg); + info_rdadd(text, L'\n'); + if (*align.underline) { + while (length > 0) { + info_rdadds(text, align.underline); + length -= ustrwid(align.underline, cfg->charset); + } + info_rdadd(text, L'\n'); + } + length = 0; } wrap_free(wrapping); - rdaddc(text, '\n'); - - sfree(t.text); + info_rdadd(text, L'\n'); } -static void info_rule(rdstringc *text, int indent, int width) { - while (indent--) rdaddc(text, ' '); - while (width--) rdaddc(text, '-'); - rdaddc(text, '\n'); - rdaddc(text, '\n'); +static void info_rule(info_data *text, int indent, int width, infoconfig *cfg) +{ + while (indent--) info_rdadd(text, L' '); + while (width > 0) { + info_rdadds(text, cfg->rule); + width -= ustrwid(cfg->rule, cfg->charset); + } + info_rdadd(text, L'\n'); + info_rdadd(text, L'\n'); } -static void info_para(rdstringc *text, word *prefix, char *prefixextra, - word *input, keywordlist *keywords, - int indent, int extraindent, int width) { +static void info_para(info_data *text, word *prefix, wchar_t *prefixextra, + word *input, keywordlist *keywords, int indent, + int extraindent, int width, infoconfig *cfg) { wrappedline *wrapping, *p; word *words; - rdstringc pfx = { 0, 0, NULL }; int e; int i; int firstlinewidth = width; @@ -909,74 +1067,69 @@ static void info_para(rdstringc *text, word *prefix, char *prefixextra, words = info_transform_wordlist(input, keywords); if (prefix) { - info_rdaddwc(&pfx, prefix, NULL, FALSE); - if (prefixextra) - rdaddsc(&pfx, prefixextra); for (i = 0; i < indent; i++) - rdaddc(text, ' '); - rdaddsc(text, pfx.text); + info_rdadd(text, L' '); + e = info_rdaddwc(text, prefix, NULL, FALSE, cfg); + if (prefixextra) + 
e += info_rdadds(text, prefixextra); /* If the prefix is too long, shorten the first line to fit. */ - e = extraindent - strlen(pfx.text); + e = extraindent - e; if (e < 0) { firstlinewidth += e; /* this decreases it, since e < 0 */ if (firstlinewidth < 0) { e = indent + extraindent; firstlinewidth = width; - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); } else e = 0; } - sfree(pfx.text); } else e = indent + extraindent; - wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs); + wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs, + cfg, 0); for (p = wrapping; p; p = p->next) { for (i = 0; i < e; i++) - rdaddc(text, ' '); - info_rdaddwc(text, p->begin, p->end, TRUE); - rdaddc(text, '\n'); + info_rdadd(text, L' '); + info_rdaddwc(text, p->begin, p->end, TRUE, cfg); + info_rdadd(text, L'\n'); e = indent + extraindent; } wrap_free(wrapping); - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); free_word_list(words); } -static void info_codepara(rdstringc *text, word *words, +static void info_codepara(info_data *text, word *words, int indent, int width) { int i; for (; words; words = words->next) if (words->type == word_WeakCode) { - char *c; - info_convert(words->text, &c); - if (strlen(c) > (size_t)width) { + for (i = 0; i < indent; i++) + info_rdadd(text, L' '); + if (info_rdadds(text, words->text) > width) { /* FIXME: warn */ } - for (i = 0; i < indent; i++) - rdaddc(text, ' '); - rdaddsc(text, c); - rdaddc(text, '\n'); - sfree(c); + info_rdadd(text, L'\n'); } - rdaddc(text, '\n'); + info_rdadd(text, L'\n'); } -static void info_versionid(rdstringc *text, word *words) { - rdaddc(text, '['); /* FIXME: configurability */ - info_rdaddwc(text, words, NULL, FALSE); - rdaddsc(text, "]\n"); +static void info_versionid(info_data *text, word *words, infoconfig *cfg) { + info_rdadd(text, L'['); + info_rdaddwc(text, words, NULL, FALSE, cfg); + info_rdadds(text, L"]\n"); } -static node *info_node_new(char *name) +static node *info_node_new(char 
*name, int charset) { node *n; - n = mknew(node); - n->text.text = NULL; - n->text.pos = n->text.size = 0; + n = snew(node); + n->text = empty_info_data; + n->text.charset = charset; n->up = n->next = n->prev = n->lastchild = n->listnext = NULL; n->name = dupstr(name); n->started_menu = FALSE; @@ -984,31 +1137,53 @@ static node *info_node_new(char *name) return n; } -static char *info_node_name(paragraph *par) +static char *info_node_name_core(info_data *id, filepos *fpos) { - rdstringc rsc = { 0, 0, NULL }; char *p, *q; - info_rdaddwc(&rsc, par->kwtext ? par->kwtext : par->words, NULL, FALSE); /* - * We cannot have commas or colons in a node name. Remove any - * that we find, with a warning. + * We cannot have commas, colons or parentheses in a node name. + * Remove any that we find, with a warning. */ - p = q = rsc.text; + p = q = id->output.text; while (*p) { - if (*p == ':' || *p == ',') { - error(err_infonodechar, &par->fpos, *p); + if (*p == ':' || *p == ',' || *p == '(' || *p == ')') { + error(err_infonodechar, fpos, *p); } else { *q++ = *p; } p++; } - *p = '\0'; + *q = '\0'; + + return id->output.text; +} + +static char *info_node_name_for_para(paragraph *par, infoconfig *cfg) +{ + info_data id = EMPTY_INFO_DATA; + + id.charset = cfg->charset; + info_rdaddwc(&id, par->kwtext ? 
par->kwtext : par->words, + NULL, FALSE, cfg); + info_rdaddsc(&id, NULL); + + return info_node_name_core(&id, &par->fpos); +} + +static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg) +{ + info_data id = EMPTY_INFO_DATA; + + id.charset = cfg->charset; + info_rdadds(&id, text); + info_rdaddsc(&id, NULL); - return rsc.text; + return info_node_name_core(&id, NULL); } -static void info_menu_item(rdstringc *text, node *n, paragraph *p) +static void info_menu_item(info_data *text, node *n, paragraph *p, + infoconfig *cfg) { /* * FIXME: Depending on how we're doing node names in this info @@ -1020,14 +1195,97 @@ static void info_menu_item(rdstringc *text, node *n, paragraph *p) * * * Chapter number: Node name. * - * + * This function mostly works in char rather than wchar_t, + * because a menu item is a structural component. */ - rdaddsc(text, "* "); - rdaddsc(text, n->name); - rdaddsc(text, "::"); + info_rdaddsc(text, "* "); + info_rdaddsc(text, n->name); + info_rdaddsc(text, "::"); if (p) { - rdaddc(text, ' '); - info_rdaddwc(text, p->words, NULL, FALSE); + info_rdaddc(text, ' '); + info_rdaddwc(text, p->words, NULL, FALSE, cfg); } - rdaddc(text, '\n'); + info_rdaddc(text, '\n'); +} + +/* + * These functions implement my wrapper on the rdadd* calls which + * allows me to switch arbitrarily between literal octet-string + * text and charset-translated Unicode. (Because no matter what + * character set I write the actual text in, I expect info readers + * to treat node names and file names literally and to expect + * keywords like `*Note' in their canonical form, so I have to take + * steps to ensure that those structural elements of the file + * aren't messed with.) 
+ */ +static int info_rdadds(info_data *d, wchar_t const *wcs) +{ + if (!d->wcmode) { + d->state = charset_init_state; + d->wcmode = TRUE; + } + + if (wcs) { + char buf[256]; + int len, width, ret; + + width = ustrwid(wcs, d->charset); + + len = ustrlen(wcs); + while (len > 0) { + int prevlen = len; + + ret = charset_from_unicode(&wcs, &len, buf, lenof(buf), + d->charset, &d->state, NULL); + + assert(len < prevlen); + + if (ret > 0) { + buf[ret] = '\0'; + rdaddsc(&d->output, buf); + } + } + + return width; + } else + return 0; +} + +static int info_rdaddsc(info_data *d, char const *cs) +{ + if (d->wcmode) { + char buf[256]; + int ret; + + ret = charset_from_unicode(NULL, 0, buf, lenof(buf), + d->charset, &d->state, NULL); + if (ret > 0) { + buf[ret] = '\0'; + rdaddsc(&d->output, buf); + } + + d->wcmode = FALSE; + } + + if (cs) { + rdaddsc(&d->output, cs); + return strwid(cs, d->charset); + } else + return 0; +} + +static int info_rdadd(info_data *d, wchar_t wc) +{ + wchar_t wcs[2]; + wcs[0] = wc; + wcs[1] = L'\0'; + return info_rdadds(d, wcs); +} + +static int info_rdaddc(info_data *d, char c) +{ + char cs[2]; + cs[0] = c; + cs[1] = '\0'; + return info_rdaddsc(d, cs); }