/*
- * info backend for Halibut
- *
- * TODO:
+ * Info backend for Halibut
+ *
+ * The Info file format isn't well-specified, and what specification
+ * there is is scattered all over the place. Sources include:
+ * (info), from GNU Texinfo.
+ * (texinfo), also from GNU Texinfo.
+ * (emacs)Misc Help, and (emacs)Info Lookup, from GNU Emacs.
+ * info.el, from GNU Emacs.
*
- * - configurable choice of how to allocate node names
- * - escape, warn or simply remove commas and colons in node
- * names; also test colons in index terms.
- * - might be helpful to diagnose duplicate node names too!
- * - test everything in info(1), and probably jed too
+ * Possible future work:
*
- * Later:
+ * - configurable choice of how to allocate node names?
+ * + possibly a template-like approach, choosing node names to
+ * be the full section title or perhaps the internal keyword?
+ * + neither of those seems quite right. Perhaps instead a
+ * Windows Help-like mechanism, where a magic config
+ * directive allows user choice of name for every node.
+ * + Only trouble with that is, now what happens to the section
+ * numbers? Do they become completely vestigial and just sit
+ * in the title text of each node? Or do we keep them in the
+ * menus somehow? I think people might occasionally want to
+ * go to a section by number, if only because all the _other_
+ * formats of the same document will reference the numbers
+ * all the time. So our menu lines could look like one of
+ * these:
+ * * Nodename: Section 1.2. Title of section.
+ * * Section 1.2: Nodename. Title of section.
*
- * - configurable indentation, bullets, emphasis, quotes etc?
+ * - might be helpful to diagnose duplicate node names!
+ *
+ * - Indices generated by makeinfo use a menu rather than a bunch of
+ * cross-references, which reduces visual clutter rather. For
+ * singly-referenced items, it looks like:
+ * * toner cartridge, replacing: Toner.
+ * It does a horrid job on multiply-referenced entries, though,
+ * perhaps because the name before the colon is meant to be unique.
+ * Info's 'i' command requires the use of a menu -- it fails to
+ * find any index entries at all with Halibut's current index format.
+ *
+ * - The string "*note" is matched case-insensitively, so we could
+ * make things slightly less ugly by using the lower-case version
+ * when the user asks for \k. Unfortunately, standalone Info seems
+ * to match node names case-sensitively, so we can't downcase that.
+ *
+ * - The character encoding used in an Info file can be configured using
+ * an Emacs local variables block at the end, like this:
+ * Local Variables:
+ * coding: iso-8859-1
+ * End:
*/
#include <stdio.h>
#include "halibut.h"
/*
 * alignstruct: formatting for one heading level. `underline' is the
 * string repeated beneath the heading; info_configure() treats it as a
 * list of alternatives stepped through with uadv() until cvt_ok()
 * accepts one for the output charset.
 */
typedef struct {
+ wchar_t *underline;
+} alignstruct;
+
/*
 * infoconfig: every setting collected by info_configure().
 * `maxfilesize' of 0 makes info_backend() write all nodes into the
 * main file; a positive value is the size limit used when splitting
 * into sub-files. `asect' is an allocated array of `nasect' entries,
 * indexed by section depth (deeper sections reuse the last entry).
 */
+typedef struct {
char *filename;
int maxfilesize;
+ int charset;
+ int listindentbefore, listindentafter;
+ int indent_code, width, index_width;
+ alignstruct atitle, achapter, *asect;
+ int nasect;
+ wchar_t *bullet, *listsuffix;
+ wchar_t *startemph, *endemph;
+ wchar_t *lquote, *rquote;
+ wchar_t *sectsuffix;
+ wchar_t *rule;
+ wchar_t *index_text;
} infoconfig;
/*
 * info_data: an output accumulator. `output' is the char buffer that
 * is eventually written with fputs(); `charset' is the output charset
 * it is being filled in. `state' and `wcmode' are maintained by the
 * info_rdadd*() helpers -- NOTE(review): their exact meaning is not
 * visible in this part of the file.
 */
+typedef struct {
+ rdstringc output;
+ int charset;
+ charset_state state;
+ int wcmode;
+} info_data;
+#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
+static const info_data empty_info_data = EMPTY_INFO_DATA;
+
/*
 * node: one Info node. `listnext' chains all nodes in output order;
 * `up', `prev' and `next' supply the names on the node header line.
 * `pos' is the node's offset used for the tag table, `filenum' the
 * sub-file it was assigned to, and `started_menu' records whether the
 * "* Menu:" line has already been added to this node's text.
 */
typedef struct node_tag node;
struct node_tag {
node *listnext;
node *up, *prev, *next, *lastchild;
int pos, started_menu, filenum;
char *name;
- rdstringc text;
+ info_data text;
};
/*
 * info_idx: backend data attached to each index entry. `text' is the
 * rendered index term and `length' the value info_rdaddwc() returned
 * while rendering it. `nodes' is a growable array of the nodes that
 * reference the term: `nnodes' in use, `nodesize' allocated.
 */
typedef struct {
char *text;
+ int length;
int nnodes, nodesize;
node **nodes;
} info_idx;
-static int info_convert(wchar_t *, char **);
-
-static void info_heading(rdstringc *, word *, word *, int);
-static void info_rule(rdstringc *, int, int);
-static void info_para(rdstringc *, word *, char *, word *, keywordlist *,
- int, int, int);
-static void info_codepara(rdstringc *, word *, int, int);
-static void info_versionid(rdstringc *, word *);
-static void info_menu_item(rdstringc *, node *, paragraph *);
+static int info_rdadd(info_data *, wchar_t);
+static int info_rdadds(info_data *, wchar_t const *);
+static int info_rdaddc(info_data *, char);
+static int info_rdaddsc(info_data *, char const *);
+
+static void info_heading(info_data *, word *, word *, alignstruct, int,
+ infoconfig *);
+static void info_rule(info_data *, int, int, infoconfig *);
+static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
+ int, int, int, infoconfig *);
+static void info_codepara(info_data *, word *, int, int);
+static void info_versionid(info_data *, word *, infoconfig *);
+static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
static word *info_transform_wordlist(word *, keywordlist *);
static int info_check_index(word *, node *, indexdata *);
-static void info_rdaddwc(rdstringc *, word *, word *, int);
+static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
-static node *info_node_new(char *name);
-static char *info_node_name(paragraph *p);
+static node *info_node_new(char *name, int charset);
+static char *info_node_name_for_para(paragraph *p, infoconfig *);
+static char *info_node_name_for_text(wchar_t *text, infoconfig *);
static infoconfig info_configure(paragraph *source) {
infoconfig ret;
+ paragraph *p;
+ int n;
/*
* Defaults.
*/
ret.filename = dupstr("output.info");
ret.maxfilesize = 64 << 10;
+ ret.charset = CS_ASCII;
+ ret.width = 70;
+ ret.listindentbefore = 1;
+ ret.listindentafter = 3;
+ ret.indent_code = 2;
+ ret.index_width = 40;
+ ret.listsuffix = L".";
+ ret.bullet = L"\x2022\0-\0\0";
+ ret.rule = L"\x2500\0-\0\0";
+ ret.startemph = L"_\0_\0\0";
+ ret.endemph = uadv(ret.startemph);
+ ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
+ ret.rquote = uadv(ret.lquote);
+ ret.sectsuffix = L": ";
+ /*
+ * Default underline characters are chosen to match those recognised by
+ * Info-fontify-node.
+ */
+ ret.atitle.underline = L"*\0\0";
+ ret.achapter.underline = L"=\0\0";
+ ret.nasect = 2;
+ ret.asect = snewn(ret.nasect, alignstruct);
+ ret.asect[0].underline = L"-\0\0";
+ ret.asect[1].underline = L".\0\0";
+ ret.index_text = L"Index";
- for (; source; source = source->next) {
- if (source->type == para_Config) {
- if (!ustricmp(source->keyword, L"info-filename")) {
+ /*
+ * Two-pass configuration so that we can pick up global config
+ * (e.g. `quotes') before having it overridden by specific
+ * config (`info-quotes'), irrespective of the order in which
+ * they occur.
+ */
+ for (p = source; p; p = p->next) {
+ if (p->type == para_Config) {
+ if (!ustricmp(p->keyword, L"quotes")) {
+ if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+ ret.lquote = uadv(p->keyword);
+ ret.rquote = uadv(ret.lquote);
+ }
+ } else if (!ustricmp(p->keyword, L"index")) {
+ ret.index_text = uadv(p->keyword);
+ }
+ }
+ }
+
+ for (p = source; p; p = p->next) {
+ if (p->type == para_Config) {
+ if (!ustricmp(p->keyword, L"info-filename")) {
sfree(ret.filename);
- ret.filename = utoa_dup(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
- ret.maxfilesize = utoi(uadv(source->keyword));
+ ret.filename = dupstr(adv(p->origkeyword));
+ } else if (!ustricmp(p->keyword, L"info-charset")) {
+ ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
+ ret.maxfilesize = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-width")) {
+ ret.width = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-indent-code")) {
+ ret.indent_code = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-index-width")) {
+ ret.index_width = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-list-indent")) {
+ ret.listindentbefore = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
+ ret.listindentafter = utoi(uadv(p->keyword));
+ } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
+ ret.sectsuffix = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"info-underline")) {
+ ret.atitle.underline = ret.achapter.underline =
+ uadv(p->keyword);
+ for (n = 0; n < ret.nasect; n++)
+ ret.asect[n].underline = ret.atitle.underline;
+ } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
+ ret.achapter.underline = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"info-section-underline")) {
+ wchar_t *q = uadv(p->keyword);
+ int n = 0;
+ if (uisdigit(*q)) {
+ n = utoi(q);
+ q = uadv(q);
+ }
+ if (n >= ret.nasect) {
+ int i;
+ ret.asect = sresize(ret.asect, n+1, alignstruct);
+ for (i = ret.nasect; i <= n; i++)
+ ret.asect[i] = ret.asect[ret.nasect-1];
+ ret.nasect = n+1;
+ }
+ ret.asect[n].underline = q;
+ } else if (!ustricmp(p->keyword, L"text-title-underline")) {
+ ret.atitle.underline = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"info-bullet")) {
+ ret.bullet = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"info-rule")) {
+ ret.rule = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
+ ret.listsuffix = uadv(p->keyword);
+ } else if (!ustricmp(p->keyword, L"info-emphasis")) {
+ if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+ ret.startemph = uadv(p->keyword);
+ ret.endemph = uadv(ret.startemph);
+ }
+ } else if (!ustricmp(p->keyword, L"info-quotes")) {
+ if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+ ret.lquote = uadv(p->keyword);
+ ret.rquote = uadv(ret.lquote);
+ }
}
}
}
+ /*
+ * Now process fallbacks on quote characters, underlines, the
+ * rule character, the emphasis characters, and bullets.
+ */
+ while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
+ (!cvt_ok(ret.charset, ret.lquote) ||
+ !cvt_ok(ret.charset, ret.rquote))) {
+ ret.lquote = uadv(ret.rquote);
+ ret.rquote = uadv(ret.lquote);
+ }
+
+ while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
+ (!cvt_ok(ret.charset, ret.startemph) ||
+ !cvt_ok(ret.charset, ret.endemph))) {
+ ret.startemph = uadv(ret.endemph);
+ ret.endemph = uadv(ret.startemph);
+ }
+
+ while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
+ !cvt_ok(ret.charset, ret.atitle.underline))
+ ret.atitle.underline = uadv(ret.atitle.underline);
+
+ while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
+ !cvt_ok(ret.charset, ret.achapter.underline))
+ ret.achapter.underline = uadv(ret.achapter.underline);
+
+ for (n = 0; n < ret.nasect; n++) {
+ while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
+ !cvt_ok(ret.charset, ret.asect[n].underline))
+ ret.asect[n].underline = uadv(ret.asect[n].underline);
+ }
+
+ while (*ret.bullet && *uadv(ret.bullet) &&
+ !cvt_ok(ret.charset, ret.bullet))
+ ret.bullet = uadv(ret.bullet);
+
+ while (*ret.rule && *uadv(ret.rule) &&
+ !cvt_ok(ret.charset, ret.rule))
+ ret.rule = uadv(ret.rule);
+
return ret;
}
/*
 * Command-line hook for choosing the output file name: returns the
 * paragraph that cmdline_cfg_simple() builds for the "info-filename"
 * keyword with `filename' as its argument. The removed lines below
 * assembled the equivalent para_Config paragraph by hand; the keyword
 * is the one info_configure() looks for.
 */
paragraph *info_config_filename(char *filename)
{
- paragraph *p;
- wchar_t *ufilename, *up;
- int len;
-
- p = mknew(paragraph);
- memset(p, 0, sizeof(*p));
- p->type = para_Config;
- p->next = NULL;
- p->fpos.filename = "<command line>";
- p->fpos.line = p->fpos.col = -1;
-
- ufilename = ufroma_dup(filename);
- len = ustrlen(ufilename) + 2 + lenof(L"info-filename");
- p->keyword = mknewa(wchar_t, len);
- up = p->keyword;
- ustrcpy(up, L"info-filename");
- up = uadv(up);
- ustrcpy(up, ufilename);
- up = uadv(up);
- *up = L'\0';
- assert(up - p->keyword < len);
- sfree(ufilename);
-
- return p;
+ return cmdline_cfg_simple("info-filename", filename, NULL);
}
void info_backend(paragraph *sourceform, keywordlist *keywords,
- indexdata *idx) {
+ indexdata *idx, void *unused) {
paragraph *p;
infoconfig conf;
word *prefix, *body, *wp;
word spaceword;
- char *prefixextra;
+ wchar_t *prefixextra;
int nesting, nestindent;
int indentb, indenta;
int filepos;
int has_index;
- rdstringc intro_text = { 0, 0, NULL };
+ info_data intro_text = EMPTY_INFO_DATA;
node *topnode, *currnode;
word bullet;
FILE *fp;
- /*
- * FIXME
- */
- int width = 70, listindentbefore = 1, listindentafter = 3;
- int indent_code = 2, index_width = 40;
-
- IGNORE(keywords); /* we don't happen to need this */
- IGNORE(idx); /* or this */
+ IGNORE(unused);
conf = info_configure(sourceform);
/*
* Go through and create a node for each section.
*/
- topnode = info_node_new("Top");
+ topnode = info_node_new("Top", conf.charset);
currnode = topnode;
for (p = sourceform; p; p = p->next) switch (p->type) {
/*
node *newnode, *upnode;
char *nodename;
- nodename = info_node_name(p);
- newnode = info_node_new(nodename);
+ nodename = info_node_name_for_para(p, &conf);
+ newnode = info_node_new(nodename, conf.charset);
sfree(nodename);
p->private_data = newnode;
currnode = newnode;
}
break;
+ default:
+ p->private_data = NULL;
+ break;
}
/*
indexentry *entry;
for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
- info_idx *ii = mknew(info_idx);
- rdstringc rs = { 0, 0, NULL };
+ info_idx *ii = snew(info_idx);
+ info_data id = EMPTY_INFO_DATA;
+
+ id.charset = conf.charset;
ii->nnodes = ii->nodesize = 0;
ii->nodes = NULL;
- info_rdaddwc(&rs, entry->text, NULL, FALSE);
- /*
- * FIXME: splatter colons.
- */
- ii->text = rs.text;
+ ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
+
+ ii->text = id.output.text;
entry->backend_data = ii;
}
* good place to put the copyright notice and the version IDs.
* Also, Info directory entries are expected to go here.
*/
+ intro_text.charset = conf.charset;
- rdaddsc(&intro_text,
+ info_rdaddsc(&intro_text,
"This Info file generated by Halibut, ");
- rdaddsc(&intro_text, version);
- rdaddsc(&intro_text, "\n\n");
+ info_rdaddsc(&intro_text, version);
+ info_rdaddsc(&intro_text, "\n\n");
for (p = sourceform; p; p = p->next)
if (p->type == para_Config &&
char *s;
section = uadv(p->keyword);
- shortname = *section ? uadv(section) : NULL;
- longname = *shortname ? uadv(shortname) : NULL;
- kw = *longname ? uadv(longname) : NULL;
+ shortname = *section ? uadv(section) : L"";
+ longname = *shortname ? uadv(shortname) : L"";
+ kw = *longname ? uadv(longname) : L"";
if (!*longname) {
- error(err_infodirentry, &p->fpos);
+ err_cfginsufarg(&p->fpos, p->origkeyword, 3);
continue;
}
- rdaddsc(&intro_text, "INFO-DIR-SECTION ");
- s = utoa_dup(section);
- rdaddsc(&intro_text, s);
- sfree(s);
- rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
- s = utoa_dup(shortname);
- rdaddsc(&intro_text, s);
- sfree(s);
- rdaddsc(&intro_text, ": (");
+ info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
+ info_rdadds(&intro_text, section);
+ info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
+ info_rdadds(&intro_text, shortname);
+ info_rdaddsc(&intro_text, ": (");
s = dupstr(conf.filename);
if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
s[strlen(s)-5] = '\0';
- rdaddsc(&intro_text, s);
+ info_rdaddsc(&intro_text, s);
sfree(s);
- rdaddsc(&intro_text, ")");
+ info_rdaddsc(&intro_text, ")");
if (*kw) {
keyword *kwl = kw_lookup(keywords, kw);
if (kwl && kwl->para->private_data) {
node *n = (node *)kwl->para->private_data;
- rdaddsc(&intro_text, n->name);
+ info_rdaddsc(&intro_text, n->name);
}
}
- rdaddsc(&intro_text, ". ");
- s = utoa_dup(longname);
- rdaddsc(&intro_text, s);
- sfree(s);
- rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
+ info_rdaddsc(&intro_text, ". ");
+ info_rdadds(&intro_text, longname);
+ info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
}
for (p = sourceform; p; p = p->next)
if (p->type == para_Copyright)
info_para(&intro_text, NULL, NULL, p->words, keywords,
- 0, 0, width);
+ 0, 0, conf.width, &conf);
for (p = sourceform; p; p = p->next)
if (p->type == para_VersionID)
- info_versionid(&intro_text, p->words);
+ info_versionid(&intro_text, p->words, &conf);
- if (intro_text.text[intro_text.pos-1] != '\n')
- rdaddc(&intro_text, '\n');
+ if (intro_text.output.text[intro_text.output.pos-1] != '\n')
+ info_rdaddc(&intro_text, '\n');
/* Do the title */
for (p = sourceform; p; p = p->next)
if (p->type == para_Title)
- info_heading(&topnode->text, NULL, p->words, width);
+ info_heading(&topnode->text, NULL, p->words,
+ conf.atitle, conf.width, &conf);
- nestindent = listindentbefore + listindentafter;
+ nestindent = conf.listindentbefore + conf.listindentafter;
nesting = 0;
currnode = topnode;
assert(currnode->up);
if (!currnode->up->started_menu) {
- rdaddsc(&currnode->up->text, "* Menu:\n\n");
+ info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
currnode->up->started_menu = TRUE;
}
- info_menu_item(&currnode->up->text, currnode, p);
+ info_menu_item(&currnode->up->text, currnode, p, &conf);
has_index |= info_check_index(p->words, currnode, idx);
- info_heading(&currnode->text, p->kwtext, p->words, width);
+ if (p->type == para_Chapter || p->type == para_Appendix ||
+ p->type == para_UnnumberedChapter)
+ info_heading(&currnode->text, p->kwtext, p->words,
+ conf.achapter, conf.width, &conf);
+ else
+ info_heading(&currnode->text, p->kwtext, p->words,
+ conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
+ conf.width, &conf);
nesting = 0;
break;
case para_Rule:
- info_rule(&currnode->text, nesting, width - nesting);
+ info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
break;
case para_Normal:
bullet.next = NULL;
bullet.alt = NULL;
bullet.type = word_Normal;
- bullet.text = L"-"; /* FIXME: configurability */
+ bullet.text = conf.bullet;
prefix = &bullet;
prefixextra = NULL;
- indentb = listindentbefore;
- indenta = listindentafter;
+ indentb = conf.listindentbefore;
+ indenta = conf.listindentafter;
} else if (p->type == para_NumberedList) {
prefix = p->kwtext;
- prefixextra = "."; /* FIXME: configurability */
- indentb = listindentbefore;
- indenta = listindentafter;
+ prefixextra = conf.listsuffix;
+ indentb = conf.listindentbefore;
+ indenta = conf.listindentafter;
} else if (p->type == para_Description) {
prefix = NULL;
prefixextra = NULL;
- indentb = listindentbefore;
- indenta = listindentafter;
+ indentb = conf.listindentbefore;
+ indenta = conf.listindentafter;
} else {
prefix = NULL;
prefixextra = NULL;
}
info_para(&currnode->text, prefix, prefixextra, body, keywords,
nesting + indentb, indenta,
- width - nesting - indentb - indenta);
+ conf.width - nesting - indentb - indenta, &conf);
if (wp) {
wp->next = NULL;
free_word_list(body);
case para_Code:
info_codepara(&currnode->text, p->words,
- nesting + indent_code,
- width - nesting - 2 * indent_code);
+ nesting + conf.indent_code,
+ conf.width - nesting - 2 * conf.indent_code);
break;
}
node *newnode;
int i, j, k;
indexentry *entry;
+ char *nodename;
+
+ nodename = info_node_name_for_text(conf.index_text, &conf);
+ newnode = info_node_new(nodename, conf.charset);
+ sfree(nodename);
- newnode = info_node_new("Index");
newnode->up = topnode;
currnode->next = newnode;
newnode->prev = currnode;
currnode->listnext = newnode;
- rdaddsc(&newnode->text, "Index\n-----\n\n* Menu:\n\n");
+ k = info_rdadds(&newnode->text, conf.index_text);
+ info_rdaddsc(&newnode->text, "\n");
+ while (k > 0) {
+ info_rdadds(&newnode->text, conf.achapter.underline);
+ k -= ustrwid(conf.achapter.underline, conf.charset);
+ }
+ info_rdaddsc(&newnode->text, "\n\n");
- info_menu_item(&topnode->text, newnode, NULL);
+ info_menu_item(&topnode->text, newnode, NULL, &conf);
for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
info_idx *ii = (info_idx *)entry->backend_data;
for (j = 0; j < ii->nnodes; j++) {
- int pos0 = newnode->text.pos;
- rdaddsc(&newnode->text, "* ");
/*
* When we have multiple references for a single
* index term, we only display the actual term on
* really are the same.
*/
if (j == 0)
- rdaddsc(&newnode->text, ii->text);
- for (k = newnode->text.pos - pos0; k < index_width; k++)
- rdaddc(&newnode->text, ' ');
- rdaddsc(&newnode->text, ": ");
- rdaddsc(&newnode->text, ii->nodes[j]->name);
- rdaddsc(&newnode->text, ".\n");
+ info_rdaddsc(&newnode->text, ii->text);
+ for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
+ info_rdaddc(&newnode->text, ' ');
+ info_rdaddsc(&newnode->text, " *Note ");
+ info_rdaddsc(&newnode->text, ii->nodes[j]->name);
+ info_rdaddsc(&newnode->text, "::\n");
}
}
}
* and the node line at the top.
*/
for (currnode = topnode; currnode; currnode = currnode->listnext) {
- char *origtext = currnode->text.text;
- currnode->text.text = NULL;
- currnode->text.pos = currnode->text.size = 0;
- rdaddsc(&currnode->text, "\037\nFile: ");
- rdaddsc(&currnode->text, conf.filename);
- rdaddsc(&currnode->text, ", Node: ");
- rdaddsc(&currnode->text, currnode->name);
+ char *origtext = currnode->text.output.text;
+ currnode->text = empty_info_data;
+ currnode->text.charset = conf.charset;
+ info_rdaddsc(&currnode->text, "\037\nFile: ");
+ info_rdaddsc(&currnode->text, conf.filename);
+ info_rdaddsc(&currnode->text, ", Node: ");
+ info_rdaddsc(&currnode->text, currnode->name);
if (currnode->prev) {
- rdaddsc(&currnode->text, ", Prev: ");
- rdaddsc(&currnode->text, currnode->prev->name);
+ info_rdaddsc(&currnode->text, ", Prev: ");
+ info_rdaddsc(&currnode->text, currnode->prev->name);
}
- rdaddsc(&currnode->text, ", Up: ");
- rdaddsc(&currnode->text, (currnode->up ?
- currnode->up->name : "(dir)"));
+ info_rdaddsc(&currnode->text, ", Up: ");
+ info_rdaddsc(&currnode->text, (currnode->up ?
+ currnode->up->name : "(dir)"));
if (currnode->next) {
- rdaddsc(&currnode->text, ", Next: ");
- rdaddsc(&currnode->text, currnode->next->name);
+ info_rdaddsc(&currnode->text, ", Next: ");
+ info_rdaddsc(&currnode->text, currnode->next->name);
}
- rdaddsc(&currnode->text, "\n\n");
- rdaddsc(&currnode->text, origtext);
+ info_rdaddsc(&currnode->text, "\n\n");
+ info_rdaddsc(&currnode->text, origtext);
/*
* Just make _absolutely_ sure we end with a newline.
*/
- if (currnode->text.text[currnode->text.pos-1] != '\n')
- rdaddc(&currnode->text, '\n');
+ if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
+ info_rdaddc(&currnode->text, '\n');
sfree(origtext);
}
/*
* Compute the offsets for the tag table.
*/
- filepos = intro_text.pos;
+ filepos = intro_text.output.pos;
for (currnode = topnode; currnode; currnode = currnode->listnext) {
currnode->pos = filepos;
- filepos += currnode->text.pos;
+ filepos += currnode->text.output.pos;
}
/*
* Split into sub-files.
*/
if (conf.maxfilesize > 0) {
- int currfilesize = intro_text.pos, currfilenum = 1;
+ int currfilesize = intro_text.output.pos, currfilenum = 1;
for (currnode = topnode; currnode; currnode = currnode->listnext) {
- if (currfilesize > intro_text.pos &&
- currfilesize + currnode->text.pos > conf.maxfilesize) {
+ if (currfilesize > intro_text.output.pos &&
+ currfilesize + currnode->text.output.pos > conf.maxfilesize) {
currfilenum++;
- currfilesize = intro_text.pos;
+ currfilesize = intro_text.output.pos;
}
currnode->filenum = currfilenum;
- currfilesize += currnode->text.pos;
+ currfilesize += currnode->text.output.pos;
}
}
*/
fp = fopen(conf.filename, "w");
if (!fp) {
- error(err_cantopenw, conf.filename);
+ err_cantopenw(conf.filename);
return;
}
- fputs(intro_text.text, fp);
+ fputs(intro_text.output.text, fp);
if (conf.maxfilesize == 0) {
for (currnode = topnode; currnode; currnode = currnode->listnext)
- fputs(currnode->text.text, fp);
+ fputs(currnode->text.output.text, fp);
} else {
int filenum = 0;
fprintf(fp, "\037\nIndirect:\n");
if (fp)
fclose(fp);
- fname = mknewa(char, strlen(conf.filename) + 40);
+ fname = snewn(strlen(conf.filename) + 40, char);
sprintf(fname, "%s-%d", conf.filename, filenum);
fp = fopen(fname, "w");
if (!fp) {
- error(err_cantopenw, fname);
+ err_cantopenw(fname);
return;
}
sfree(fname);
- fputs(intro_text.text, fp);
+ fputs(intro_text.output.text, fp);
}
- fputs(currnode->text.text, fp);
+ fputs(currnode->text.output.text, fp);
}
if (fp)
if (ii->nnodes >= ii->nodesize) {
ii->nodesize += 32;
- ii->nodes = resize(ii->nodes, ii->nodesize);
+ ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
}
ii->nodes[ii->nnodes++] = n;
return ret;
}
-/*
- * Convert a wide string into a string of chars. If `result' is
- * non-NULL, mallocs the resulting string and stores a pointer to
- * it in `*result'. If `result' is NULL, merely checks whether all
- * characters in the string are feasible for the output character
- * set.
- *
- * Return is nonzero if all characters are OK. If not all
- * characters are OK but `result' is non-NULL, a result _will_
- * still be generated!
- */
-static int info_convert(wchar_t *s, char **result) {
- /*
- * FIXME. Currently this is ISO8859-1 only.
- */
- int doing = (result != 0);
- int ok = TRUE;
- char *p = NULL;
- int plen = 0, psize = 0;
-
- for (; *s; s++) {
- wchar_t c = *s;
- char outc;
-
- if ((c >= 32 && c <= 126) ||
- (c >= 160 && c <= 255)) {
- /* Char is OK. */
- outc = (char)c;
- } else {
- /* Char is not OK. */
- ok = FALSE;
- outc = 0xBF; /* approximate the good old DEC `uh?' */
- }
- if (doing) {
- if (plen >= psize) {
- psize = plen + 256;
- p = resize(p, psize);
- }
- p[plen++] = outc;
- }
- }
- if (doing) {
- p = resize(p, plen+1);
- p[plen] = '\0';
- *result = p;
- }
- return ok;
-}
-
static word *info_transform_wordlist(word *words, keywordlist *keywords)
{
word *ret = dup_word_list(words);
* In Info, we do nothing special for xrefs to
* numbered list items or bibliography entries.
*/
- break;
+ continue;
} else {
/*
* An xref to a different section has its text
return ret;
}
-static void info_rdaddwc(rdstringc *rs, word *words, word *end, int xrefs) {
- char *c;
+static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
+ infoconfig *cfg) {
+ int ret = 0;
for (; words && words != end; words = words->next) switch (words->type) {
case word_HyperLink:
if (towordstyle(words->type) == word_Emph &&
(attraux(words->aux) == attr_First ||
attraux(words->aux) == attr_Only))
- rdaddc(rs, '_'); /* FIXME: configurability */
+ ret += info_rdadds(id, cfg->startemph);
else if (towordstyle(words->type) == word_Code &&
(attraux(words->aux) == attr_First ||
attraux(words->aux) == attr_Only))
- rdaddc(rs, '`'); /* FIXME: configurability */
+ ret += info_rdadds(id, cfg->lquote);
if (removeattr(words->type) == word_Normal) {
- if (info_convert(words->text, &c))
- rdaddsc(rs, c);
+ if (cvt_ok(id->charset, words->text) || !words->alt)
+ ret += info_rdadds(id, words->text);
else
- info_rdaddwc(rs, words->alt, NULL, FALSE);
- sfree(c);
+ ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
} else if (removeattr(words->type) == word_WhiteSpace) {
- rdaddc(rs, ' ');
+ ret += info_rdadd(id, L' ');
} else if (removeattr(words->type) == word_Quote) {
- rdaddc(rs, quoteaux(words->aux) == quote_Open ? '`' : '\'');
- /* FIXME: configurability */
+ ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
+ cfg->lquote : cfg->rquote);
}
if (towordstyle(words->type) == word_Emph &&
(attraux(words->aux) == attr_Last ||
attraux(words->aux) == attr_Only))
- rdaddc(rs, '_'); /* FIXME: configurability */
+ ret += info_rdadds(id, cfg->endemph);
else if (towordstyle(words->type) == word_Code &&
(attraux(words->aux) == attr_Last ||
attraux(words->aux) == attr_Only))
- rdaddc(rs, '\''); /* FIXME: configurability */
+ ret += info_rdadds(id, cfg->rquote);
break;
case word_UpperXref:
case word_LowerXref:
if (xrefs && words->private_data) {
- rdaddsc(rs, "*Note ");
- rdaddsc(rs, ((node *)words->private_data)->name);
- rdaddsc(rs, "::");
+ /*
+ * This bit is structural and so must be done in char
+ * rather than wchar_t.
+ */
+ ret += info_rdaddsc(id, "*Note ");
+ ret += info_rdaddsc(id, ((node *)words->private_data)->name);
+ ret += info_rdaddsc(id, "::");
}
break;
}
+
+ return ret;
}
-static int info_width_internal(word *words, int xrefs);
+static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
/*
 * Total width of a whole word list: the sum of info_width_internal()
 * over every word reachable from `words' through ->next. `xrefs' and
 * `cfg' are passed through unchanged. Returns 0 for an empty list.
 */
-static int info_width_internal_list(word *words, int xrefs) {
+static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
int w = 0;
while (words) {
- w += info_width_internal(words, xrefs);
+ w += info_width_internal(words, xrefs, cfg);
words = words->next;
}
return w;
}
-static int info_width_internal(word *words, int xrefs) {
+static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
+ int wid;
+ int attr;
+
switch (words->type) {
case word_HyperLink:
case word_HyperEnd:
case word_IndexRef:
return 0;
+ case word_UpperXref:
+ case word_LowerXref:
+ if (xrefs && words->private_data) {
+ /* "*Note " plus "::" comes to 8 characters */
+ return 8 + strwid(((node *)words->private_data)->name,
+ cfg->charset);
+ } else
+ return 0;
+ }
+
+ assert(words->type < word_internal_endattrs);
+
+ wid = 0;
+ attr = towordstyle(words->type);
+
+ if (attr == word_Emph || attr == word_Code) {
+ if (attraux(words->aux) == attr_Only ||
+ attraux(words->aux) == attr_First)
+ wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
+ cfg->charset);
+ }
+ if (attr == word_Emph || attr == word_Code) {
+ if (attraux(words->aux) == attr_Only ||
+ attraux(words->aux) == attr_Last)
+ wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
+ cfg->charset);
+ }
+
+ switch (words->type) {
case word_Normal:
case word_Emph:
case word_Code:
case word_WeakCode:
- return (((words->type == word_Emph ||
- words->type == word_Code)
- ? (attraux(words->aux) == attr_Only ? 2 :
- attraux(words->aux) == attr_Always ? 0 : 1)
- : 0) +
- (info_convert(words->text, NULL) ?
- ustrlen(words->text) :
- info_width_internal_list(words->alt, xrefs)));
+ if (cvt_ok(cfg->charset, words->text) || !words->alt)
+ wid += ustrwid(words->text, cfg->charset);
+ else
+ wid += info_width_internal_list(words->alt, xrefs, cfg);
+ return wid;
case word_WhiteSpace:
case word_EmphSpace:
case word_WkCodeQuote:
assert(words->type != word_CodeQuote &&
words->type != word_WkCodeQuote);
- return (((towordstyle(words->type) == word_Emph ||
- towordstyle(words->type) == word_Code)
- ? (attraux(words->aux) == attr_Only ? 2 :
- attraux(words->aux) == attr_Always ? 0 : 1)
- : 0) + 1);
-
- case word_UpperXref:
- case word_LowerXref:
- if (xrefs && words->private_data) {
- /* "*Note " plus "::" comes to 8 characters */
- return 8 + strlen(((node *)words->private_data)->name);
- }
- break;
+ if (removeattr(words->type) == word_Quote) {
+ if (quoteaux(words->aux) == quote_Open)
+ wid += ustrwid(cfg->lquote, cfg->charset);
+ else
+ wid += ustrwid(cfg->rquote, cfg->charset);
+ } else
+ wid++; /* space */
}
- return 0; /* should never happen */
+ return wid;
}
/*
 * Width callback handed to wrap_para() (see info_heading): measures
 * one word with cross-references counted as zero width. `ctx' is the
 * infoconfig pointer supplied alongside the callback.
 */
-static int info_width_noxrefs(word *words)
+static int info_width_noxrefs(void *ctx, word *words)
{
- return info_width_internal(words, FALSE);
+ return info_width_internal(words, FALSE, (infoconfig *)ctx);
}
/*
 * Width callback with the same shape as info_width_noxrefs(), but
 * with xrefs enabled, so a cross-reference word that has private_data
 * is measured as "*Note <node name>::". `ctx' is the infoconfig
 * pointer.
 */
-static int info_width_xrefs(word *words)
+static int info_width_xrefs(void *ctx, word *words)
{
- return info_width_internal(words, TRUE);
+ return info_width_internal(words, TRUE, (infoconfig *)ctx);
}
/*
 * Append a heading to `text'.
 *
 * tprefix: optional prefix words; when present they are emitted
 *          first, followed by cfg->sectsuffix.
 * words:   the heading text, wrapped by wrap_para() to `width'
 *          (the first line is narrowed by the prefix length).
 * align:   supplies the underline string for this heading level.
 *
 * Every wrapped line is followed by a newline and, unless the
 * underline string is empty, by a line made of align.underline
 * repeated until it covers `length'. A final newline leaves a blank
 * line after the heading. `length' accumulates the values returned by
 * info_rdaddwc()/info_rdadds() -- presumably display columns, TODO
 * confirm against those helpers.
 */
-static void info_heading(rdstringc *text, word *tprefix,
- word *words, int width) {
- rdstringc t = { 0, 0, NULL };
- int margin, length;
+static void info_heading(info_data *text, word *tprefix,
+ word *words, alignstruct align,
+ int width, infoconfig *cfg) {
+ int length;
int firstlinewidth, wrapwidth;
- int i;
wrappedline *wrapping, *p;
+ length = 0;
if (tprefix) {
- info_rdaddwc(&t, tprefix, NULL, FALSE);
- rdaddsc(&t, ": "); /* FIXME: configurability */
+ length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
+ length += info_rdadds(text, cfg->sectsuffix);
}
- margin = length = (t.text ? strlen(t.text) : 0);
- margin = 0;
- firstlinewidth = width - length;
wrapwidth = width;
+ firstlinewidth = width - length;
- wrapping = wrap_para(words, firstlinewidth, wrapwidth, info_width_noxrefs);
+ wrapping = wrap_para(words, firstlinewidth, wrapwidth,
+ info_width_noxrefs, cfg, 0);
for (p = wrapping; p; p = p->next) {
- info_rdaddwc(&t, p->begin, p->end, FALSE);
- length = (t.text ? strlen(t.text) : 0);
- for (i = 0; i < margin; i++)
- rdaddc(text, ' ');
- rdaddsc(text, t.text);
- rdaddc(text, '\n');
- for (i = 0; i < margin; i++)
- rdaddc(text, ' ');
- while (length--)
- rdaddc(text, '-');
- rdaddc(text, '\n');
- margin = 0;
- sfree(t.text);
- t = empty_rdstringc;
/* On the first line `length' still includes the prefix emitted above. */
+ length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
+ info_rdadd(text, L'\n');
+ if (*align.underline) {
+ while (length > 0) {
+ info_rdadds(text, align.underline);
+ length -= ustrwid(align.underline, cfg->charset);
+ }
+ info_rdadd(text, L'\n');
+ }
/* Continuation lines have no prefix, so start counting afresh. */
+ length = 0;
}
wrap_free(wrapping);
- rdaddc(text, '\n');
-
- sfree(t.text);
+ info_rdadd(text, L'\n');
}
-static void info_rule(rdstringc *text, int indent, int width) {
- while (indent--) rdaddc(text, ' ');
- while (width--) rdaddc(text, '-');
- rdaddc(text, '\n');
- rdaddc(text, '\n');
+static void info_rule(info_data *text, int indent, int width, infoconfig *cfg) /* Write
+ * a horizontal rule into text: indent spaces, then cfg->rule repeated
+ * until width has been used up, then two newlines. Because whole
+ * copies of cfg->rule are written, the rule may overshoot width when
+ * the string is more than one column wide. */
+{
+ while (indent--) info_rdadd(text, L' ');
+ while (width > 0) {
+ info_rdadds(text, cfg->rule);
+ width -= ustrwid(cfg->rule, cfg->charset); /* NOTE(review): loops forever
+ * if cfg->rule has zero width (e.g. is empty) - confirm the
+ * configuration code prevents that */
+ }
+ info_rdadd(text, L'\n');
+ info_rdadd(text, L'\n');
}
-static void info_para(rdstringc *text, word *prefix, char *prefixextra,
- word *input, keywordlist *keywords,
- int indent, int extraindent, int width) {
+static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
+ word *input, keywordlist *keywords, int indent,
+ int extraindent, int width, infoconfig *cfg) { /* Write one
+ * wrapped paragraph into text. input is first passed through
+ * info_transform_wordlist; the resulting list is freed before
+ * returning.
+ *
+ * With a prefix: indent spaces, the prefix and the optional
+ * prefixextra string are written, and the first line of text
+ * is padded so that it starts extraindent columns after the
+ * indent. A prefix wider than extraindent shortens the first
+ * line instead; if that would leave a negative width, a
+ * newline is written after the prefix and the text starts on
+ * the next line at full width.
+ *
+ * Without a prefix, and for every line after the first, the
+ * line is indented by indent + extraindent spaces. A newline
+ * is written after the last line. */
wrappedline *wrapping, *p;
word *words;
- rdstringc pfx = { 0, 0, NULL };
int e;
int i;
int firstlinewidth = width;
words = info_transform_wordlist(input, keywords);
if (prefix) {
- info_rdaddwc(&pfx, prefix, NULL, FALSE);
- if (prefixextra)
- rdaddsc(&pfx, prefixextra);
for (i = 0; i < indent; i++)
- rdaddc(text, ' ');
- rdaddsc(text, pfx.text);
+ info_rdadd(text, L' ');
+ e = info_rdaddwc(text, prefix, NULL, FALSE, cfg); /* return value is used as the prefix width - presumably columns written; confirm */
+ if (prefixextra)
+ e += info_rdadds(text, prefixextra);
/* If the prefix is too long, shorten the first line to fit. */
- e = extraindent - strlen(pfx.text);
+ e = extraindent - e; /* e becomes the padding still needed to reach extraindent */
if (e < 0) {
firstlinewidth += e; /* this decreases it, since e < 0 */
if (firstlinewidth < 0) {
e = indent + extraindent;
firstlinewidth = width;
- rdaddc(text, '\n');
+ info_rdadd(text, L'\n'); /* prefix gets a line to itself */
} else
e = 0;
}
- sfree(pfx.text);
} else
e = indent + extraindent;
- wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs);
+ wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
+ cfg, 0);
for (p = wrapping; p; p = p->next) {
for (i = 0; i < e; i++)
- rdaddc(text, ' ');
- info_rdaddwc(text, p->begin, p->end, TRUE);
- rdaddc(text, '\n');
+ info_rdadd(text, L' ');
+ info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
+ info_rdadd(text, L'\n');
e = indent + extraindent;
}
wrap_free(wrapping);
- rdaddc(text, '\n');
+ info_rdadd(text, L'\n');
free_word_list(words);
}
-static void info_codepara(rdstringc *text, word *words,
+static void info_codepara(info_data *text, word *words,
int indent, int width) {
int i;
for (; words; words = words->next) if (words->type == word_WeakCode) {
- char *c;
- info_convert(words->text, &c);
- if (strlen(c) > (size_t)width) {
+ for (i = 0; i < indent; i++)
+ info_rdadd(text, L' '); /* Each word_WeakCode word in the list
+ * becomes one output line: indent spaces, the word's text,
+ * then a newline. Words of any other type are skipped. */
+ if (info_rdadds(text, words->text) > width) { /* info_rdadds returns the ustrwid width of the text it was given */
/* FIXME: warn */
}
- for (i = 0; i < indent; i++)
- rdaddc(text, ' ');
- rdaddsc(text, c);
- rdaddc(text, '\n');
- sfree(c);
+ info_rdadd(text, L'\n');
}
- rdaddc(text, '\n');
+ info_rdadd(text, L'\n'); /* one extra newline after the last code line */
}
-static void info_versionid(rdstringc *text, word *words) {
- rdaddc(text, '['); /* FIXME: configurability */
- info_rdaddwc(text, words, NULL, FALSE);
- rdaddsc(text, "]\n");
+static void info_versionid(info_data *text, word *words, infoconfig *cfg) { /* Write
+ * a version-ID line into text: an opening square bracket, the
+ * rendered words, then a closing square bracket and a newline. */
+ info_rdadd(text, L'[');
+ info_rdaddwc(text, words, NULL, FALSE, cfg);
+ info_rdadds(text, L"]\n");
}
-static node *info_node_new(char *name)
+static node *info_node_new(char *name, int charset) /* Allocate and
+ * initialise a node. Its text buffer starts as empty_info_data with
+ * the given charset, all link pointers (up, next, prev, lastchild,
+ * listnext) are NULL, started_menu is FALSE, and name is stored as
+ * a copy made with dupstr. Returns the new node. */
{
node *n;
- n = mknew(node);
- n->text.text = NULL;
- n->text.pos = n->text.size = 0;
+ n = snew(node);
+ n->text = empty_info_data;
+ n->text.charset = charset;
n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
n->name = dupstr(name);
n->started_menu = FALSE;
return n;
}
-static char *info_node_name(paragraph *p)
+static char *info_node_name_core(info_data *id, filepos *fpos) /* Strip
+ * characters that are not allowed in a node name from the text
+ * already accumulated in id->output, editing that buffer in place.
+ * err_infonodechar(fpos, ch) is called once for every character
+ * removed. Returns id->output.text itself, not a copy. */
+{
+ char *p, *q;
+
+ /*
+ * We cannot have commas, colons or parentheses in a node name.
+ * Remove any that we find, with a warning.
+ */
+ p = q = id->output.text; /* p reads, q writes; q never gets ahead of p.
+ * NOTE(review): assumes output.text is non-NULL here - confirm
+ * for callers that may have added no text */
+ while (*p) {
+ if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
+ err_infonodechar(fpos, *p);
+ } else {
+ *q++ = *p;
+ }
+ p++;
+ }
+ *q = '\0';
+
+ return id->output.text;
+}
+
+static char *info_node_name_for_para(paragraph *par, infoconfig *cfg) /* Build
+ * a node name for a paragraph. The paragraph's kwtext (or its words,
+ * when kwtext is NULL) is rendered into a fresh info_data using
+ * cfg->charset, and the result is cleaned by info_node_name_core,
+ * which reports removed characters against par->fpos. Returns the
+ * string that info_node_name_core returns. */
{
- rdstringc rsc = { 0, 0, NULL };
- info_rdaddwc(&rsc, p->kwtext ? p->kwtext : p->words, NULL, FALSE);
- return rsc.text;
+ info_data id = EMPTY_INFO_DATA;
+
+ id.charset = cfg->charset;
+ info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
+ NULL, FALSE, cfg);
+ info_rdaddsc(&id, NULL); /* adds no text; only takes id out of wide-char mode */
+
+ return info_node_name_core(&id, &par->fpos);
}
-static void info_menu_item(rdstringc *text, node *n, paragraph *p)
+static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg) /* Build
+ * a node name from a wide-character string. text is converted into
+ * a fresh info_data using cfg->charset and then cleaned by
+ * info_node_name_core. Returns the string that
+ * info_node_name_core returns. */
+{
+ info_data id = EMPTY_INFO_DATA;
+
+ id.charset = cfg->charset;
+ info_rdadds(&id, text);
+ info_rdaddsc(&id, NULL); /* adds no text; only takes id out of wide-char mode */
+
+ return info_node_name_core(&id, NULL); /* NOTE(review): a forbidden
+ * character here reaches err_infonodechar with a NULL fpos -
+ * confirm that function accepts NULL */
+}
+
+static void info_menu_item(info_data *text, node *n, paragraph *p,
+ infoconfig *cfg) /* Write one menu line for node n into text:
+ * the literal strings "* ", n->name and "::", then, when p is
+ * non-NULL, a space and the rendered p->words, then a newline. */
{
/*
* FIXME: Depending on how we're doing node names in this info
*
* * Chapter number: Node name.
*
- *
+ * This function mostly works in char rather than wchar_t,
+ * because a menu item is a structural component.
*/
- rdaddsc(text, "* ");
- rdaddsc(text, n->name);
- rdaddsc(text, "::");
+ info_rdaddsc(text, "* ");
+ info_rdaddsc(text, n->name);
+ info_rdaddsc(text, "::");
if (p) {
- rdaddc(text, ' ');
- info_rdaddwc(text, p->words, NULL, FALSE);
+ info_rdaddc(text, ' ');
+ info_rdaddwc(text, p->words, NULL, FALSE, cfg); /* the only part of the line that goes through info_rdaddwc */
+ }
+ info_rdaddc(text, '\n');
+}
+
+/*
+ * These functions implement my wrapper on the rdadd* calls which
+ * allows me to switch arbitrarily between literal octet-string
+ * text and charset-translated Unicode. (Because no matter what
+ * character set I write the actual text in, I expect info readers
+ * to treat node names and file names literally and to expect
+ * keywords like `*Note' in their canonical form, so I have to take
+ * steps to ensure that those structural elements of the file
+ * aren't messed with.)
+ */
+/*
+ * Append the wide-character string wcs to d, converted to d->charset.
+ *
+ * If d is not already in wide-char mode, the conversion state is
+ * reset to charset_init_state and wide-char mode is entered; this
+ * happens even when wcs is NULL. The string is converted in chunks
+ * through a fixed local buffer and each chunk is appended to
+ * d->output.
+ *
+ * Returns ustrwid(wcs, d->charset), or 0 when wcs is NULL.
+ */
+static int info_rdadds(info_data *d, wchar_t const *wcs)
+{
+ if (!d->wcmode) {
+ d->state = charset_init_state;
+ d->wcmode = TRUE;
+ }
+
+ if (wcs) {
+ char buf[256];
+ int len, width, ret;
+
+ width = ustrwid(wcs, d->charset);
+
+ len = ustrlen(wcs);
+ while (len > 0) {
+ int prevlen = len;
+
+ /*
+ * Offer the converter one byte less than the buffer
+ * holds, so that the terminating NUL written below
+ * always fits even when a chunk fills the space offered.
+ */
+ ret = charset_from_unicode(&wcs, &len, buf, lenof(buf) - 1,
+ d->charset, &d->state, NULL);
+
+ assert(len < prevlen); /* every pass must consume some input */
+
+ if (ret > 0) {
+ buf[ret] = '\0';
+ rdaddsc(&d->output, buf);
+ }
+ }
+
+ return width;
+ } else
+ return 0;
+}
+
+/*
+ * Append the byte string cs to d literally, with no charset
+ * conversion.
+ *
+ * If d is in wide-char mode, charset_from_unicode is first called
+ * with no input and any bytes it produces are appended (presumably
+ * the sequence that returns the encoder to its initial state -
+ * confirm against the charset library); wide-char mode is then
+ * left. Passing cs == NULL performs only that mode switch.
+ *
+ * Returns strwid(cs, d->charset), or 0 when cs is NULL.
+ */
+static int info_rdaddsc(info_data *d, char const *cs)
+{
+ if (d->wcmode) {
+ char buf[256];
+ int ret;
+
+ /*
+ * Offer the converter one byte less than the buffer holds,
+ * so that the terminating NUL written below always fits.
+ */
+ ret = charset_from_unicode(NULL, 0, buf, lenof(buf) - 1,
+ d->charset, &d->state, NULL);
+ if (ret > 0) {
+ buf[ret] = '\0';
+ rdaddsc(&d->output, buf);
+ }
+
+ d->wcmode = FALSE;
}
- rdaddc(text, '\n');
+
+ if (cs) {
+ rdaddsc(&d->output, cs);
+ return strwid(cs, d->charset);
+ } else
+ return 0;
+}
+
+static int info_rdadd(info_data *d, wchar_t wc) /* Append the single
+ * wide character wc to d by wrapping it in a one-character string
+ * and calling info_rdadds. Returns that call's result. */
+{
+ wchar_t wcs[2];
+ wcs[0] = wc;
+ wcs[1] = L'\0';
+ return info_rdadds(d, wcs);
+}
+
+static int info_rdaddc(info_data *d, char c) /* Append the single byte
+ * c to d by wrapping it in a one-character string and calling
+ * info_rdaddsc. Returns that call's result. */
+{
+ char cs[2];
+ cs[0] = c;
+ cs[1] = '\0';
+ return info_rdaddsc(d, cs);
}