mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * Info backend for Halibut
   3  *
   4  * The Info file format isn't well-specified, and what specification
   5  * there is is scattered all over the place.  Sources include:
   6  *   (info), from GNU Texinfo.
   7  *   (texinfo), also from GNU Texinfo.
   8  *   (Emacs)Misc Help, and (emacs)Info Lookup, from GNU Emacs.
   9  *   info.el, from GNU Emacs.
  10  *
  11  * Possible future work:
  12  *
  13  *  - configurable choice of how to allocate node names?
  14  *     + possibly a template-like approach, choosing node names to
  15  *       be the full section title or perhaps the internal keyword?
  16  *     + neither of those seems quite right. Perhaps instead a
  17  *       Windows Help-like mechanism, where a magic config
  18  *       directive allows user choice of name for every node.
  19  *     + Only trouble with that is, now what happens to the section
  20  *       numbers? Do they become completely vestigial and just sit
  21  *       in the title text of each node? Or do we keep them in the
  22  *       menus somehow? I think people might occasionally want to
  23  *       go to a section by number, if only because all the _other_
  24  *       formats of the same document will reference the numbers
  25  *       all the time. So our menu lines could look like one of
  26  *       these:
  27  *        * Nodename: Section 1.2. Title of section.
  28  *        * Section 1.2: Nodename. Title of section.
  29  *
  30  *  - might be helpful to diagnose duplicate node names!
  31  *
  32  *  - Indices generated by makeinfo use a menu rather than a bunch of
  33  *    cross-references, which reduces visual clutter rather.  For
  34  *    singly-referenced items, it looks like:
  35  *      * toner cartridge, replacing:              Toner.
  36  *    It does a horrid job on multiply-referenced entries, though,
  37  *    perhaps because the name before the colon is meant to be unique.
  38  *    Info's 'i' command requires the use of a menu -- it fails to
  39  *    find any index entries at all with Halibut's current index format.
  40  *
  41  *  - The string "*note" is matched case-insensitively, so we could
  42  *    make things slightly less ugly by using the lower-case version
  43  *    when the user asks for \k.  Unfortunately, standalone Info seems
  44  *    to match node names case-sensitively, so we can't downcase that.
  45  *
  46  *  - The character encoding used in an Info file can be configured using
  47  *    an Emacs local variables block at the end, like this:
  48  *      Local Variables:
  49  *      coding: iso-8859-1
  50  *      End:
  51  */
  52
  53 #include <stdio.h>
  54 #include <stdlib.h>
  55 #include <assert.h>
  56 #include "halibut.h"
  57
  58 typedef struct {
  59     wchar_t *underline;
  60 } alignstruct;
  61
  62 typedef struct {
  63     char *filename;
  64     int maxfilesize;
  65     int charset;
  66     int listindentbefore, listindentafter;
  67     int indent_code, width, index_width;
  68     alignstruct atitle, achapter, *asect;
  69     int nasect;
  70     wchar_t *bullet, *listsuffix;
  71     wchar_t *startemph, *endemph;
  72     wchar_t *lquote, *rquote;
  73     wchar_t *sectsuffix;
  74     wchar_t *rule;
  75     wchar_t *index_text;
  76 } infoconfig;
  77
  78 typedef struct {
  79     rdstringc output;
  80     int charset;
  81     charset_state state;
  82     int wcmode;
  83 } info_data;
  84 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
  85 static const info_data empty_info_data = EMPTY_INFO_DATA;
  86
  87 typedef struct node_tag node;
  88 struct node_tag {
  89     node *listnext;
  90     node *up, *prev, *next, *lastchild;
  91     int pos, started_menu, filenum;
  92     char *name;
  93     info_data text;
  94 };
  95
  96 typedef struct {
  97     char *text;
  98     int length;
  99     int nnodes, nodesize;
 100     node **nodes;
 101 } info_idx;
 102
 103 static int info_rdadd(info_data *, wchar_t);
 104 static int info_rdadds(info_data *, wchar_t const *);
 105 static int info_rdaddc(info_data *, char);
 106 static int info_rdaddsc(info_data *, char const *);
 107
 108 static void info_heading(info_data *, word *, word *, alignstruct, int,
 109                          infoconfig *);
 110 static void info_rule(info_data *, int, int, infoconfig *);
 111 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
 112                       int, int, int, infoconfig *);
 113 static void info_codepara(info_data *, word *, int, int);
 114 static void info_versionid(info_data *, word *, infoconfig *);
 115 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
 116 static word *info_transform_wordlist(word *, keywordlist *);
 117 static int info_check_index(word *, node *, indexdata *);
 118
 119 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
 120
 121 static node *info_node_new(char *name, int charset);
 122 static char *info_node_name_for_para(paragraph *p, infoconfig *);
 123 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
 124
 125 static infoconfig info_configure(paragraph *source) {
 126     infoconfig ret;
 127     paragraph *p;
 128     int n;
 129
 130     /*
 131      * Defaults.
 132      */
 133     ret.filename = dupstr("output.info");
 134     ret.maxfilesize = 64 << 10;
 135     ret.charset = CS_ASCII;
 136     ret.width = 70;
 137     ret.listindentbefore = 1;
 138     ret.listindentafter = 3;
 139     ret.indent_code = 2;
 140     ret.index_width = 40;
 141     ret.listsuffix = L".";
 142     ret.bullet = L"\x2022\0-\0\0";
 143     ret.rule = L"\x2500\0-\0\0";
 144     ret.startemph = L"_\0_\0\0";
 145     ret.endemph = uadv(ret.startemph);
 146     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 147     ret.rquote = uadv(ret.lquote);
 148     ret.sectsuffix = L": ";
 149     /*
 150      * Default underline characters are chosen to match those recognised by
 151      * Info-fontify-node.
 152      */
 153     ret.atitle.underline = L"*\0\0";
 154     ret.achapter.underline = L"=\0\0";
 155     ret.nasect = 2;
 156     ret.asect = snewn(ret.nasect, alignstruct);
 157     ret.asect[0].underline = L"-\0\0";
 158     ret.asect[1].underline = L".\0\0";
 159     ret.index_text = L"Index";
 160
 161     /*
 162      * Two-pass configuration so that we can pick up global config
 163      * (e.g. `quotes') before having it overridden by specific
 164      * config (`info-quotes'), irrespective of the order in which
 165      * they occur.
 166      */
 167     for (p = source; p; p = p->next) {
 168         if (p->type == para_Config) {
 169             if (!ustricmp(p->keyword, L"quotes")) {
 170                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 171                     ret.lquote = uadv(p->keyword);
 172                     ret.rquote = uadv(ret.lquote);
 173                 }
 174             } else if (!ustricmp(p->keyword, L"index")) {
 175                 ret.index_text = uadv(p->keyword);
 176             }
 177         }
 178     }
 179
 180     for (p = source; p; p = p->next) {
 181         if (p->type == para_Config) {
 182             if (!ustricmp(p->keyword, L"info-filename")) {
 183                 sfree(ret.filename);
 184                 ret.filename = dupstr(adv(p->origkeyword));
 185             } else if (!ustricmp(p->keyword, L"info-charset")) {
 186                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 187             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 188                 ret.maxfilesize = utoi(uadv(p->keyword));
 189             } else if (!ustricmp(p->keyword, L"info-width")) {
 190                 ret.width = utoi(uadv(p->keyword));
 191             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 192                 ret.indent_code = utoi(uadv(p->keyword));
 193             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 194                 ret.index_width = utoi(uadv(p->keyword));
 195             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 196                 ret.listindentbefore = utoi(uadv(p->keyword));
 197             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 198                 ret.listindentafter = utoi(uadv(p->keyword));
 199             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 200                 ret.sectsuffix = uadv(p->keyword);
 201             } else if (!ustricmp(p->keyword, L"info-underline")) {
 202                 ret.atitle.underline = ret.achapter.underline =
 203                     uadv(p->keyword);
 204                 for (n = 0; n < ret.nasect; n++)
 205                     ret.asect[n].underline = ret.atitle.underline;
 206             } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
 207                 ret.achapter.underline = uadv(p->keyword);
 208             } else if (!ustricmp(p->keyword, L"info-section-underline")) {
 209                 wchar_t *q = uadv(p->keyword);
 210                 int n = 0;
 211                 if (uisdigit(*q)) {
 212                     n = utoi(q);
 213                     q = uadv(q);
 214                 }
 215                 if (n >= ret.nasect) {
 216                     int i;
 217                     ret.asect = sresize(ret.asect, n+1, alignstruct);
 218                     for (i = ret.nasect; i <= n; i++)
 219                         ret.asect[i] = ret.asect[ret.nasect-1];
 220                     ret.nasect = n+1;
 221                 }
 222                 ret.asect[n].underline = q;
 223             } else if (!ustricmp(p->keyword, L"text-title-underline")) {
 224                 ret.atitle.underline = uadv(p->keyword);
 225             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 226                 ret.bullet = uadv(p->keyword);
 227             } else if (!ustricmp(p->keyword, L"info-rule")) {
 228                 ret.rule = uadv(p->keyword);
 229             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 230                 ret.listsuffix = uadv(p->keyword);
 231             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 232                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 233                     ret.startemph = uadv(p->keyword);
 234                     ret.endemph = uadv(ret.startemph);
 235                 }
 236             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 237                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 238                     ret.lquote = uadv(p->keyword);
 239                     ret.rquote = uadv(ret.lquote);
 240                 }
 241             }
 242         }
 243     }
 244
 245     /*
 246      * Now process fallbacks on quote characters, underlines, the
 247      * rule character, the emphasis characters, and bullets.
 248      */
 249     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 250            (!cvt_ok(ret.charset, ret.lquote) ||
 251             !cvt_ok(ret.charset, ret.rquote))) {
 252         ret.lquote = uadv(ret.rquote);
 253         ret.rquote = uadv(ret.lquote);
 254     }
 255
 256     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 257            (!cvt_ok(ret.charset, ret.startemph) ||
 258             !cvt_ok(ret.charset, ret.endemph))) {
 259         ret.startemph = uadv(ret.endemph);
 260         ret.endemph = uadv(ret.startemph);
 261     }
 262
 263     while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
 264            !cvt_ok(ret.charset, ret.atitle.underline))
 265         ret.atitle.underline = uadv(ret.atitle.underline);
 266
 267     while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
 268            !cvt_ok(ret.charset, ret.achapter.underline))
 269         ret.achapter.underline = uadv(ret.achapter.underline);
 270
 271     for (n = 0; n < ret.nasect; n++) {
 272         while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
 273                !cvt_ok(ret.charset, ret.asect[n].underline))
 274             ret.asect[n].underline = uadv(ret.asect[n].underline);
 275     }
 276
 277     while (*ret.bullet && *uadv(ret.bullet) &&
 278            !cvt_ok(ret.charset, ret.bullet))
 279         ret.bullet = uadv(ret.bullet);
 280
 281     while (*ret.rule && *uadv(ret.rule) &&
 282            !cvt_ok(ret.charset, ret.rule))
 283         ret.rule = uadv(ret.rule);
 284
 285     return ret;
 286 }
 287
 288 paragraph *info_config_filename(char *filename)
 289 {
 290     return cmdline_cfg_simple("info-filename", filename, NULL);
 291 }
 292
 293 void info_backend(paragraph *sourceform, keywordlist *keywords,
 294                   indexdata *idx, void *unused) {
 295     paragraph *p;
 296     infoconfig conf;
 297     word *prefix, *body, *wp;
 298     word spaceword;
 299     wchar_t *prefixextra;
 300     int nesting, nestindent;
 301     int indentb, indenta;
 302     int filepos;
 303     int has_index;
 304     info_data intro_text = EMPTY_INFO_DATA;
 305     node *topnode, *currnode;
 306     word bullet;
 307     FILE *fp;
 308
 309     IGNORE(unused);
 310
 311     conf = info_configure(sourceform);
 312
 313     /*
 314      * Go through and create a node for each section.
 315      */
 316     topnode = info_node_new("Top", conf.charset);
 317     currnode = topnode;
 318     for (p = sourceform; p; p = p->next) switch (p->type) {
 319         /*
 320          * Chapter titles.
 321          */
 322       case para_Chapter:
 323       case para_Appendix:
 324       case para_UnnumberedChapter:
 325       case para_Heading:
 326       case para_Subsect:
 327         {
 328             node *newnode, *upnode;
 329             char *nodename;
 330
 331             nodename = info_node_name_for_para(p, &conf);
 332             newnode = info_node_new(nodename, conf.charset);
 333             sfree(nodename);
 334
 335             p->private_data = newnode;
 336
 337             if (p->parent)
 338                 upnode = (node *)p->parent->private_data;
 339             else
 340                 upnode = topnode;
 341             assert(upnode);
 342             newnode->up = upnode;
 343
 344             currnode->next = newnode;
 345             newnode->prev = currnode;
 346
 347             currnode->listnext = newnode;
 348             currnode = newnode;
 349         }
 350         break;
 351       default:
 352         p->private_data = NULL;
 353         break;
 354     }
 355
 356     /*
 357      * Set up the display form of each index entry.
 358      */
 359     {
 360         int i;
 361         indexentry *entry;
 362
 363         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 364             info_idx *ii = snew(info_idx);
 365             info_data id = EMPTY_INFO_DATA;
 366
 367             id.charset = conf.charset;
 368
 369             ii->nnodes = ii->nodesize = 0;
 370             ii->nodes = NULL;
 371
 372             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 373
 374             ii->text = id.output.text;
 375
 376             entry->backend_data = ii;
 377         }
 378     }
 379
 380     /*
 381      * An Info file begins with a piece of introductory text which
 382      * is apparently never shown anywhere. This seems to me to be a
 383      * good place to put the copyright notice and the version IDs.
 384      * Also, Info directory entries are expected to go here.
 385      */
 386     intro_text.charset = conf.charset;
 387
 388     info_rdaddsc(&intro_text,
 389             "This Info file generated by Halibut, ");
 390     info_rdaddsc(&intro_text, version);
 391     info_rdaddsc(&intro_text, "\n\n");
 392
 393     for (p = sourceform; p; p = p->next)
 394         if (p->type == para_Config &&
 395             !ustricmp(p->keyword, L"info-dir-entry")) {
 396             wchar_t *section, *shortname, *longname, *kw;
 397             char *s;
 398
 399             section = uadv(p->keyword);
 400             shortname = *section ? uadv(section) : L"";
 401             longname = *shortname ? uadv(shortname) : L"";
 402             kw = *longname ? uadv(longname) : L"";
 403
 404             if (!*longname) {
 405                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 406                 continue;
 407             }
 408
 409             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 410             info_rdadds(&intro_text, section);
 411             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 412             info_rdadds(&intro_text, shortname);
 413             info_rdaddsc(&intro_text, ": (");
 414             s = dupstr(conf.filename);
 415             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 416                 s[strlen(s)-5] = '\0';
 417             info_rdaddsc(&intro_text, s);
 418             sfree(s);
 419             info_rdaddsc(&intro_text, ")");
 420             if (*kw) {
 421                 keyword *kwl = kw_lookup(keywords, kw);
 422                 if (kwl && kwl->para->private_data) {
 423                     node *n = (node *)kwl->para->private_data;
 424                     info_rdaddsc(&intro_text, n->name);
 425                 }
 426             }
 427             info_rdaddsc(&intro_text, ".   ");
 428             info_rdadds(&intro_text, longname);
 429             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 430         }
 431
 432     for (p = sourceform; p; p = p->next)
 433         if (p->type == para_Copyright)
 434             info_para(&intro_text, NULL, NULL, p->words, keywords,
 435                       0, 0, conf.width, &conf);
 436
 437     for (p = sourceform; p; p = p->next)
 438         if (p->type == para_VersionID)
 439             info_versionid(&intro_text, p->words, &conf);
 440
 441     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 442         info_rdaddc(&intro_text, '\n');
 443
 444     /* Do the title */
 445     for (p = sourceform; p; p = p->next)
 446         if (p->type == para_Title)
 447             info_heading(&topnode->text, NULL, p->words,
 448                          conf.atitle, conf.width, &conf);
 449
 450     nestindent = conf.listindentbefore + conf.listindentafter;
 451     nesting = 0;
 452
 453     currnode = topnode;
 454
 455     /* Do the main document */
 456     for (p = sourceform; p; p = p->next) switch (p->type) {
 457
 458       case para_QuotePush:
 459         nesting += 2;
 460         break;
 461       case para_QuotePop:
 462         nesting -= 2;
 463         assert(nesting >= 0);
 464         break;
 465
 466       case para_LcontPush:
 467         nesting += nestindent;
 468         break;
 469       case para_LcontPop:
 470         nesting -= nestindent;
 471         assert(nesting >= 0);
 472         break;
 473
 474         /*
 475          * Things we ignore because we've already processed them or
 476          * aren't going to touch them in this pass.
 477          */
 478       case para_IM:
 479       case para_BR:
 480       case para_Biblio:                /* only touch BiblioCited */
 481       case para_VersionID:
 482       case para_NoCite:
 483       case para_Title:
 484         break;
 485
 486         /*
 487          * Chapter titles.
 488          */
 489       case para_Chapter:
 490       case para_Appendix:
 491       case para_UnnumberedChapter:
 492       case para_Heading:
 493       case para_Subsect:
 494         currnode = p->private_data;
 495         assert(currnode);
 496         assert(currnode->up);
 497
 498         if (!currnode->up->started_menu) {
 499             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 500             currnode->up->started_menu = TRUE;
 501         }
 502         info_menu_item(&currnode->up->text, currnode, p, &conf);
 503
 504         has_index |= info_check_index(p->words, currnode, idx);
 505         if (p->type == para_Chapter || p->type == para_Appendix ||
 506             p->type == para_UnnumberedChapter)
 507             info_heading(&currnode->text, p->kwtext, p->words,
 508                          conf.achapter, conf.width, &conf);
 509         else
 510             info_heading(&currnode->text, p->kwtext, p->words,
 511                          conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
 512                          conf.width, &conf);
 513         nesting = 0;
 514         break;
 515
 516       case para_Rule:
 517         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 518         break;
 519
 520       case para_Normal:
 521       case para_Copyright:
 522       case para_DescribedThing:
 523       case para_Description:
 524       case para_BiblioCited:
 525       case para_Bullet:
 526       case para_NumberedList:
 527         has_index |= info_check_index(p->words, currnode, idx);
 528         if (p->type == para_Bullet) {
 529             bullet.next = NULL;
 530             bullet.alt = NULL;
 531             bullet.type = word_Normal;
 532             bullet.text = conf.bullet;
 533             prefix = &bullet;
 534             prefixextra = NULL;
 535             indentb = conf.listindentbefore;
 536             indenta = conf.listindentafter;
 537         } else if (p->type == para_NumberedList) {
 538             prefix = p->kwtext;
 539             prefixextra = conf.listsuffix;
 540             indentb = conf.listindentbefore;
 541             indenta = conf.listindentafter;
 542         } else if (p->type == para_Description) {
 543             prefix = NULL;
 544             prefixextra = NULL;
 545             indentb = conf.listindentbefore;
 546             indenta = conf.listindentafter;
 547         } else {
 548             prefix = NULL;
 549             prefixextra = NULL;
 550             indentb = indenta = 0;
 551         }
 552         if (p->type == para_BiblioCited) {
 553             body = dup_word_list(p->kwtext);
 554             for (wp = body; wp->next; wp = wp->next);
 555             wp->next = &spaceword;
 556             spaceword.next = p->words;
 557             spaceword.alt = NULL;
 558             spaceword.type = word_WhiteSpace;
 559             spaceword.text = NULL;
 560         } else {
 561             wp = NULL;
 562             body = p->words;
 563         }
 564         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 565                   nesting + indentb, indenta,
 566                   conf.width - nesting - indentb - indenta, &conf);
 567         if (wp) {
 568             wp->next = NULL;
 569             free_word_list(body);
 570         }
 571         break;
 572
 573       case para_Code:
 574         info_codepara(&currnode->text, p->words,
 575                       nesting + conf.indent_code,
 576                       conf.width - nesting - 2 * conf.indent_code);
 577         break;
 578     }
 579
 580     /*
 581      * Create an index node if required.
 582      */
 583     if (has_index) {
 584         node *newnode;
 585         int i, j, k;
 586         indexentry *entry;
 587         char *nodename;
 588
 589         nodename = info_node_name_for_text(conf.index_text, &conf);
 590         newnode = info_node_new(nodename, conf.charset);
 591         sfree(nodename);
 592
 593         newnode->up = topnode;
 594
 595         currnode->next = newnode;
 596         newnode->prev = currnode;
 597         currnode->listnext = newnode;
 598
 599         k = info_rdadds(&newnode->text, conf.index_text);
 600         info_rdaddsc(&newnode->text, "\n");
 601         while (k > 0) {
 602             info_rdadds(&newnode->text, conf.achapter.underline);
 603             k -= ustrwid(conf.achapter.underline, conf.charset);
 604         }
 605         info_rdaddsc(&newnode->text, "\n\n");
 606
 607         info_menu_item(&topnode->text, newnode, NULL, &conf);
 608
 609         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 610             info_idx *ii = (info_idx *)entry->backend_data;
 611
 612             for (j = 0; j < ii->nnodes; j++) {
 613                 /*
 614                  * When we have multiple references for a single
 615                  * index term, we only display the actual term on
 616                  * the first line, to make it clear that the terms
 617                  * really are the same.
 618                  */
 619                 if (j == 0)
 620                     info_rdaddsc(&newnode->text, ii->text);
 621                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 622                     info_rdaddc(&newnode->text, ' ');
 623                 info_rdaddsc(&newnode->text, "  *Note ");
 624                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 625                 info_rdaddsc(&newnode->text, "::\n");
 626             }
 627         }
 628     }
 629
 630     /*
 631      * Finalise the text of each node, by adding the ^_ delimiter
 632      * and the node line at the top.
 633      */
 634     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 635         char *origtext = currnode->text.output.text;
 636         currnode->text = empty_info_data;
 637         currnode->text.charset = conf.charset;
 638         info_rdaddsc(&currnode->text, "\037\nFile: ");
 639         info_rdaddsc(&currnode->text, conf.filename);
 640         info_rdaddsc(&currnode->text, ",  Node: ");
 641         info_rdaddsc(&currnode->text, currnode->name);
 642         if (currnode->prev) {
 643             info_rdaddsc(&currnode->text, ",  Prev: ");
 644             info_rdaddsc(&currnode->text, currnode->prev->name);
 645         }
 646         info_rdaddsc(&currnode->text, ",  Up: ");
 647         info_rdaddsc(&currnode->text, (currnode->up ?
 648                                        currnode->up->name : "(dir)"));
 649         if (currnode->next) {
 650             info_rdaddsc(&currnode->text, ",  Next: ");
 651             info_rdaddsc(&currnode->text, currnode->next->name);
 652         }
 653         info_rdaddsc(&currnode->text, "\n\n");
 654         info_rdaddsc(&currnode->text, origtext);
 655         /*
 656          * Just make _absolutely_ sure we end with a newline.
 657          */
 658         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 659             info_rdaddc(&currnode->text, '\n');
 660
 661         sfree(origtext);
 662     }
 663
 664     /*
 665      * Compute the offsets for the tag table.
 666      */
 667     filepos = intro_text.output.pos;
 668     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 669         currnode->pos = filepos;
 670         filepos += currnode->text.output.pos;
 671     }
 672
 673     /*
 674      * Split into sub-files.
 675      */
 676     if (conf.maxfilesize > 0) {
 677         int currfilesize = intro_text.output.pos, currfilenum = 1;
 678         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 679             if (currfilesize > intro_text.output.pos &&
 680                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 681                 currfilenum++;
 682                 currfilesize = intro_text.output.pos;
 683             }
 684             currnode->filenum = currfilenum;
 685             currfilesize += currnode->text.output.pos;
 686         }
 687     }
 688
 689     /*
 690      * Write the primary output file.
 691      */
 692     fp = fopen(conf.filename, "w");
 693     if (!fp) {
 694         error(err_cantopenw, conf.filename);
 695         return;
 696     }
 697     fputs(intro_text.output.text, fp);
 698     if (conf.maxfilesize == 0) {
 699         for (currnode = topnode; currnode; currnode = currnode->listnext)
 700             fputs(currnode->text.output.text, fp);
 701     } else {
 702         int filenum = 0;
 703         fprintf(fp, "\037\nIndirect:\n");
 704         for (currnode = topnode; currnode; currnode = currnode->listnext)
 705             if (filenum != currnode->filenum) {
 706                 filenum = currnode->filenum;
 707                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 708                         currnode->pos);
 709             }
 710     }
 711     fprintf(fp, "\037\nTag Table:\n");
 712     if (conf.maxfilesize > 0)
 713         fprintf(fp, "(Indirect)\n");
 714     for (currnode = topnode; currnode; currnode = currnode->listnext)
 715         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 716     fprintf(fp, "\037\nEnd Tag Table\n");
 717     fclose(fp);
 718
 719     /*
 720      * Write the subfiles.
 721      */
 722     if (conf.maxfilesize > 0) {
 723         int filenum = 0;
 724         fp = NULL;
 725
 726         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 727             if (filenum != currnode->filenum) {
 728                 char *fname;
 729
 730                 filenum = currnode->filenum;
 731
 732                 if (fp)
 733                     fclose(fp);
 734                 fname = snewn(strlen(conf.filename) + 40, char);
 735                 sprintf(fname, "%s-%d", conf.filename, filenum);
 736                 fp = fopen(fname, "w");
 737                 if (!fp) {
 738                     error(err_cantopenw, fname);
 739                     return;
 740                 }
 741                 sfree(fname);
 742                 fputs(intro_text.output.text, fp);
 743             }
 744             fputs(currnode->text.output.text, fp);
 745         }
 746
 747         if (fp)
 748             fclose(fp);
 749     }
 750 }
 751
 752 static int info_check_index(word *w, node *n, indexdata *idx)
 753 {
 754     int ret = 0;
 755
 756     for (; w; w = w->next) {
 757         if (w->type == word_IndexRef) {
 758             indextag *tag;
 759             int i;
 760
 761             tag = index_findtag(idx, w->text);
 762             if (!tag)
 763                 break;
 764
 765             for (i = 0; i < tag->nrefs; i++) {
 766                 indexentry *entry = tag->refs[i];
 767                 info_idx *ii = (info_idx *)entry->backend_data;
 768
 769                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 770                     /*
 771                      * If the same index term is indexed twice
 772                      * within the same section, we only want to
 773                      * mention it once in the index. So do nothing
 774                      * here.
 775                      */
 776                     continue;
 777                 }
 778
 779                 if (ii->nnodes >= ii->nodesize) {
 780                     ii->nodesize += 32;
 781                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 782                 }
 783
 784                 ii->nodes[ii->nnodes++] = n;
 785
 786                 ret = 1;
 787             }
 788         }
 789     }
 790
 791     return ret;
 792 }
 793
 794 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 795 {
 796     word *ret = dup_word_list(words);
 797     word *w;
 798     keyword *kwl;
 799
 800     for (w = ret; w; w = w->next) {
 801         w->private_data = NULL;
 802         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 803             kwl = kw_lookup(keywords, w->text);
 804             if (kwl) {
 805                 if (kwl->para->type == para_NumberedList ||
 806                     kwl->para->type == para_BiblioCited) {
 807                     /*
 808                      * In Info, we do nothing special for xrefs to
 809                      * numbered list items or bibliography entries.
 810                      */
 811                     continue;
 812                 } else {
 813                     /*
 814                      * An xref to a different section has its text
 815                      * completely replaced.
 816                      */
 817                     word *w2, *w3, *w4;
 818                     w2 = w3 = w->next;
 819                     w4 = NULL;
 820                     while (w2) {
 821                         if (w2->type == word_XrefEnd) {
 822                             w4 = w2->next;
 823                             w2->next = NULL;
 824                             break;
 825                         }
 826                         w2 = w2->next;
 827                     }
 828                     free_word_list(w3);
 829
 830                     /*
 831                      * Now w is the UpperXref / LowerXref we
 832                      * started with, and w4 is the next word after
 833                      * the corresponding XrefEnd (if any). The
 834                      * simplest thing is just to stick a pointer to
 835                      * the target node structure in the private
 836                      * data field of the xref word, and let
 837                      * info_rdaddwc and friends read the node name
 838                      * out from there.
 839                      */
 840                     w->next = w4;
 841                     w->private_data = kwl->para->private_data;
 842                     assert(w->private_data);
 843                 }
 844             }
 845         }
 846     }
 847
 848     return ret;
 849 }
 850
 851 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 852                         infoconfig *cfg) {
 853     int ret = 0;
 854
 855     for (; words && words != end; words = words->next) switch (words->type) {
 856       case word_HyperLink:
 857       case word_HyperEnd:
 858       case word_XrefEnd:
 859       case word_IndexRef:
 860         break;
 861
 862       case word_Normal:
 863       case word_Emph:
 864       case word_Code:
 865       case word_WeakCode:
 866       case word_WhiteSpace:
 867       case word_EmphSpace:
 868       case word_CodeSpace:
 869       case word_WkCodeSpace:
 870       case word_Quote:
 871       case word_EmphQuote:
 872       case word_CodeQuote:
 873       case word_WkCodeQuote:
 874         assert(words->type != word_CodeQuote &&
 875                words->type != word_WkCodeQuote);
 876         if (towordstyle(words->type) == word_Emph &&
 877             (attraux(words->aux) == attr_First ||
 878              attraux(words->aux) == attr_Only))
 879             ret += info_rdadds(id, cfg->startemph);
 880         else if (towordstyle(words->type) == word_Code &&
 881                  (attraux(words->aux) == attr_First ||
 882                   attraux(words->aux) == attr_Only))
 883             ret += info_rdadds(id, cfg->lquote);
 884         if (removeattr(words->type) == word_Normal) {
 885             if (cvt_ok(id->charset, words->text) || !words->alt)
 886                 ret += info_rdadds(id, words->text);
 887             else
 888                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 889         } else if (removeattr(words->type) == word_WhiteSpace) {
 890             ret += info_rdadd(id, L' ');
 891         } else if (removeattr(words->type) == word_Quote) {
 892             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 893                                cfg->lquote : cfg->rquote);
 894         }
 895         if (towordstyle(words->type) == word_Emph &&
 896             (attraux(words->aux) == attr_Last ||
 897              attraux(words->aux) == attr_Only))
 898             ret += info_rdadds(id, cfg->endemph);
 899         else if (towordstyle(words->type) == word_Code &&
 900                  (attraux(words->aux) == attr_Last ||
 901                   attraux(words->aux) == attr_Only))
 902             ret += info_rdadds(id, cfg->rquote);
 903         break;
 904
 905       case word_UpperXref:
 906       case word_LowerXref:
 907         if (xrefs && words->private_data) {
 908             /*
 909              * This bit is structural and so must be done in char
 910              * rather than wchar_t.
 911              */
 912             ret += info_rdaddsc(id, "*Note ");
 913             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 914             ret += info_rdaddsc(id, "::");
 915         }
 916         break;
 917     }
 918
 919     return ret;
 920 }
 921
 922 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 923
 924 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 925     int w = 0;
 926     while (words) {
 927         w += info_width_internal(words, xrefs, cfg);
 928         words = words->next;
 929     }
 930     return w;
 931 }
 932
 933 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 934     int wid;
 935     int attr;
 936
 937     switch (words->type) {
 938       case word_HyperLink:
 939       case word_HyperEnd:
 940       case word_XrefEnd:
 941       case word_IndexRef:
 942         return 0;
 943
 944       case word_UpperXref:
 945       case word_LowerXref:
 946         if (xrefs && words->private_data) {
 947             /* "*Note " plus "::" comes to 8 characters */
 948             return 8 + strwid(((node *)words->private_data)->name,
 949                               cfg->charset);
 950         } else
 951             return 0;
 952     }
 953
 954     assert(words->type < word_internal_endattrs);
 955
 956     wid = 0;
 957     attr = towordstyle(words->type);
 958
 959     if (attr == word_Emph || attr == word_Code) {
 960         if (attraux(words->aux) == attr_Only ||
 961             attraux(words->aux) == attr_First)
 962             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 963                            cfg->charset);
 964     }
 965     if (attr == word_Emph || attr == word_Code) {
 966         if (attraux(words->aux) == attr_Only ||
 967             attraux(words->aux) == attr_Last)
 968             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 969                            cfg->charset);
 970     }
 971
 972     switch (words->type) {
 973       case word_Normal:
 974       case word_Emph:
 975       case word_Code:
 976       case word_WeakCode:
 977         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 978             wid += ustrwid(words->text, cfg->charset);
 979         else
 980             wid += info_width_internal_list(words->alt, xrefs, cfg);
 981         return wid;
 982
 983       case word_WhiteSpace:
 984       case word_EmphSpace:
 985       case word_CodeSpace:
 986       case word_WkCodeSpace:
 987       case word_Quote:
 988       case word_EmphQuote:
 989       case word_CodeQuote:
 990       case word_WkCodeQuote:
 991         assert(words->type != word_CodeQuote &&
 992                words->type != word_WkCodeQuote);
 993         if (removeattr(words->type) == word_Quote) {
 994             if (quoteaux(words->aux) == quote_Open)
 995                 wid += ustrwid(cfg->lquote, cfg->charset);
 996             else
 997                 wid += ustrwid(cfg->rquote, cfg->charset);
 998         } else
 999             wid++;                     /* space */
1000     }
1001     return wid;
1002 }
1003
1004 static int info_width_noxrefs(void *ctx, word *words)
1005 {
1006     return info_width_internal(words, FALSE, (infoconfig *)ctx);
1007 }
1008 static int info_width_xrefs(void *ctx, word *words)
1009 {
1010     return info_width_internal(words, TRUE, (infoconfig *)ctx);
1011 }
1012
1013 static void info_heading(info_data *text, word *tprefix,
1014                          word *words, alignstruct align,
1015                          int width, infoconfig *cfg) {
1016     int length;
1017     int firstlinewidth, wrapwidth;
1018     wrappedline *wrapping, *p;
1019
1020     length = 0;
1021     if (tprefix) {
1022         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
1023         length += info_rdadds(text, cfg->sectsuffix);
1024     }
1025
1026     wrapwidth = width;
1027     firstlinewidth = width - length;
1028
1029     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
1030                          info_width_noxrefs, cfg, 0);
1031     for (p = wrapping; p; p = p->next) {
1032         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
1033         info_rdadd(text, L'\n');
1034         if (*align.underline) {
1035             while (length > 0) {
1036                 info_rdadds(text, align.underline);
1037                 length -= ustrwid(align.underline, cfg->charset);
1038             }
1039             info_rdadd(text, L'\n');
1040         }
1041         length = 0;
1042     }
1043     wrap_free(wrapping);
1044     info_rdadd(text, L'\n');
1045 }
1046
1047 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
1048 {
1049     while (indent--) info_rdadd(text, L' ');
1050     while (width > 0) {
1051         info_rdadds(text, cfg->rule);
1052         width -= ustrwid(cfg->rule, cfg->charset);
1053     }
1054     info_rdadd(text, L'\n');
1055     info_rdadd(text, L'\n');
1056 }
1057
1058 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
1059                       word *input, keywordlist *keywords, int indent,
1060                       int extraindent, int width, infoconfig *cfg) {
1061     wrappedline *wrapping, *p;
1062     word *words;
1063     int e;
1064     int i;
1065     int firstlinewidth = width;
1066
1067     words = info_transform_wordlist(input, keywords);
1068
1069     if (prefix) {
1070         for (i = 0; i < indent; i++)
1071             info_rdadd(text, L' ');
1072         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1073         if (prefixextra)
1074             e += info_rdadds(text, prefixextra);
1075         /* If the prefix is too long, shorten the first line to fit. */
1076         e = extraindent - e;
1077         if (e < 0) {
1078             firstlinewidth += e;       /* this decreases it, since e < 0 */
1079             if (firstlinewidth < 0) {
1080                 e = indent + extraindent;
1081                 firstlinewidth = width;
1082                 info_rdadd(text, L'\n');
1083             } else
1084                 e = 0;
1085         }
1086     } else
1087         e = indent + extraindent;
1088
1089     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1090                          cfg, 0);
1091     for (p = wrapping; p; p = p->next) {
1092         for (i = 0; i < e; i++)
1093             info_rdadd(text, L' ');
1094         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1095         info_rdadd(text, L'\n');
1096         e = indent + extraindent;
1097     }
1098     wrap_free(wrapping);
1099     info_rdadd(text, L'\n');
1100
1101     free_word_list(words);
1102 }
1103
1104 static void info_codepara(info_data *text, word *words,
1105                           int indent, int width) {
1106     int i;
1107
1108     for (; words; words = words->next) if (words->type == word_WeakCode) {
1109         for (i = 0; i < indent; i++)
1110             info_rdadd(text, L' ');
1111         if (info_rdadds(text, words->text) > width) {
1112             /* FIXME: warn */
1113         }
1114         info_rdadd(text, L'\n');
1115     }
1116
1117     info_rdadd(text, L'\n');
1118 }
1119
1120 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1121     info_rdadd(text, L'[');
1122     info_rdaddwc(text, words, NULL, FALSE, cfg);
1123     info_rdadds(text, L"]\n");
1124 }
1125
1126 static node *info_node_new(char *name, int charset)
1127 {
1128     node *n;
1129
1130     n = snew(node);
1131     n->text = empty_info_data;
1132     n->text.charset = charset;
1133     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1134     n->name = dupstr(name);
1135     n->started_menu = FALSE;
1136
1137     return n;
1138 }
1139
1140 static char *info_node_name_core(info_data *id, filepos *fpos)
1141 {
1142     char *p, *q;
1143
1144     /*
1145      * We cannot have commas, colons or parentheses in a node name.
1146      * Remove any that we find, with a warning.
1147      */
1148     p = q = id->output.text;
1149     while (*p) {
1150         if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1151             error(err_infonodechar, fpos, *p);
1152         } else {
1153             *q++ = *p;
1154         }
1155         p++;
1156     }
1157     *q = '\0';
1158
1159     return id->output.text;
1160 }
1161
1162 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1163 {
1164     info_data id = EMPTY_INFO_DATA;
1165
1166     id.charset = cfg->charset;
1167     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1168                  NULL, FALSE, cfg);
1169     info_rdaddsc(&id, NULL);
1170
1171     return info_node_name_core(&id, &par->fpos);
1172 }
1173
1174 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1175 {
1176     info_data id = EMPTY_INFO_DATA;
1177
1178     id.charset = cfg->charset;
1179     info_rdadds(&id, text);
1180     info_rdaddsc(&id, NULL);
1181
1182     return info_node_name_core(&id, NULL);
1183 }
1184
1185 static void info_menu_item(info_data *text, node *n, paragraph *p,
1186                            infoconfig *cfg)
1187 {
1188     /*
1189      * FIXME: Depending on how we're doing node names in this info
1190      * file, we might want to do
1191      *
1192      *   * Node name:: Chapter title
1193      *
1194      * _or_
1195      *
1196      *   * Chapter number: Node name.
1197      *
1198      * This function mostly works in char rather than wchar_t,
1199      * because a menu item is a structural component.
1200      */
1201     info_rdaddsc(text, "* ");
1202     info_rdaddsc(text, n->name);
1203     info_rdaddsc(text, "::");
1204     if (p) {
1205         info_rdaddc(text, ' ');
1206         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1207     }
1208     info_rdaddc(text, '\n');
1209 }
1210
1211 /*
1212  * These functions implement my wrapper on the rdadd* calls which
1213  * allows me to switch arbitrarily between literal octet-string
1214  * text and charset-translated Unicode. (Because no matter what
1215  * character set I write the actual text in, I expect info readers
1216  * to treat node names and file names literally and to expect
1217  * keywords like `*Note' in their canonical form, so I have to take
1218  * steps to ensure that those structural elements of the file
1219  * aren't messed with.)
1220  */
1221 static int info_rdadds(info_data *d, wchar_t const *wcs)
1222 {
1223     if (!d->wcmode) {
1224         d->state = charset_init_state;
1225         d->wcmode = TRUE;
1226     }
1227
1228     if (wcs) {
1229         char buf[256];
1230         int len, width, ret;
1231
1232         width = ustrwid(wcs, d->charset);
1233
1234         len = ustrlen(wcs);
1235         while (len > 0) {
1236             int prevlen = len;
1237
1238             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1239                                        d->charset, &d->state, NULL);
1240
1241             assert(len < prevlen);
1242
1243             if (ret > 0) {
1244                 buf[ret] = '\0';
1245                 rdaddsc(&d->output, buf);
1246             }
1247         }
1248
1249         return width;
1250     } else
1251         return 0;
1252 }
1253
1254 static int info_rdaddsc(info_data *d, char const *cs)
1255 {
1256     if (d->wcmode) {
1257         char buf[256];
1258         int ret;
1259
1260         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1261                                    d->charset, &d->state, NULL);
1262         if (ret > 0) {
1263             buf[ret] = '\0';
1264             rdaddsc(&d->output, buf);
1265         }
1266
1267         d->wcmode = FALSE;
1268     }
1269
1270     if (cs) {
1271         rdaddsc(&d->output, cs);
1272         return strwid(cs, d->charset);
1273     } else
1274         return 0;
1275 }
1276
1277 static int info_rdadd(info_data *d, wchar_t wc)
1278 {
1279     wchar_t wcs[2];
1280     wcs[0] = wc;
1281     wcs[1] = L'\0';
1282     return info_rdadds(d, wcs);
1283 }
1284
1285 static int info_rdaddc(info_data *d, char c)
1286 {
1287     char cs[2];
1288     cs[0] = c;
1289     cs[1] = '\0';
1290     return info_rdaddsc(d, cs);
1291 }