mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable choice of how to allocate node names?
   7  *     + possibly a template-like approach, choosing node names to
   8  *       be the full section title or perhaps the internal keyword?
   9  *     + neither of those seems quite right. Perhaps instead a
  10  *       Windows Help-like mechanism, where a magic config
  11  *       directive allows user choice of name for every node.
  12  *     + Only trouble with that is, now what happens to the section
  13  *       numbers? Do they become completely vestigial and just sit
  14  *       in the title text of each node? Or do we keep them in the
  15  *       menus somehow? I think people might occasionally want to
  16  *       go to a section by number, if only because all the _other_
  17  *       formats of the same document will reference the numbers
  18  *       all the time. So our menu lines could look like one of
  19  *       these:
  20  *        * Nodename: Section 1.2. Title of section.
  21  *        * Section 1.2: Nodename. Title of section.
  22  *
  23  *  - might be helpful to diagnose duplicate node names!
  24  *
  25  *  - Indices generated by makeinfo use a menu rather than a bunch of
  26  *    cross-references, which reduces visual clutter rather.  For
  27  *    singly-referenced items, it looks like:
  28  *      * toner cartridge, replacing:              Toner.
  29  *    It does a horrid job on multiply-referenced entries, though,
  30  *    perhaps because the name before the colon is meant to be unique.
  31  *    Info's 'i' command requires the use of a menu -- it fails to
  32  *    find any index entries at all with Halibut's current index format.
  33  *
  34  *  - The string "*note" is matched case-insensitively, so we could
  35  *    make things slightly less ugly by using the lower-case version
  36  *    when the user asks for \k.  Unfortunately, standalone Info seems
  37  *    to match node names case-sensitively, so we can't downcase that.
  38  */
  39
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <assert.h>
  43 #include "halibut.h"
  44
/*
 * Per-heading-level presentation settings. At present the only
 * setting is the underline: a wide string that info_configure()
 * walks with uadv(), moving on to the next alternative while the
 * current one cannot be converted to the output character set.
 */
typedef struct {
    wchar_t *underline;
} alignstruct;
  48
  49 typedef struct {
  50     char *filename;
  51     int maxfilesize;
  52     int charset;
  53     int listindentbefore, listindentafter;
  54     int indent_code, width, index_width;
  55     alignstruct atitle, achapter, *asect;
  56     int nasect;
  57     wchar_t *bullet, *listsuffix;
  58     wchar_t *startemph, *endemph;
  59     wchar_t *lquote, *rquote;
  60     wchar_t *sectsuffix;
  61     wchar_t *rule;
  62     wchar_t *index_text;
  63 } infoconfig;
  64
/*
 * An output buffer together with the character-set conversion state
 * needed to append to it. The member order must stay in step with
 * the positional initialiser EMPTY_INFO_DATA below.
 */
typedef struct {
    rdstringc output;          /* accumulated output text */
    int charset;               /* character set of the output */
    charset_state state;       /* conversion state for the charset */
    int wcmode;                /* starts out FALSE; NOTE(review): looks
                                * like a "currently adding wide
                                * characters" flag -- confirm against
                                * info_rdadd* */
} info_data;
/* Initialiser for an empty info_data; the caller still sets charset. */
#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
/* Assignable empty value, used to reset a node's text buffer. */
static const info_data empty_info_data = EMPTY_INFO_DATA;
  73
  74 typedef struct node_tag node;
  75 struct node_tag {
  76     node *listnext;
  77     node *up, *prev, *next, *lastchild;
  78     int pos, started_menu, filenum;
  79     char *name;
  80     info_data text;
  81 };
  82
  83 typedef struct {
  84     char *text;
  85     int length;
  86     int nnodes, nodesize;
  87     node **nodes;
  88 } info_idx;
  89
  90 static int info_rdadd(info_data *, wchar_t);
  91 static int info_rdadds(info_data *, wchar_t const *);
  92 static int info_rdaddc(info_data *, char);
  93 static int info_rdaddsc(info_data *, char const *);
  94
  95 static void info_heading(info_data *, word *, word *, alignstruct, int,
  96                          infoconfig *);
  97 static void info_rule(info_data *, int, int, infoconfig *);
  98 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  99                       int, int, int, infoconfig *);
 100 static void info_codepara(info_data *, word *, int, int);
 101 static void info_versionid(info_data *, word *, infoconfig *);
 102 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
 103 static word *info_transform_wordlist(word *, keywordlist *);
 104 static int info_check_index(word *, node *, indexdata *);
 105
 106 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
 107
 108 static node *info_node_new(char *name, int charset);
 109 static char *info_node_name_for_para(paragraph *p, infoconfig *);
 110 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
 111
 112 static infoconfig info_configure(paragraph *source) {
 113     infoconfig ret;
 114     paragraph *p;
 115     int n;
 116
 117     /*
 118      * Defaults.
 119      */
 120     ret.filename = dupstr("output.info");
 121     ret.maxfilesize = 64 << 10;
 122     ret.charset = CS_ASCII;
 123     ret.width = 70;
 124     ret.listindentbefore = 1;
 125     ret.listindentafter = 3;
 126     ret.indent_code = 2;
 127     ret.index_width = 40;
 128     ret.listsuffix = L".";
 129     ret.bullet = L"\x2022\0-\0\0";
 130     ret.rule = L"\x2500\0-\0\0";
 131     ret.startemph = L"_\0_\0\0";
 132     ret.endemph = uadv(ret.startemph);
 133     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 134     ret.rquote = uadv(ret.lquote);
 135     ret.sectsuffix = L": ";
 136     /*
 137      * Default underline characters are chosen to match those recognised by
 138      * Info-fontify-node.
 139      */
 140     ret.atitle.underline = L"*\0\0";
 141     ret.achapter.underline = L"=\0\0";
 142     ret.nasect = 2;
 143     ret.asect = snewn(ret.nasect, alignstruct);
 144     ret.asect[0].underline = L"-\0\0";
 145     ret.asect[1].underline = L".\0\0";
 146     ret.index_text = L"Index";
 147
 148     /*
 149      * Two-pass configuration so that we can pick up global config
 150      * (e.g. `quotes') before having it overridden by specific
 151      * config (`info-quotes'), irrespective of the order in which
 152      * they occur.
 153      */
 154     for (p = source; p; p = p->next) {
 155         if (p->type == para_Config) {
 156             if (!ustricmp(p->keyword, L"quotes")) {
 157                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 158                     ret.lquote = uadv(p->keyword);
 159                     ret.rquote = uadv(ret.lquote);
 160                 }
 161             } else if (!ustricmp(p->keyword, L"index")) {
 162                 ret.index_text = uadv(p->keyword);
 163             }
 164         }
 165     }
 166
 167     for (p = source; p; p = p->next) {
 168         if (p->type == para_Config) {
 169             if (!ustricmp(p->keyword, L"info-filename")) {
 170                 sfree(ret.filename);
 171                 ret.filename = dupstr(adv(p->origkeyword));
 172             } else if (!ustricmp(p->keyword, L"info-charset")) {
 173                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 174             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 175                 ret.maxfilesize = utoi(uadv(p->keyword));
 176             } else if (!ustricmp(p->keyword, L"info-width")) {
 177                 ret.width = utoi(uadv(p->keyword));
 178             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 179                 ret.indent_code = utoi(uadv(p->keyword));
 180             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 181                 ret.index_width = utoi(uadv(p->keyword));
 182             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 183                 ret.listindentbefore = utoi(uadv(p->keyword));
 184             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 185                 ret.listindentafter = utoi(uadv(p->keyword));
 186             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 187                 ret.sectsuffix = uadv(p->keyword);
 188             } else if (!ustricmp(p->keyword, L"info-underline")) {
 189                 ret.atitle.underline = ret.achapter.underline =
 190                     uadv(p->keyword);
 191                 for (n = 0; n < ret.nasect; n++)
 192                     ret.asect[n].underline = ret.atitle.underline;
 193             } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
 194                 ret.achapter.underline = uadv(p->keyword);
 195             } else if (!ustricmp(p->keyword, L"info-section-underline")) {
 196                 wchar_t *q = uadv(p->keyword);
 197                 int n = 0;
 198                 if (uisdigit(*q)) {
 199                     n = utoi(q);
 200                     q = uadv(q);
 201                 }
 202                 if (n >= ret.nasect) {
 203                     int i;
 204                     ret.asect = sresize(ret.asect, n+1, alignstruct);
 205                     for (i = ret.nasect; i <= n; i++)
 206                         ret.asect[i] = ret.asect[ret.nasect-1];
 207                     ret.nasect = n+1;
 208                 }
 209                 ret.asect[n].underline = q;
 210             } else if (!ustricmp(p->keyword, L"text-title-underline")) {
 211                 ret.atitle.underline = uadv(p->keyword);
 212             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 213                 ret.bullet = uadv(p->keyword);
 214             } else if (!ustricmp(p->keyword, L"info-rule")) {
 215                 ret.rule = uadv(p->keyword);
 216             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 217                 ret.listsuffix = uadv(p->keyword);
 218             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 219                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 220                     ret.startemph = uadv(p->keyword);
 221                     ret.endemph = uadv(ret.startemph);
 222                 }
 223             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 224                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 225                     ret.lquote = uadv(p->keyword);
 226                     ret.rquote = uadv(ret.lquote);
 227                 }
 228             }
 229         }
 230     }
 231
 232     /*
 233      * Now process fallbacks on quote characters, underlines, the
 234      * rule character, the emphasis characters, and bullets.
 235      */
 236     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 237            (!cvt_ok(ret.charset, ret.lquote) ||
 238             !cvt_ok(ret.charset, ret.rquote))) {
 239         ret.lquote = uadv(ret.rquote);
 240         ret.rquote = uadv(ret.lquote);
 241     }
 242
 243     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 244            (!cvt_ok(ret.charset, ret.startemph) ||
 245             !cvt_ok(ret.charset, ret.endemph))) {
 246         ret.startemph = uadv(ret.endemph);
 247         ret.endemph = uadv(ret.startemph);
 248     }
 249
 250     while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
 251            !cvt_ok(ret.charset, ret.atitle.underline))
 252         ret.atitle.underline = uadv(ret.atitle.underline);
 253
 254     while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
 255            !cvt_ok(ret.charset, ret.achapter.underline))
 256         ret.achapter.underline = uadv(ret.achapter.underline);
 257
 258     for (n = 0; n < ret.nasect; n++) {
 259         while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
 260                !cvt_ok(ret.charset, ret.asect[n].underline))
 261             ret.asect[n].underline = uadv(ret.asect[n].underline);
 262     }
 263
 264     while (*ret.bullet && *uadv(ret.bullet) &&
 265            !cvt_ok(ret.charset, ret.bullet))
 266         ret.bullet = uadv(ret.bullet);
 267
 268     while (*ret.rule && *uadv(ret.rule) &&
 269            !cvt_ok(ret.charset, ret.rule))
 270         ret.rule = uadv(ret.rule);
 271
 272     return ret;
 273 }
 274
 275 paragraph *info_config_filename(char *filename)
 276 {
 277     return cmdline_cfg_simple("info-filename", filename, NULL);
 278 }
 279
 280 void info_backend(paragraph *sourceform, keywordlist *keywords,
 281                   indexdata *idx, void *unused) {
 282     paragraph *p;
 283     infoconfig conf;
 284     word *prefix, *body, *wp;
 285     word spaceword;
 286     wchar_t *prefixextra;
 287     int nesting, nestindent;
 288     int indentb, indenta;
 289     int filepos;
 290     int has_index;
 291     info_data intro_text = EMPTY_INFO_DATA;
 292     node *topnode, *currnode;
 293     word bullet;
 294     FILE *fp;
 295
 296     IGNORE(unused);
 297
 298     conf = info_configure(sourceform);
 299
 300     /*
 301      * Go through and create a node for each section.
 302      */
 303     topnode = info_node_new("Top", conf.charset);
 304     currnode = topnode;
 305     for (p = sourceform; p; p = p->next) switch (p->type) {
 306         /*
 307          * Chapter titles.
 308          */
 309       case para_Chapter:
 310       case para_Appendix:
 311       case para_UnnumberedChapter:
 312       case para_Heading:
 313       case para_Subsect:
 314         {
 315             node *newnode, *upnode;
 316             char *nodename;
 317
 318             nodename = info_node_name_for_para(p, &conf);
 319             newnode = info_node_new(nodename, conf.charset);
 320             sfree(nodename);
 321
 322             p->private_data = newnode;
 323
 324             if (p->parent)
 325                 upnode = (node *)p->parent->private_data;
 326             else
 327                 upnode = topnode;
 328             assert(upnode);
 329             newnode->up = upnode;
 330
 331             currnode->next = newnode;
 332             newnode->prev = currnode;
 333
 334             currnode->listnext = newnode;
 335             currnode = newnode;
 336         }
 337         break;
 338       default:
 339         p->private_data = NULL;
 340         break;
 341     }
 342
 343     /*
 344      * Set up the display form of each index entry.
 345      */
 346     {
 347         int i;
 348         indexentry *entry;
 349
 350         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 351             info_idx *ii = snew(info_idx);
 352             info_data id = EMPTY_INFO_DATA;
 353
 354             id.charset = conf.charset;
 355
 356             ii->nnodes = ii->nodesize = 0;
 357             ii->nodes = NULL;
 358
 359             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 360
 361             ii->text = id.output.text;
 362
 363             entry->backend_data = ii;
 364         }
 365     }
 366
 367     /*
 368      * An Info file begins with a piece of introductory text which
 369      * is apparently never shown anywhere. This seems to me to be a
 370      * good place to put the copyright notice and the version IDs.
 371      * Also, Info directory entries are expected to go here.
 372      */
 373     intro_text.charset = conf.charset;
 374
 375     info_rdaddsc(&intro_text,
 376             "This Info file generated by Halibut, ");
 377     info_rdaddsc(&intro_text, version);
 378     info_rdaddsc(&intro_text, "\n\n");
 379
 380     for (p = sourceform; p; p = p->next)
 381         if (p->type == para_Config &&
 382             !ustricmp(p->keyword, L"info-dir-entry")) {
 383             wchar_t *section, *shortname, *longname, *kw;
 384             char *s;
 385
 386             section = uadv(p->keyword);
 387             shortname = *section ? uadv(section) : L"";
 388             longname = *shortname ? uadv(shortname) : L"";
 389             kw = *longname ? uadv(longname) : L"";
 390
 391             if (!*longname) {
 392                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 393                 continue;
 394             }
 395
 396             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 397             info_rdadds(&intro_text, section);
 398             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 399             info_rdadds(&intro_text, shortname);
 400             info_rdaddsc(&intro_text, ": (");
 401             s = dupstr(conf.filename);
 402             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 403                 s[strlen(s)-5] = '\0';
 404             info_rdaddsc(&intro_text, s);
 405             sfree(s);
 406             info_rdaddsc(&intro_text, ")");
 407             if (*kw) {
 408                 keyword *kwl = kw_lookup(keywords, kw);
 409                 if (kwl && kwl->para->private_data) {
 410                     node *n = (node *)kwl->para->private_data;
 411                     info_rdaddsc(&intro_text, n->name);
 412                 }
 413             }
 414             info_rdaddsc(&intro_text, ".   ");
 415             info_rdadds(&intro_text, longname);
 416             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 417         }
 418
 419     for (p = sourceform; p; p = p->next)
 420         if (p->type == para_Copyright)
 421             info_para(&intro_text, NULL, NULL, p->words, keywords,
 422                       0, 0, conf.width, &conf);
 423
 424     for (p = sourceform; p; p = p->next)
 425         if (p->type == para_VersionID)
 426             info_versionid(&intro_text, p->words, &conf);
 427
 428     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 429         info_rdaddc(&intro_text, '\n');
 430
 431     /* Do the title */
 432     for (p = sourceform; p; p = p->next)
 433         if (p->type == para_Title)
 434             info_heading(&topnode->text, NULL, p->words,
 435                          conf.atitle, conf.width, &conf);
 436
 437     nestindent = conf.listindentbefore + conf.listindentafter;
 438     nesting = 0;
 439
 440     currnode = topnode;
 441
 442     /* Do the main document */
 443     for (p = sourceform; p; p = p->next) switch (p->type) {
 444
 445       case para_QuotePush:
 446         nesting += 2;
 447         break;
 448       case para_QuotePop:
 449         nesting -= 2;
 450         assert(nesting >= 0);
 451         break;
 452
 453       case para_LcontPush:
 454         nesting += nestindent;
 455         break;
 456       case para_LcontPop:
 457         nesting -= nestindent;
 458         assert(nesting >= 0);
 459         break;
 460
 461         /*
 462          * Things we ignore because we've already processed them or
 463          * aren't going to touch them in this pass.
 464          */
 465       case para_IM:
 466       case para_BR:
 467       case para_Biblio:                /* only touch BiblioCited */
 468       case para_VersionID:
 469       case para_NoCite:
 470       case para_Title:
 471         break;
 472
 473         /*
 474          * Chapter titles.
 475          */
 476       case para_Chapter:
 477       case para_Appendix:
 478       case para_UnnumberedChapter:
 479       case para_Heading:
 480       case para_Subsect:
 481         currnode = p->private_data;
 482         assert(currnode);
 483         assert(currnode->up);
 484
 485         if (!currnode->up->started_menu) {
 486             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 487             currnode->up->started_menu = TRUE;
 488         }
 489         info_menu_item(&currnode->up->text, currnode, p, &conf);
 490
 491         has_index |= info_check_index(p->words, currnode, idx);
 492         if (p->type == para_Chapter || p->type == para_Appendix ||
 493             p->type == para_UnnumberedChapter)
 494             info_heading(&currnode->text, p->kwtext, p->words,
 495                          conf.achapter, conf.width, &conf);
 496         else
 497             info_heading(&currnode->text, p->kwtext, p->words,
 498                          conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
 499                          conf.width, &conf);
 500         nesting = 0;
 501         break;
 502
 503       case para_Rule:
 504         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 505         break;
 506
 507       case para_Normal:
 508       case para_Copyright:
 509       case para_DescribedThing:
 510       case para_Description:
 511       case para_BiblioCited:
 512       case para_Bullet:
 513       case para_NumberedList:
 514         has_index |= info_check_index(p->words, currnode, idx);
 515         if (p->type == para_Bullet) {
 516             bullet.next = NULL;
 517             bullet.alt = NULL;
 518             bullet.type = word_Normal;
 519             bullet.text = conf.bullet;
 520             prefix = &bullet;
 521             prefixextra = NULL;
 522             indentb = conf.listindentbefore;
 523             indenta = conf.listindentafter;
 524         } else if (p->type == para_NumberedList) {
 525             prefix = p->kwtext;
 526             prefixextra = conf.listsuffix;
 527             indentb = conf.listindentbefore;
 528             indenta = conf.listindentafter;
 529         } else if (p->type == para_Description) {
 530             prefix = NULL;
 531             prefixextra = NULL;
 532             indentb = conf.listindentbefore;
 533             indenta = conf.listindentafter;
 534         } else {
 535             prefix = NULL;
 536             prefixextra = NULL;
 537             indentb = indenta = 0;
 538         }
 539         if (p->type == para_BiblioCited) {
 540             body = dup_word_list(p->kwtext);
 541             for (wp = body; wp->next; wp = wp->next);
 542             wp->next = &spaceword;
 543             spaceword.next = p->words;
 544             spaceword.alt = NULL;
 545             spaceword.type = word_WhiteSpace;
 546             spaceword.text = NULL;
 547         } else {
 548             wp = NULL;
 549             body = p->words;
 550         }
 551         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 552                   nesting + indentb, indenta,
 553                   conf.width - nesting - indentb - indenta, &conf);
 554         if (wp) {
 555             wp->next = NULL;
 556             free_word_list(body);
 557         }
 558         break;
 559
 560       case para_Code:
 561         info_codepara(&currnode->text, p->words,
 562                       nesting + conf.indent_code,
 563                       conf.width - nesting - 2 * conf.indent_code);
 564         break;
 565     }
 566
 567     /*
 568      * Create an index node if required.
 569      */
 570     if (has_index) {
 571         node *newnode;
 572         int i, j, k;
 573         indexentry *entry;
 574         char *nodename;
 575
 576         nodename = info_node_name_for_text(conf.index_text, &conf);
 577         newnode = info_node_new(nodename, conf.charset);
 578         sfree(nodename);
 579
 580         newnode->up = topnode;
 581
 582         currnode->next = newnode;
 583         newnode->prev = currnode;
 584         currnode->listnext = newnode;
 585
 586         k = info_rdadds(&newnode->text, conf.index_text);
 587         info_rdaddsc(&newnode->text, "\n");
 588         while (k > 0) {
 589             info_rdadds(&newnode->text, conf.achapter.underline);
 590             k -= ustrwid(conf.achapter.underline, conf.charset);
 591         }
 592         info_rdaddsc(&newnode->text, "\n\n");
 593
 594         info_menu_item(&topnode->text, newnode, NULL, &conf);
 595
 596         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 597             info_idx *ii = (info_idx *)entry->backend_data;
 598
 599             for (j = 0; j < ii->nnodes; j++) {
 600                 /*
 601                  * When we have multiple references for a single
 602                  * index term, we only display the actual term on
 603                  * the first line, to make it clear that the terms
 604                  * really are the same.
 605                  */
 606                 if (j == 0)
 607                     info_rdaddsc(&newnode->text, ii->text);
 608                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 609                     info_rdaddc(&newnode->text, ' ');
 610                 info_rdaddsc(&newnode->text, "  *Note ");
 611                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 612                 info_rdaddsc(&newnode->text, "::\n");
 613             }
 614         }
 615     }
 616
 617     /*
 618      * Finalise the text of each node, by adding the ^_ delimiter
 619      * and the node line at the top.
 620      */
 621     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 622         char *origtext = currnode->text.output.text;
 623         currnode->text = empty_info_data;
 624         currnode->text.charset = conf.charset;
 625         info_rdaddsc(&currnode->text, "\037\nFile: ");
 626         info_rdaddsc(&currnode->text, conf.filename);
 627         info_rdaddsc(&currnode->text, ",  Node: ");
 628         info_rdaddsc(&currnode->text, currnode->name);
 629         if (currnode->prev) {
 630             info_rdaddsc(&currnode->text, ",  Prev: ");
 631             info_rdaddsc(&currnode->text, currnode->prev->name);
 632         }
 633         info_rdaddsc(&currnode->text, ",  Up: ");
 634         info_rdaddsc(&currnode->text, (currnode->up ?
 635                                        currnode->up->name : "(dir)"));
 636         if (currnode->next) {
 637             info_rdaddsc(&currnode->text, ",  Next: ");
 638             info_rdaddsc(&currnode->text, currnode->next->name);
 639         }
 640         info_rdaddsc(&currnode->text, "\n\n");
 641         info_rdaddsc(&currnode->text, origtext);
 642         /*
 643          * Just make _absolutely_ sure we end with a newline.
 644          */
 645         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 646             info_rdaddc(&currnode->text, '\n');
 647
 648         sfree(origtext);
 649     }
 650
 651     /*
 652      * Compute the offsets for the tag table.
 653      */
 654     filepos = intro_text.output.pos;
 655     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 656         currnode->pos = filepos;
 657         filepos += currnode->text.output.pos;
 658     }
 659
 660     /*
 661      * Split into sub-files.
 662      */
 663     if (conf.maxfilesize > 0) {
 664         int currfilesize = intro_text.output.pos, currfilenum = 1;
 665         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 666             if (currfilesize > intro_text.output.pos &&
 667                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 668                 currfilenum++;
 669                 currfilesize = intro_text.output.pos;
 670             }
 671             currnode->filenum = currfilenum;
 672             currfilesize += currnode->text.output.pos;
 673         }
 674     }
 675
 676     /*
 677      * Write the primary output file.
 678      */
 679     fp = fopen(conf.filename, "w");
 680     if (!fp) {
 681         error(err_cantopenw, conf.filename);
 682         return;
 683     }
 684     fputs(intro_text.output.text, fp);
 685     if (conf.maxfilesize == 0) {
 686         for (currnode = topnode; currnode; currnode = currnode->listnext)
 687             fputs(currnode->text.output.text, fp);
 688     } else {
 689         int filenum = 0;
 690         fprintf(fp, "\037\nIndirect:\n");
 691         for (currnode = topnode; currnode; currnode = currnode->listnext)
 692             if (filenum != currnode->filenum) {
 693                 filenum = currnode->filenum;
 694                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 695                         currnode->pos);
 696             }
 697     }
 698     fprintf(fp, "\037\nTag Table:\n");
 699     if (conf.maxfilesize > 0)
 700         fprintf(fp, "(Indirect)\n");
 701     for (currnode = topnode; currnode; currnode = currnode->listnext)
 702         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 703     fprintf(fp, "\037\nEnd Tag Table\n");
 704     fclose(fp);
 705
 706     /*
 707      * Write the subfiles.
 708      */
 709     if (conf.maxfilesize > 0) {
 710         int filenum = 0;
 711         fp = NULL;
 712
 713         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 714             if (filenum != currnode->filenum) {
 715                 char *fname;
 716
 717                 filenum = currnode->filenum;
 718
 719                 if (fp)
 720                     fclose(fp);
 721                 fname = snewn(strlen(conf.filename) + 40, char);
 722                 sprintf(fname, "%s-%d", conf.filename, filenum);
 723                 fp = fopen(fname, "w");
 724                 if (!fp) {
 725                     error(err_cantopenw, fname);
 726                     return;
 727                 }
 728                 sfree(fname);
 729                 fputs(intro_text.output.text, fp);
 730             }
 731             fputs(currnode->text.output.text, fp);
 732         }
 733
 734         if (fp)
 735             fclose(fp);
 736     }
 737 }
 738
 739 static int info_check_index(word *w, node *n, indexdata *idx)
 740 {
 741     int ret = 0;
 742
 743     for (; w; w = w->next) {
 744         if (w->type == word_IndexRef) {
 745             indextag *tag;
 746             int i;
 747
 748             tag = index_findtag(idx, w->text);
 749             if (!tag)
 750                 break;
 751
 752             for (i = 0; i < tag->nrefs; i++) {
 753                 indexentry *entry = tag->refs[i];
 754                 info_idx *ii = (info_idx *)entry->backend_data;
 755
 756                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 757                     /*
 758                      * If the same index term is indexed twice
 759                      * within the same section, we only want to
 760                      * mention it once in the index. So do nothing
 761                      * here.
 762                      */
 763                     continue;
 764                 }
 765
 766                 if (ii->nnodes >= ii->nodesize) {
 767                     ii->nodesize += 32;
 768                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 769                 }
 770
 771                 ii->nodes[ii->nnodes++] = n;
 772
 773                 ret = 1;
 774             }
 775         }
 776     }
 777
 778     return ret;
 779 }
 780
 781 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 782 {
 783     word *ret = dup_word_list(words);
 784     word *w;
 785     keyword *kwl;
 786
 787     for (w = ret; w; w = w->next) {
 788         w->private_data = NULL;
 789         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 790             kwl = kw_lookup(keywords, w->text);
 791             if (kwl) {
 792                 if (kwl->para->type == para_NumberedList ||
 793                     kwl->para->type == para_BiblioCited) {
 794                     /*
 795                      * In Info, we do nothing special for xrefs to
 796                      * numbered list items or bibliography entries.
 797                      */
 798                     continue;
 799                 } else {
 800                     /*
 801                      * An xref to a different section has its text
 802                      * completely replaced.
 803                      */
 804                     word *w2, *w3, *w4;
 805                     w2 = w3 = w->next;
 806                     w4 = NULL;
 807                     while (w2) {
 808                         if (w2->type == word_XrefEnd) {
 809                             w4 = w2->next;
 810                             w2->next = NULL;
 811                             break;
 812                         }
 813                         w2 = w2->next;
 814                     }
 815                     free_word_list(w3);
 816
 817                     /*
 818                      * Now w is the UpperXref / LowerXref we
 819                      * started with, and w4 is the next word after
 820                      * the corresponding XrefEnd (if any). The
 821                      * simplest thing is just to stick a pointer to
 822                      * the target node structure in the private
 823                      * data field of the xref word, and let
 824                      * info_rdaddwc and friends read the node name
 825                      * out from there.
 826                      */
 827                     w->next = w4;
 828                     w->private_data = kwl->para->private_data;
 829                     assert(w->private_data);
 830                 }
 831             }
 832         }
 833     }
 834
 835     return ret;
 836 }
 837
 838 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 839                         infoconfig *cfg) {
 840     int ret = 0;
 841
 842     for (; words && words != end; words = words->next) switch (words->type) {
 843       case word_HyperLink:
 844       case word_HyperEnd:
 845       case word_XrefEnd:
 846       case word_IndexRef:
 847         break;
 848
 849       case word_Normal:
 850       case word_Emph:
 851       case word_Code:
 852       case word_WeakCode:
 853       case word_WhiteSpace:
 854       case word_EmphSpace:
 855       case word_CodeSpace:
 856       case word_WkCodeSpace:
 857       case word_Quote:
 858       case word_EmphQuote:
 859       case word_CodeQuote:
 860       case word_WkCodeQuote:
 861         assert(words->type != word_CodeQuote &&
 862                words->type != word_WkCodeQuote);
 863         if (towordstyle(words->type) == word_Emph &&
 864             (attraux(words->aux) == attr_First ||
 865              attraux(words->aux) == attr_Only))
 866             ret += info_rdadds(id, cfg->startemph);
 867         else if (towordstyle(words->type) == word_Code &&
 868                  (attraux(words->aux) == attr_First ||
 869                   attraux(words->aux) == attr_Only))
 870             ret += info_rdadds(id, cfg->lquote);
 871         if (removeattr(words->type) == word_Normal) {
 872             if (cvt_ok(id->charset, words->text) || !words->alt)
 873                 ret += info_rdadds(id, words->text);
 874             else
 875                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 876         } else if (removeattr(words->type) == word_WhiteSpace) {
 877             ret += info_rdadd(id, L' ');
 878         } else if (removeattr(words->type) == word_Quote) {
 879             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 880                                cfg->lquote : cfg->rquote);
 881         }
 882         if (towordstyle(words->type) == word_Emph &&
 883             (attraux(words->aux) == attr_Last ||
 884              attraux(words->aux) == attr_Only))
 885             ret += info_rdadds(id, cfg->endemph);
 886         else if (towordstyle(words->type) == word_Code &&
 887                  (attraux(words->aux) == attr_Last ||
 888                   attraux(words->aux) == attr_Only))
 889             ret += info_rdadds(id, cfg->rquote);
 890         break;
 891
 892       case word_UpperXref:
 893       case word_LowerXref:
 894         if (xrefs && words->private_data) {
 895             /*
 896              * This bit is structural and so must be done in char
 897              * rather than wchar_t.
 898              */
 899             ret += info_rdaddsc(id, "*Note ");
 900             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 901             ret += info_rdaddsc(id, "::");
 902         }
 903         break;
 904     }
 905
 906     return ret;
 907 }
 908
 909 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 910
 911 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 912     int w = 0;
 913     while (words) {
 914         w += info_width_internal(words, xrefs, cfg);
 915         words = words->next;
 916     }
 917     return w;
 918 }
 919
 920 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 921     int wid;
 922     int attr;
 923
 924     switch (words->type) {
 925       case word_HyperLink:
 926       case word_HyperEnd:
 927       case word_XrefEnd:
 928       case word_IndexRef:
 929         return 0;
 930
 931       case word_UpperXref:
 932       case word_LowerXref:
 933         if (xrefs && words->private_data) {
 934             /* "*Note " plus "::" comes to 8 characters */
 935             return 8 + strwid(((node *)words->private_data)->name,
 936                               cfg->charset);
 937         } else
 938             return 0;
 939     }
 940
 941     assert(words->type < word_internal_endattrs);
 942
 943     wid = 0;
 944     attr = towordstyle(words->type);
 945
 946     if (attr == word_Emph || attr == word_Code) {
 947         if (attraux(words->aux) == attr_Only ||
 948             attraux(words->aux) == attr_First)
 949             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 950                            cfg->charset);
 951     }
 952     if (attr == word_Emph || attr == word_Code) {
 953         if (attraux(words->aux) == attr_Only ||
 954             attraux(words->aux) == attr_Last)
 955             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 956                            cfg->charset);
 957     }
 958
 959     switch (words->type) {
 960       case word_Normal:
 961       case word_Emph:
 962       case word_Code:
 963       case word_WeakCode:
 964         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 965             wid += ustrwid(words->text, cfg->charset);
 966         else
 967             wid += info_width_internal_list(words->alt, xrefs, cfg);
 968         return wid;
 969
 970       case word_WhiteSpace:
 971       case word_EmphSpace:
 972       case word_CodeSpace:
 973       case word_WkCodeSpace:
 974       case word_Quote:
 975       case word_EmphQuote:
 976       case word_CodeQuote:
 977       case word_WkCodeQuote:
 978         assert(words->type != word_CodeQuote &&
 979                words->type != word_WkCodeQuote);
 980         if (removeattr(words->type) == word_Quote) {
 981             if (quoteaux(words->aux) == quote_Open)
 982                 wid += ustrwid(cfg->lquote, cfg->charset);
 983             else
 984                 wid += ustrwid(cfg->rquote, cfg->charset);
 985         } else
 986             wid++;                     /* space */
 987     }
 988     return wid;
 989 }
 990
 991 static int info_width_noxrefs(void *ctx, word *words)
 992 {
 993     return info_width_internal(words, FALSE, (infoconfig *)ctx);
 994 }
 995 static int info_width_xrefs(void *ctx, word *words)
 996 {
 997     return info_width_internal(words, TRUE, (infoconfig *)ctx);
 998 }
 999
1000 static void info_heading(info_data *text, word *tprefix,
1001                          word *words, alignstruct align,
1002                          int width, infoconfig *cfg) {
1003     int length;
1004     int firstlinewidth, wrapwidth;
1005     wrappedline *wrapping, *p;
1006
1007     length = 0;
1008     if (tprefix) {
1009         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
1010         length += info_rdadds(text, cfg->sectsuffix);
1011     }
1012
1013     wrapwidth = width;
1014     firstlinewidth = width - length;
1015
1016     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
1017                          info_width_noxrefs, cfg, 0);
1018     for (p = wrapping; p; p = p->next) {
1019         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
1020         info_rdadd(text, L'\n');
1021         if (*align.underline) {
1022             while (length > 0) {
1023                 info_rdadds(text, align.underline);
1024                 length -= ustrwid(align.underline, cfg->charset);
1025             }
1026             info_rdadd(text, L'\n');
1027         }
1028         length = 0;
1029     }
1030     wrap_free(wrapping);
1031     info_rdadd(text, L'\n');
1032 }
1033
1034 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
1035 {
1036     while (indent--) info_rdadd(text, L' ');
1037     while (width > 0) {
1038         info_rdadds(text, cfg->rule);
1039         width -= ustrwid(cfg->rule, cfg->charset);
1040     }
1041     info_rdadd(text, L'\n');
1042     info_rdadd(text, L'\n');
1043 }
1044
1045 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
1046                       word *input, keywordlist *keywords, int indent,
1047                       int extraindent, int width, infoconfig *cfg) {
1048     wrappedline *wrapping, *p;
1049     word *words;
1050     int e;
1051     int i;
1052     int firstlinewidth = width;
1053
1054     words = info_transform_wordlist(input, keywords);
1055
1056     if (prefix) {
1057         for (i = 0; i < indent; i++)
1058             info_rdadd(text, L' ');
1059         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1060         if (prefixextra)
1061             e += info_rdadds(text, prefixextra);
1062         /* If the prefix is too long, shorten the first line to fit. */
1063         e = extraindent - e;
1064         if (e < 0) {
1065             firstlinewidth += e;       /* this decreases it, since e < 0 */
1066             if (firstlinewidth < 0) {
1067                 e = indent + extraindent;
1068                 firstlinewidth = width;
1069                 info_rdadd(text, L'\n');
1070             } else
1071                 e = 0;
1072         }
1073     } else
1074         e = indent + extraindent;
1075
1076     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1077                          cfg, 0);
1078     for (p = wrapping; p; p = p->next) {
1079         for (i = 0; i < e; i++)
1080             info_rdadd(text, L' ');
1081         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1082         info_rdadd(text, L'\n');
1083         e = indent + extraindent;
1084     }
1085     wrap_free(wrapping);
1086     info_rdadd(text, L'\n');
1087
1088     free_word_list(words);
1089 }
1090
1091 static void info_codepara(info_data *text, word *words,
1092                           int indent, int width) {
1093     int i;
1094
1095     for (; words; words = words->next) if (words->type == word_WeakCode) {
1096         for (i = 0; i < indent; i++)
1097             info_rdadd(text, L' ');
1098         if (info_rdadds(text, words->text) > width) {
1099             /* FIXME: warn */
1100         }
1101         info_rdadd(text, L'\n');
1102     }
1103
1104     info_rdadd(text, L'\n');
1105 }
1106
1107 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1108     info_rdadd(text, L'[');
1109     info_rdaddwc(text, words, NULL, FALSE, cfg);
1110     info_rdadds(text, L"]\n");
1111 }
1112
1113 static node *info_node_new(char *name, int charset)
1114 {
1115     node *n;
1116
1117     n = snew(node);
1118     n->text = empty_info_data;
1119     n->text.charset = charset;
1120     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1121     n->name = dupstr(name);
1122     n->started_menu = FALSE;
1123
1124     return n;
1125 }
1126
1127 static char *info_node_name_core(info_data *id, filepos *fpos)
1128 {
1129     char *p, *q;
1130
1131     /*
1132      * We cannot have commas, colons or parentheses in a node name.
1133      * Remove any that we find, with a warning.
1134      */
1135     p = q = id->output.text;
1136     while (*p) {
1137         if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1138             error(err_infonodechar, fpos, *p);
1139         } else {
1140             *q++ = *p;
1141         }
1142         p++;
1143     }
1144     *q = '\0';
1145
1146     return id->output.text;
1147 }
1148
1149 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1150 {
1151     info_data id = EMPTY_INFO_DATA;
1152
1153     id.charset = cfg->charset;
1154     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1155                  NULL, FALSE, cfg);
1156     info_rdaddsc(&id, NULL);
1157
1158     return info_node_name_core(&id, &par->fpos);
1159 }
1160
1161 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1162 {
1163     info_data id = EMPTY_INFO_DATA;
1164
1165     id.charset = cfg->charset;
1166     info_rdadds(&id, text);
1167     info_rdaddsc(&id, NULL);
1168
1169     return info_node_name_core(&id, NULL);
1170 }
1171
1172 static void info_menu_item(info_data *text, node *n, paragraph *p,
1173                            infoconfig *cfg)
1174 {
1175     /*
1176      * FIXME: Depending on how we're doing node names in this info
1177      * file, we might want to do
1178      *
1179      *   * Node name:: Chapter title
1180      *
1181      * _or_
1182      *
1183      *   * Chapter number: Node name.
1184      *
1185      * This function mostly works in char rather than wchar_t,
1186      * because a menu item is a structural component.
1187      */
1188     info_rdaddsc(text, "* ");
1189     info_rdaddsc(text, n->name);
1190     info_rdaddsc(text, "::");
1191     if (p) {
1192         info_rdaddc(text, ' ');
1193         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1194     }
1195     info_rdaddc(text, '\n');
1196 }
1197
1198 /*
1199  * These functions implement my wrapper on the rdadd* calls which
1200  * allows me to switch arbitrarily between literal octet-string
1201  * text and charset-translated Unicode. (Because no matter what
1202  * character set I write the actual text in, I expect info readers
1203  * to treat node names and file names literally and to expect
1204  * keywords like `*Note' in their canonical form, so I have to take
1205  * steps to ensure that those structural elements of the file
1206  * aren't messed with.)
1207  */
1208 static int info_rdadds(info_data *d, wchar_t const *wcs)
1209 {
1210     if (!d->wcmode) {
1211         d->state = charset_init_state;
1212         d->wcmode = TRUE;
1213     }
1214
1215     if (wcs) {
1216         char buf[256];
1217         int len, width, ret;
1218
1219         width = ustrwid(wcs, d->charset);
1220
1221         len = ustrlen(wcs);
1222         while (len > 0) {
1223             int prevlen = len;
1224
1225             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1226                                        d->charset, &d->state, NULL);
1227
1228             assert(len < prevlen);
1229
1230             if (ret > 0) {
1231                 buf[ret] = '\0';
1232                 rdaddsc(&d->output, buf);
1233             }
1234         }
1235
1236         return width;
1237     } else
1238         return 0;
1239 }
1240
1241 static int info_rdaddsc(info_data *d, char const *cs)
1242 {
1243     if (d->wcmode) {
1244         char buf[256];
1245         int ret;
1246
1247         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1248                                    d->charset, &d->state, NULL);
1249         if (ret > 0) {
1250             buf[ret] = '\0';
1251             rdaddsc(&d->output, buf);
1252         }
1253
1254         d->wcmode = FALSE;
1255     }
1256
1257     if (cs) {
1258         rdaddsc(&d->output, cs);
1259         return strwid(cs, d->charset);
1260     } else
1261         return 0;
1262 }
1263
1264 static int info_rdadd(info_data *d, wchar_t wc)
1265 {
1266     wchar_t wcs[2];
1267     wcs[0] = wc;
1268     wcs[1] = L'\0';
1269     return info_rdadds(d, wcs);
1270 }
1271
1272 static int info_rdaddc(info_data *d, char c)
1273 {
1274     char cs[2];
1275     cs[0] = c;
1276     cs[1] = '\0';
1277     return info_rdaddsc(d, cs);
1278 }