mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable choice of how to allocate node names?
   7  *     + possibly a template-like approach, choosing node names to
   8  *       be the full section title or perhaps the internal keyword?
   9  *     + neither of those seems quite right. Perhaps instead a
  10  *       Windows Help-like mechanism, where a magic config
  11  *       directive allows user choice of name for every node.
  12  *     + Only trouble with that is, now what happens to the section
  13  *       numbers? Do they become completely vestigial and just sit
  14  *       in the title text of each node? Or do we keep them in the
  15  *       menus somehow? I think people might occasionally want to
  16  *       go to a section by number, if only because all the _other_
  17  *       formats of the same document will reference the numbers
  18  *       all the time. So our menu lines could look like one of
  19  *       these:
  20  *        * Nodename: Section 1.2. Title of section.
  21  *        * Section 1.2: Nodename. Title of section.
  22  *
  23  *  - might be helpful to diagnose duplicate node names!
  24  *
  25  *  - more flexibility in heading underlines, like text backend.
  26  *     + Given info.el's fontifier, we'd want the following defaults:
  27  *       \cfg{info-title-underline}{*}
  28  *       \cfg{info-chapter-underline}{=}
  29  *       \cfg{info-section-underline}{0}{-}
  30  *       \cfg{info-section-underline}{1}{.}
  31  *
  32  *  - Indices generated by makeinfo use a menu rather than a bunch of
  33  *    cross-references, which reduces visual clutter rather.  For
  34  *    singly-referenced items, it looks like:
  35  *      * toner cartridge, replacing:              Toner.
  36  *    It does a horrid job on multiply-referenced entries, though,
  37  *    perhaps because the name before the colon is meant to be unique.
  38  *    Info's 'i' command requires the use of a menu -- it fails to
  39  *    find any index entries at all with Halibut's current index format.
  40  *
  41  *  - The string "*note" is matched case-insensitively, so we could
  42  *    make things slightly less ugly by using the lower-case version
  43  *    when the user asks for \k.  Unfortunately, standalone Info seems
  44  *    to match node names case-sensitively, so we can't downcase that.
  45  */
  46
  47 #include <stdio.h>
  48 #include <stdlib.h>
  49 #include <assert.h>
  50 #include "halibut.h"
  51
  /*
   * Formatting settings for one class of heading (title, chapter, or a
   * given section depth).
   */
  typedef struct {
      /*
       * Underline string for the heading. info_configure() treats this
       * as a list of alternatives and steps through it with uadv() until
       * it finds one that cvt_ok() accepts for the output charset.
       */
      wchar_t *underline;
  } alignstruct;
  55
  56 typedef struct {
  57     char *filename;
  58     int maxfilesize;
  59     int charset;
  60     int listindentbefore, listindentafter;
  61     int indent_code, width, index_width;
  62     alignstruct atitle, achapter, *asect;
  63     int nasect;
  64     wchar_t *bullet, *listsuffix;
  65     wchar_t *startemph, *endemph;
  66     wchar_t *lquote, *rquote;
  67     wchar_t *sectsuffix;
  68     wchar_t *rule;
  69     wchar_t *index_text;
  70 } infoconfig;
  71
  72 typedef struct {
  73     rdstringc output;
  74     int charset;
  75     charset_state state;
  76     int wcmode;
  77 } info_data;
  78 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
  79 static const info_data empty_info_data = EMPTY_INFO_DATA;
  80
  81 typedef struct node_tag node;
  82 struct node_tag {
  83     node *listnext;
  84     node *up, *prev, *next, *lastchild;
  85     int pos, started_menu, filenum;
  86     char *name;
  87     info_data text;
  88 };
  89
  90 typedef struct {
  91     char *text;
  92     int length;
  93     int nnodes, nodesize;
  94     node **nodes;
  95 } info_idx;
  96
  97 static int info_rdadd(info_data *, wchar_t);
  98 static int info_rdadds(info_data *, wchar_t const *);
  99 static int info_rdaddc(info_data *, char);
 100 static int info_rdaddsc(info_data *, char const *);
 101
 102 static void info_heading(info_data *, word *, word *, alignstruct, int,
 103                          infoconfig *);
 104 static void info_rule(info_data *, int, int, infoconfig *);
 105 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
 106                       int, int, int, infoconfig *);
 107 static void info_codepara(info_data *, word *, int, int);
 108 static void info_versionid(info_data *, word *, infoconfig *);
 109 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
 110 static word *info_transform_wordlist(word *, keywordlist *);
 111 static int info_check_index(word *, node *, indexdata *);
 112
 113 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
 114
 115 static node *info_node_new(char *name, int charset);
 116 static char *info_node_name_for_para(paragraph *p, infoconfig *);
 117 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
 118
 119 static infoconfig info_configure(paragraph *source) {
 120     infoconfig ret;
 121     paragraph *p;
 122     int n;
 123
 124     /*
 125      * Defaults.
 126      */
 127     ret.filename = dupstr("output.info");
 128     ret.maxfilesize = 64 << 10;
 129     ret.charset = CS_ASCII;
 130     ret.width = 70;
 131     ret.listindentbefore = 1;
 132     ret.listindentafter = 3;
 133     ret.indent_code = 2;
 134     ret.index_width = 40;
 135     ret.listsuffix = L".";
 136     ret.bullet = L"\x2022\0-\0\0";
 137     ret.rule = L"\x2500\0-\0\0";
 138     ret.startemph = L"_\0_\0\0";
 139     ret.endemph = uadv(ret.startemph);
 140     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 141     ret.rquote = uadv(ret.lquote);
 142     ret.sectsuffix = L": ";
 143     /*
 144      * Default underline characters are chosen to match those recognised by
 145      * Info-fontify-node.
 146      */
 147     ret.atitle.underline = L"*\0\0";
 148     ret.achapter.underline = L"=\0\0";
 149     ret.nasect = 2;
 150     ret.asect = snewn(ret.nasect, alignstruct);
 151     ret.asect[0].underline = L"-\0\0";
 152     ret.asect[1].underline = L".\0\0";
 153     ret.index_text = L"Index";
 154
 155     /*
 156      * Two-pass configuration so that we can pick up global config
 157      * (e.g. `quotes') before having it overridden by specific
 158      * config (`info-quotes'), irrespective of the order in which
 159      * they occur.
 160      */
 161     for (p = source; p; p = p->next) {
 162         if (p->type == para_Config) {
 163             if (!ustricmp(p->keyword, L"quotes")) {
 164                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 165                     ret.lquote = uadv(p->keyword);
 166                     ret.rquote = uadv(ret.lquote);
 167                 }
 168             } else if (!ustricmp(p->keyword, L"index")) {
 169                 ret.index_text = uadv(p->keyword);
 170             }
 171         }
 172     }
 173
 174     for (p = source; p; p = p->next) {
 175         if (p->type == para_Config) {
 176             if (!ustricmp(p->keyword, L"info-filename")) {
 177                 sfree(ret.filename);
 178                 ret.filename = dupstr(adv(p->origkeyword));
 179             } else if (!ustricmp(p->keyword, L"info-charset")) {
 180                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 181             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 182                 ret.maxfilesize = utoi(uadv(p->keyword));
 183             } else if (!ustricmp(p->keyword, L"info-width")) {
 184                 ret.width = utoi(uadv(p->keyword));
 185             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 186                 ret.indent_code = utoi(uadv(p->keyword));
 187             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 188                 ret.index_width = utoi(uadv(p->keyword));
 189             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 190                 ret.listindentbefore = utoi(uadv(p->keyword));
 191             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 192                 ret.listindentafter = utoi(uadv(p->keyword));
 193             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 194                 ret.sectsuffix = uadv(p->keyword);
 195             } else if (!ustricmp(p->keyword, L"info-underline")) {
 196                 ret.atitle.underline = ret.achapter.underline =
 197                     uadv(p->keyword);
 198                 for (n = 0; n < ret.nasect; n++)
 199                     ret.asect[n].underline = ret.atitle.underline;
 200             } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
 201                 ret.achapter.underline = uadv(p->keyword);
 202             } else if (!ustricmp(p->keyword, L"info-section-underline")) {
 203                 wchar_t *q = uadv(p->keyword);
 204                 int n = 0;
 205                 if (uisdigit(*q)) {
 206                     n = utoi(q);
 207                     q = uadv(q);
 208                 }
 209                 if (n >= ret.nasect) {
 210                     int i;
 211                     ret.asect = sresize(ret.asect, n+1, alignstruct);
 212                     for (i = ret.nasect; i <= n; i++)
 213                         ret.asect[i] = ret.asect[ret.nasect-1];
 214                     ret.nasect = n+1;
 215                 }
 216                 ret.asect[n].underline = q;
 217             } else if (!ustricmp(p->keyword, L"text-title-underline")) {
 218                 ret.atitle.underline = uadv(p->keyword);
 219             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 220                 ret.bullet = uadv(p->keyword);
 221             } else if (!ustricmp(p->keyword, L"info-rule")) {
 222                 ret.rule = uadv(p->keyword);
 223             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 224                 ret.listsuffix = uadv(p->keyword);
 225             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 226                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 227                     ret.startemph = uadv(p->keyword);
 228                     ret.endemph = uadv(ret.startemph);
 229                 }
 230             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 231                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 232                     ret.lquote = uadv(p->keyword);
 233                     ret.rquote = uadv(ret.lquote);
 234                 }
 235             }
 236         }
 237     }
 238
 239     /*
 240      * Now process fallbacks on quote characters, underlines, the
 241      * rule character, the emphasis characters, and bullets.
 242      */
 243     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 244            (!cvt_ok(ret.charset, ret.lquote) ||
 245             !cvt_ok(ret.charset, ret.rquote))) {
 246         ret.lquote = uadv(ret.rquote);
 247         ret.rquote = uadv(ret.lquote);
 248     }
 249
 250     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 251            (!cvt_ok(ret.charset, ret.startemph) ||
 252             !cvt_ok(ret.charset, ret.endemph))) {
 253         ret.startemph = uadv(ret.endemph);
 254         ret.endemph = uadv(ret.startemph);
 255     }
 256
 257     while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
 258            !cvt_ok(ret.charset, ret.atitle.underline))
 259         ret.atitle.underline = uadv(ret.atitle.underline);
 260
 261     while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
 262            !cvt_ok(ret.charset, ret.achapter.underline))
 263         ret.achapter.underline = uadv(ret.achapter.underline);
 264
 265     for (n = 0; n < ret.nasect; n++) {
 266         while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
 267                !cvt_ok(ret.charset, ret.asect[n].underline))
 268             ret.asect[n].underline = uadv(ret.asect[n].underline);
 269     }
 270
 271     while (*ret.bullet && *uadv(ret.bullet) &&
 272            !cvt_ok(ret.charset, ret.bullet))
 273         ret.bullet = uadv(ret.bullet);
 274
 275     while (*ret.rule && *uadv(ret.rule) &&
 276            !cvt_ok(ret.charset, ret.rule))
 277         ret.rule = uadv(ret.rule);
 278
 279     return ret;
 280 }
 281
 282 paragraph *info_config_filename(char *filename)
 283 {
 284     return cmdline_cfg_simple("info-filename", filename, NULL);
 285 }
 286
 287 void info_backend(paragraph *sourceform, keywordlist *keywords,
 288                   indexdata *idx, void *unused) {
 289     paragraph *p;
 290     infoconfig conf;
 291     word *prefix, *body, *wp;
 292     word spaceword;
 293     wchar_t *prefixextra;
 294     int nesting, nestindent;
 295     int indentb, indenta;
 296     int filepos;
 297     int has_index;
 298     info_data intro_text = EMPTY_INFO_DATA;
 299     node *topnode, *currnode;
 300     word bullet;
 301     FILE *fp;
 302
 303     IGNORE(unused);
 304
 305     conf = info_configure(sourceform);
 306
 307     /*
 308      * Go through and create a node for each section.
 309      */
 310     topnode = info_node_new("Top", conf.charset);
 311     currnode = topnode;
 312     for (p = sourceform; p; p = p->next) switch (p->type) {
 313         /*
 314          * Chapter titles.
 315          */
 316       case para_Chapter:
 317       case para_Appendix:
 318       case para_UnnumberedChapter:
 319       case para_Heading:
 320       case para_Subsect:
 321         {
 322             node *newnode, *upnode;
 323             char *nodename;
 324
 325             nodename = info_node_name_for_para(p, &conf);
 326             newnode = info_node_new(nodename, conf.charset);
 327             sfree(nodename);
 328
 329             p->private_data = newnode;
 330
 331             if (p->parent)
 332                 upnode = (node *)p->parent->private_data;
 333             else
 334                 upnode = topnode;
 335             assert(upnode);
 336             newnode->up = upnode;
 337
 338             currnode->next = newnode;
 339             newnode->prev = currnode;
 340
 341             currnode->listnext = newnode;
 342             currnode = newnode;
 343         }
 344         break;
 345       default:
 346         p->private_data = NULL;
 347         break;
 348     }
 349
 350     /*
 351      * Set up the display form of each index entry.
 352      */
 353     {
 354         int i;
 355         indexentry *entry;
 356
 357         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 358             info_idx *ii = snew(info_idx);
 359             info_data id = EMPTY_INFO_DATA;
 360
 361             id.charset = conf.charset;
 362
 363             ii->nnodes = ii->nodesize = 0;
 364             ii->nodes = NULL;
 365
 366             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 367
 368             ii->text = id.output.text;
 369
 370             entry->backend_data = ii;
 371         }
 372     }
 373
 374     /*
 375      * An Info file begins with a piece of introductory text which
 376      * is apparently never shown anywhere. This seems to me to be a
 377      * good place to put the copyright notice and the version IDs.
 378      * Also, Info directory entries are expected to go here.
 379      */
 380     intro_text.charset = conf.charset;
 381
 382     info_rdaddsc(&intro_text,
 383             "This Info file generated by Halibut, ");
 384     info_rdaddsc(&intro_text, version);
 385     info_rdaddsc(&intro_text, "\n\n");
 386
 387     for (p = sourceform; p; p = p->next)
 388         if (p->type == para_Config &&
 389             !ustricmp(p->keyword, L"info-dir-entry")) {
 390             wchar_t *section, *shortname, *longname, *kw;
 391             char *s;
 392
 393             section = uadv(p->keyword);
 394             shortname = *section ? uadv(section) : L"";
 395             longname = *shortname ? uadv(shortname) : L"";
 396             kw = *longname ? uadv(longname) : L"";
 397
 398             if (!*longname) {
 399                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 400                 continue;
 401             }
 402
 403             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 404             info_rdadds(&intro_text, section);
 405             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 406             info_rdadds(&intro_text, shortname);
 407             info_rdaddsc(&intro_text, ": (");
 408             s = dupstr(conf.filename);
 409             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 410                 s[strlen(s)-5] = '\0';
 411             info_rdaddsc(&intro_text, s);
 412             sfree(s);
 413             info_rdaddsc(&intro_text, ")");
 414             if (*kw) {
 415                 keyword *kwl = kw_lookup(keywords, kw);
 416                 if (kwl && kwl->para->private_data) {
 417                     node *n = (node *)kwl->para->private_data;
 418                     info_rdaddsc(&intro_text, n->name);
 419                 }
 420             }
 421             info_rdaddsc(&intro_text, ".   ");
 422             info_rdadds(&intro_text, longname);
 423             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 424         }
 425
 426     for (p = sourceform; p; p = p->next)
 427         if (p->type == para_Copyright)
 428             info_para(&intro_text, NULL, NULL, p->words, keywords,
 429                       0, 0, conf.width, &conf);
 430
 431     for (p = sourceform; p; p = p->next)
 432         if (p->type == para_VersionID)
 433             info_versionid(&intro_text, p->words, &conf);
 434
 435     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 436         info_rdaddc(&intro_text, '\n');
 437
 438     /* Do the title */
 439     for (p = sourceform; p; p = p->next)
 440         if (p->type == para_Title)
 441             info_heading(&topnode->text, NULL, p->words,
 442                          conf.atitle, conf.width, &conf);
 443
 444     nestindent = conf.listindentbefore + conf.listindentafter;
 445     nesting = 0;
 446
 447     currnode = topnode;
 448
 449     /* Do the main document */
 450     for (p = sourceform; p; p = p->next) switch (p->type) {
 451
 452       case para_QuotePush:
 453         nesting += 2;
 454         break;
 455       case para_QuotePop:
 456         nesting -= 2;
 457         assert(nesting >= 0);
 458         break;
 459
 460       case para_LcontPush:
 461         nesting += nestindent;
 462         break;
 463       case para_LcontPop:
 464         nesting -= nestindent;
 465         assert(nesting >= 0);
 466         break;
 467
 468         /*
 469          * Things we ignore because we've already processed them or
 470          * aren't going to touch them in this pass.
 471          */
 472       case para_IM:
 473       case para_BR:
 474       case para_Biblio:                /* only touch BiblioCited */
 475       case para_VersionID:
 476       case para_NoCite:
 477       case para_Title:
 478         break;
 479
 480         /*
 481          * Chapter titles.
 482          */
 483       case para_Chapter:
 484       case para_Appendix:
 485       case para_UnnumberedChapter:
 486       case para_Heading:
 487       case para_Subsect:
 488         currnode = p->private_data;
 489         assert(currnode);
 490         assert(currnode->up);
 491
 492         if (!currnode->up->started_menu) {
 493             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 494             currnode->up->started_menu = TRUE;
 495         }
 496         info_menu_item(&currnode->up->text, currnode, p, &conf);
 497
 498         has_index |= info_check_index(p->words, currnode, idx);
 499         if (p->type == para_Chapter || p->type == para_Appendix ||
 500             p->type == para_UnnumberedChapter)
 501             info_heading(&currnode->text, p->kwtext, p->words,
 502                          conf.achapter, conf.width, &conf);
 503         else
 504             info_heading(&currnode->text, p->kwtext, p->words,
 505                          conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
 506                          conf.width, &conf);
 507         nesting = 0;
 508         break;
 509
 510       case para_Rule:
 511         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 512         break;
 513
 514       case para_Normal:
 515       case para_Copyright:
 516       case para_DescribedThing:
 517       case para_Description:
 518       case para_BiblioCited:
 519       case para_Bullet:
 520       case para_NumberedList:
 521         has_index |= info_check_index(p->words, currnode, idx);
 522         if (p->type == para_Bullet) {
 523             bullet.next = NULL;
 524             bullet.alt = NULL;
 525             bullet.type = word_Normal;
 526             bullet.text = conf.bullet;
 527             prefix = &bullet;
 528             prefixextra = NULL;
 529             indentb = conf.listindentbefore;
 530             indenta = conf.listindentafter;
 531         } else if (p->type == para_NumberedList) {
 532             prefix = p->kwtext;
 533             prefixextra = conf.listsuffix;
 534             indentb = conf.listindentbefore;
 535             indenta = conf.listindentafter;
 536         } else if (p->type == para_Description) {
 537             prefix = NULL;
 538             prefixextra = NULL;
 539             indentb = conf.listindentbefore;
 540             indenta = conf.listindentafter;
 541         } else {
 542             prefix = NULL;
 543             prefixextra = NULL;
 544             indentb = indenta = 0;
 545         }
 546         if (p->type == para_BiblioCited) {
 547             body = dup_word_list(p->kwtext);
 548             for (wp = body; wp->next; wp = wp->next);
 549             wp->next = &spaceword;
 550             spaceword.next = p->words;
 551             spaceword.alt = NULL;
 552             spaceword.type = word_WhiteSpace;
 553             spaceword.text = NULL;
 554         } else {
 555             wp = NULL;
 556             body = p->words;
 557         }
 558         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 559                   nesting + indentb, indenta,
 560                   conf.width - nesting - indentb - indenta, &conf);
 561         if (wp) {
 562             wp->next = NULL;
 563             free_word_list(body);
 564         }
 565         break;
 566
 567       case para_Code:
 568         info_codepara(&currnode->text, p->words,
 569                       nesting + conf.indent_code,
 570                       conf.width - nesting - 2 * conf.indent_code);
 571         break;
 572     }
 573
 574     /*
 575      * Create an index node if required.
 576      */
 577     if (has_index) {
 578         node *newnode;
 579         int i, j, k;
 580         indexentry *entry;
 581         char *nodename;
 582
 583         nodename = info_node_name_for_text(conf.index_text, &conf);
 584         newnode = info_node_new(nodename, conf.charset);
 585         sfree(nodename);
 586
 587         newnode->up = topnode;
 588
 589         currnode->next = newnode;
 590         newnode->prev = currnode;
 591         currnode->listnext = newnode;
 592
 593         k = info_rdadds(&newnode->text, conf.index_text);
 594         info_rdaddsc(&newnode->text, "\n");
 595         while (k > 0) {
 596             info_rdadds(&newnode->text, conf.achapter.underline);
 597             k -= ustrwid(conf.achapter.underline, conf.charset);
 598         }
 599         info_rdaddsc(&newnode->text, "\n\n");
 600
 601         info_menu_item(&topnode->text, newnode, NULL, &conf);
 602
 603         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 604             info_idx *ii = (info_idx *)entry->backend_data;
 605
 606             for (j = 0; j < ii->nnodes; j++) {
 607                 /*
 608                  * When we have multiple references for a single
 609                  * index term, we only display the actual term on
 610                  * the first line, to make it clear that the terms
 611                  * really are the same.
 612                  */
 613                 if (j == 0)
 614                     info_rdaddsc(&newnode->text, ii->text);
 615                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 616                     info_rdaddc(&newnode->text, ' ');
 617                 info_rdaddsc(&newnode->text, "  *Note ");
 618                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 619                 info_rdaddsc(&newnode->text, "::\n");
 620             }
 621         }
 622     }
 623
 624     /*
 625      * Finalise the text of each node, by adding the ^_ delimiter
 626      * and the node line at the top.
 627      */
 628     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 629         char *origtext = currnode->text.output.text;
 630         currnode->text = empty_info_data;
 631         currnode->text.charset = conf.charset;
 632         info_rdaddsc(&currnode->text, "\037\nFile: ");
 633         info_rdaddsc(&currnode->text, conf.filename);
 634         info_rdaddsc(&currnode->text, ",  Node: ");
 635         info_rdaddsc(&currnode->text, currnode->name);
 636         if (currnode->prev) {
 637             info_rdaddsc(&currnode->text, ",  Prev: ");
 638             info_rdaddsc(&currnode->text, currnode->prev->name);
 639         }
 640         info_rdaddsc(&currnode->text, ",  Up: ");
 641         info_rdaddsc(&currnode->text, (currnode->up ?
 642                                        currnode->up->name : "(dir)"));
 643         if (currnode->next) {
 644             info_rdaddsc(&currnode->text, ",  Next: ");
 645             info_rdaddsc(&currnode->text, currnode->next->name);
 646         }
 647         info_rdaddsc(&currnode->text, "\n\n");
 648         info_rdaddsc(&currnode->text, origtext);
 649         /*
 650          * Just make _absolutely_ sure we end with a newline.
 651          */
 652         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 653             info_rdaddc(&currnode->text, '\n');
 654
 655         sfree(origtext);
 656     }
 657
 658     /*
 659      * Compute the offsets for the tag table.
 660      */
 661     filepos = intro_text.output.pos;
 662     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 663         currnode->pos = filepos;
 664         filepos += currnode->text.output.pos;
 665     }
 666
 667     /*
 668      * Split into sub-files.
 669      */
 670     if (conf.maxfilesize > 0) {
 671         int currfilesize = intro_text.output.pos, currfilenum = 1;
 672         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 673             if (currfilesize > intro_text.output.pos &&
 674                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 675                 currfilenum++;
 676                 currfilesize = intro_text.output.pos;
 677             }
 678             currnode->filenum = currfilenum;
 679             currfilesize += currnode->text.output.pos;
 680         }
 681     }
 682
 683     /*
 684      * Write the primary output file.
 685      */
 686     fp = fopen(conf.filename, "w");
 687     if (!fp) {
 688         error(err_cantopenw, conf.filename);
 689         return;
 690     }
 691     fputs(intro_text.output.text, fp);
 692     if (conf.maxfilesize == 0) {
 693         for (currnode = topnode; currnode; currnode = currnode->listnext)
 694             fputs(currnode->text.output.text, fp);
 695     } else {
 696         int filenum = 0;
 697         fprintf(fp, "\037\nIndirect:\n");
 698         for (currnode = topnode; currnode; currnode = currnode->listnext)
 699             if (filenum != currnode->filenum) {
 700                 filenum = currnode->filenum;
 701                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 702                         currnode->pos);
 703             }
 704     }
 705     fprintf(fp, "\037\nTag Table:\n");
 706     if (conf.maxfilesize > 0)
 707         fprintf(fp, "(Indirect)\n");
 708     for (currnode = topnode; currnode; currnode = currnode->listnext)
 709         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 710     fprintf(fp, "\037\nEnd Tag Table\n");
 711     fclose(fp);
 712
 713     /*
 714      * Write the subfiles.
 715      */
 716     if (conf.maxfilesize > 0) {
 717         int filenum = 0;
 718         fp = NULL;
 719
 720         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 721             if (filenum != currnode->filenum) {
 722                 char *fname;
 723
 724                 filenum = currnode->filenum;
 725
 726                 if (fp)
 727                     fclose(fp);
 728                 fname = snewn(strlen(conf.filename) + 40, char);
 729                 sprintf(fname, "%s-%d", conf.filename, filenum);
 730                 fp = fopen(fname, "w");
 731                 if (!fp) {
 732                     error(err_cantopenw, fname);
 733                     return;
 734                 }
 735                 sfree(fname);
 736                 fputs(intro_text.output.text, fp);
 737             }
 738             fputs(currnode->text.output.text, fp);
 739         }
 740
 741         if (fp)
 742             fclose(fp);
 743     }
 744 }
 745
 746 static int info_check_index(word *w, node *n, indexdata *idx)
 747 {
 748     int ret = 0;
 749
 750     for (; w; w = w->next) {
 751         if (w->type == word_IndexRef) {
 752             indextag *tag;
 753             int i;
 754
 755             tag = index_findtag(idx, w->text);
 756             if (!tag)
 757                 break;
 758
 759             for (i = 0; i < tag->nrefs; i++) {
 760                 indexentry *entry = tag->refs[i];
 761                 info_idx *ii = (info_idx *)entry->backend_data;
 762
 763                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 764                     /*
 765                      * If the same index term is indexed twice
 766                      * within the same section, we only want to
 767                      * mention it once in the index. So do nothing
 768                      * here.
 769                      */
 770                     continue;
 771                 }
 772
 773                 if (ii->nnodes >= ii->nodesize) {
 774                     ii->nodesize += 32;
 775                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 776                 }
 777
 778                 ii->nodes[ii->nnodes++] = n;
 779
 780                 ret = 1;
 781             }
 782         }
 783     }
 784
 785     return ret;
 786 }
 787
 788 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 789 {
 790     word *ret = dup_word_list(words);
 791     word *w;
 792     keyword *kwl;
 793
 794     for (w = ret; w; w = w->next) {
 795         w->private_data = NULL;
 796         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 797             kwl = kw_lookup(keywords, w->text);
 798             if (kwl) {
 799                 if (kwl->para->type == para_NumberedList ||
 800                     kwl->para->type == para_BiblioCited) {
 801                     /*
 802                      * In Info, we do nothing special for xrefs to
 803                      * numbered list items or bibliography entries.
 804                      */
 805                     continue;
 806                 } else {
 807                     /*
 808                      * An xref to a different section has its text
 809                      * completely replaced.
 810                      */
 811                     word *w2, *w3, *w4;
 812                     w2 = w3 = w->next;
 813                     w4 = NULL;
 814                     while (w2) {
 815                         if (w2->type == word_XrefEnd) {
 816                             w4 = w2->next;
 817                             w2->next = NULL;
 818                             break;
 819                         }
 820                         w2 = w2->next;
 821                     }
 822                     free_word_list(w3);
 823
 824                     /*
 825                      * Now w is the UpperXref / LowerXref we
 826                      * started with, and w4 is the next word after
 827                      * the corresponding XrefEnd (if any). The
 828                      * simplest thing is just to stick a pointer to
 829                      * the target node structure in the private
 830                      * data field of the xref word, and let
 831                      * info_rdaddwc and friends read the node name
 832                      * out from there.
 833                      */
 834                     w->next = w4;
 835                     w->private_data = kwl->para->private_data;
 836                     assert(w->private_data);
 837                 }
 838             }
 839         }
 840     }
 841
 842     return ret;
 843 }
 844
 845 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 846                         infoconfig *cfg) {
 847     int ret = 0;
 848
 849     for (; words && words != end; words = words->next) switch (words->type) {
 850       case word_HyperLink:
 851       case word_HyperEnd:
 852       case word_XrefEnd:
 853       case word_IndexRef:
 854         break;
 855
 856       case word_Normal:
 857       case word_Emph:
 858       case word_Code:
 859       case word_WeakCode:
 860       case word_WhiteSpace:
 861       case word_EmphSpace:
 862       case word_CodeSpace:
 863       case word_WkCodeSpace:
 864       case word_Quote:
 865       case word_EmphQuote:
 866       case word_CodeQuote:
 867       case word_WkCodeQuote:
 868         assert(words->type != word_CodeQuote &&
 869                words->type != word_WkCodeQuote);
 870         if (towordstyle(words->type) == word_Emph &&
 871             (attraux(words->aux) == attr_First ||
 872              attraux(words->aux) == attr_Only))
 873             ret += info_rdadds(id, cfg->startemph);
 874         else if (towordstyle(words->type) == word_Code &&
 875                  (attraux(words->aux) == attr_First ||
 876                   attraux(words->aux) == attr_Only))
 877             ret += info_rdadds(id, cfg->lquote);
 878         if (removeattr(words->type) == word_Normal) {
 879             if (cvt_ok(id->charset, words->text) || !words->alt)
 880                 ret += info_rdadds(id, words->text);
 881             else
 882                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 883         } else if (removeattr(words->type) == word_WhiteSpace) {
 884             ret += info_rdadd(id, L' ');
 885         } else if (removeattr(words->type) == word_Quote) {
 886             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 887                                cfg->lquote : cfg->rquote);
 888         }
 889         if (towordstyle(words->type) == word_Emph &&
 890             (attraux(words->aux) == attr_Last ||
 891              attraux(words->aux) == attr_Only))
 892             ret += info_rdadds(id, cfg->endemph);
 893         else if (towordstyle(words->type) == word_Code &&
 894                  (attraux(words->aux) == attr_Last ||
 895                   attraux(words->aux) == attr_Only))
 896             ret += info_rdadds(id, cfg->rquote);
 897         break;
 898
 899       case word_UpperXref:
 900       case word_LowerXref:
 901         if (xrefs && words->private_data) {
 902             /*
 903              * This bit is structural and so must be done in char
 904              * rather than wchar_t.
 905              */
 906             ret += info_rdaddsc(id, "*Note ");
 907             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 908             ret += info_rdaddsc(id, "::");
 909         }
 910         break;
 911     }
 912
 913     return ret;
 914 }
 915
 916 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 917
 918 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 919     int w = 0;
 920     while (words) {
 921         w += info_width_internal(words, xrefs, cfg);
 922         words = words->next;
 923     }
 924     return w;
 925 }
 926
 927 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 928     int wid;
 929     int attr;
 930
 931     switch (words->type) {
 932       case word_HyperLink:
 933       case word_HyperEnd:
 934       case word_XrefEnd:
 935       case word_IndexRef:
 936         return 0;
 937
 938       case word_UpperXref:
 939       case word_LowerXref:
 940         if (xrefs && words->private_data) {
 941             /* "*Note " plus "::" comes to 8 characters */
 942             return 8 + strwid(((node *)words->private_data)->name,
 943                               cfg->charset);
 944         } else
 945             return 0;
 946     }
 947
 948     assert(words->type < word_internal_endattrs);
 949
 950     wid = 0;
 951     attr = towordstyle(words->type);
 952
 953     if (attr == word_Emph || attr == word_Code) {
 954         if (attraux(words->aux) == attr_Only ||
 955             attraux(words->aux) == attr_First)
 956             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 957                            cfg->charset);
 958     }
 959     if (attr == word_Emph || attr == word_Code) {
 960         if (attraux(words->aux) == attr_Only ||
 961             attraux(words->aux) == attr_Last)
 962             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 963                            cfg->charset);
 964     }
 965
 966     switch (words->type) {
 967       case word_Normal:
 968       case word_Emph:
 969       case word_Code:
 970       case word_WeakCode:
 971         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 972             wid += ustrwid(words->text, cfg->charset);
 973         else
 974             wid += info_width_internal_list(words->alt, xrefs, cfg);
 975         return wid;
 976
 977       case word_WhiteSpace:
 978       case word_EmphSpace:
 979       case word_CodeSpace:
 980       case word_WkCodeSpace:
 981       case word_Quote:
 982       case word_EmphQuote:
 983       case word_CodeQuote:
 984       case word_WkCodeQuote:
 985         assert(words->type != word_CodeQuote &&
 986                words->type != word_WkCodeQuote);
 987         if (removeattr(words->type) == word_Quote) {
 988             if (quoteaux(words->aux) == quote_Open)
 989                 wid += ustrwid(cfg->lquote, cfg->charset);
 990             else
 991                 wid += ustrwid(cfg->rquote, cfg->charset);
 992         } else
 993             wid++;                     /* space */
 994     }
 995     return wid;
 996 }
 997
 998 static int info_width_noxrefs(void *ctx, word *words)
 999 {
1000     return info_width_internal(words, FALSE, (infoconfig *)ctx);
1001 }
1002 static int info_width_xrefs(void *ctx, word *words)
1003 {
1004     return info_width_internal(words, TRUE, (infoconfig *)ctx);
1005 }
1006
1007 static void info_heading(info_data *text, word *tprefix,
1008                          word *words, alignstruct align,
1009                          int width, infoconfig *cfg) {
1010     int length;
1011     int firstlinewidth, wrapwidth;
1012     wrappedline *wrapping, *p;
1013
1014     length = 0;
1015     if (tprefix) {
1016         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
1017         length += info_rdadds(text, cfg->sectsuffix);
1018     }
1019
1020     wrapwidth = width;
1021     firstlinewidth = width - length;
1022
1023     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
1024                          info_width_noxrefs, cfg, 0);
1025     for (p = wrapping; p; p = p->next) {
1026         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
1027         info_rdadd(text, L'\n');
1028         if (*align.underline) {
1029             while (length > 0) {
1030                 info_rdadds(text, align.underline);
1031                 length -= ustrwid(align.underline, cfg->charset);
1032             }
1033             info_rdadd(text, L'\n');
1034         }
1035         length = 0;
1036     }
1037     wrap_free(wrapping);
1038     info_rdadd(text, L'\n');
1039 }
1040
1041 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
1042 {
1043     while (indent--) info_rdadd(text, L' ');
1044     while (width > 0) {
1045         info_rdadds(text, cfg->rule);
1046         width -= ustrwid(cfg->rule, cfg->charset);
1047     }
1048     info_rdadd(text, L'\n');
1049     info_rdadd(text, L'\n');
1050 }
1051
1052 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
1053                       word *input, keywordlist *keywords, int indent,
1054                       int extraindent, int width, infoconfig *cfg) {
1055     wrappedline *wrapping, *p;
1056     word *words;
1057     int e;
1058     int i;
1059     int firstlinewidth = width;
1060
1061     words = info_transform_wordlist(input, keywords);
1062
1063     if (prefix) {
1064         for (i = 0; i < indent; i++)
1065             info_rdadd(text, L' ');
1066         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1067         if (prefixextra)
1068             e += info_rdadds(text, prefixextra);
1069         /* If the prefix is too long, shorten the first line to fit. */
1070         e = extraindent - e;
1071         if (e < 0) {
1072             firstlinewidth += e;       /* this decreases it, since e < 0 */
1073             if (firstlinewidth < 0) {
1074                 e = indent + extraindent;
1075                 firstlinewidth = width;
1076                 info_rdadd(text, L'\n');
1077             } else
1078                 e = 0;
1079         }
1080     } else
1081         e = indent + extraindent;
1082
1083     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1084                          cfg, 0);
1085     for (p = wrapping; p; p = p->next) {
1086         for (i = 0; i < e; i++)
1087             info_rdadd(text, L' ');
1088         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1089         info_rdadd(text, L'\n');
1090         e = indent + extraindent;
1091     }
1092     wrap_free(wrapping);
1093     info_rdadd(text, L'\n');
1094
1095     free_word_list(words);
1096 }
1097
1098 static void info_codepara(info_data *text, word *words,
1099                           int indent, int width) {
1100     int i;
1101
1102     for (; words; words = words->next) if (words->type == word_WeakCode) {
1103         for (i = 0; i < indent; i++)
1104             info_rdadd(text, L' ');
1105         if (info_rdadds(text, words->text) > width) {
1106             /* FIXME: warn */
1107         }
1108         info_rdadd(text, L'\n');
1109     }
1110
1111     info_rdadd(text, L'\n');
1112 }
1113
1114 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1115     info_rdadd(text, L'[');
1116     info_rdaddwc(text, words, NULL, FALSE, cfg);
1117     info_rdadds(text, L"]\n");
1118 }
1119
1120 static node *info_node_new(char *name, int charset)
1121 {
1122     node *n;
1123
1124     n = snew(node);
1125     n->text = empty_info_data;
1126     n->text.charset = charset;
1127     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1128     n->name = dupstr(name);
1129     n->started_menu = FALSE;
1130
1131     return n;
1132 }
1133
1134 static char *info_node_name_core(info_data *id, filepos *fpos)
1135 {
1136     char *p, *q;
1137
1138     /*
1139      * We cannot have commas, colons or parentheses in a node name.
1140      * Remove any that we find, with a warning.
1141      */
1142     p = q = id->output.text;
1143     while (*p) {
1144         if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1145             error(err_infonodechar, fpos, *p);
1146         } else {
1147             *q++ = *p;
1148         }
1149         p++;
1150     }
1151     *q = '\0';
1152
1153     return id->output.text;
1154 }
1155
1156 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1157 {
1158     info_data id = EMPTY_INFO_DATA;
1159
1160     id.charset = cfg->charset;
1161     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1162                  NULL, FALSE, cfg);
1163     info_rdaddsc(&id, NULL);
1164
1165     return info_node_name_core(&id, &par->fpos);
1166 }
1167
1168 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1169 {
1170     info_data id = EMPTY_INFO_DATA;
1171
1172     id.charset = cfg->charset;
1173     info_rdadds(&id, text);
1174     info_rdaddsc(&id, NULL);
1175
1176     return info_node_name_core(&id, NULL);
1177 }
1178
1179 static void info_menu_item(info_data *text, node *n, paragraph *p,
1180                            infoconfig *cfg)
1181 {
1182     /*
1183      * FIXME: Depending on how we're doing node names in this info
1184      * file, we might want to do
1185      *
1186      *   * Node name:: Chapter title
1187      *
1188      * _or_
1189      *
1190      *   * Chapter number: Node name.
1191      *
1192      * This function mostly works in char rather than wchar_t,
1193      * because a menu item is a structural component.
1194      */
1195     info_rdaddsc(text, "* ");
1196     info_rdaddsc(text, n->name);
1197     info_rdaddsc(text, "::");
1198     if (p) {
1199         info_rdaddc(text, ' ');
1200         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1201     }
1202     info_rdaddc(text, '\n');
1203 }
1204
1205 /*
1206  * These functions implement my wrapper on the rdadd* calls which
1207  * allows me to switch arbitrarily between literal octet-string
1208  * text and charset-translated Unicode. (Because no matter what
1209  * character set I write the actual text in, I expect info readers
1210  * to treat node names and file names literally and to expect
1211  * keywords like `*Note' in their canonical form, so I have to take
1212  * steps to ensure that those structural elements of the file
1213  * aren't messed with.)
1214  */
1215 static int info_rdadds(info_data *d, wchar_t const *wcs)
1216 {
1217     if (!d->wcmode) {
1218         d->state = charset_init_state;
1219         d->wcmode = TRUE;
1220     }
1221
1222     if (wcs) {
1223         char buf[256];
1224         int len, width, ret;
1225
1226         width = ustrwid(wcs, d->charset);
1227
1228         len = ustrlen(wcs);
1229         while (len > 0) {
1230             int prevlen = len;
1231
1232             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1233                                        d->charset, &d->state, NULL);
1234
1235             assert(len < prevlen);
1236
1237             if (ret > 0) {
1238                 buf[ret] = '\0';
1239                 rdaddsc(&d->output, buf);
1240             }
1241         }
1242
1243         return width;
1244     } else
1245         return 0;
1246 }
1247
1248 static int info_rdaddsc(info_data *d, char const *cs)
1249 {
1250     if (d->wcmode) {
1251         char buf[256];
1252         int ret;
1253
1254         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1255                                    d->charset, &d->state, NULL);
1256         if (ret > 0) {
1257             buf[ret] = '\0';
1258             rdaddsc(&d->output, buf);
1259         }
1260
1261         d->wcmode = FALSE;
1262     }
1263
1264     if (cs) {
1265         rdaddsc(&d->output, cs);
1266         return strwid(cs, d->charset);
1267     } else
1268         return 0;
1269 }
1270
1271 static int info_rdadd(info_data *d, wchar_t wc)
1272 {
1273     wchar_t wcs[2];
1274     wcs[0] = wc;
1275     wcs[1] = L'\0';
1276     return info_rdadds(d, wcs);
1277 }
1278
1279 static int info_rdaddc(info_data *d, char c)
1280 {
1281     char cs[2];
1282     cs[0] = c;
1283     cs[1] = '\0';
1284     return info_rdaddsc(d, cs);
1285 }