mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable choice of how to allocate node names?
   7  *     + possibly a template-like approach, choosing node names to
   8  *       be the full section title or perhaps the internal keyword?
   9  *     + neither of those seems quite right. Perhaps instead a
  10  *       Windows Help-like mechanism, where a magic config
  11  *       directive allows user choice of name for every node.
  12  *     + Only trouble with that is, now what happens to the section
  13  *       numbers? Do they become completely vestigial and just sit
  14  *       in the title text of each node? Or do we keep them in the
  15  *       menus somehow? I think people might occasionally want to
  16  *       go to a section by number, if only because all the _other_
  17  *       formats of the same document will reference the numbers
  18  *       all the time. So our menu lines could look like one of
  19  *       these:
  20  *        * Nodename: Section 1.2. Title of section.
  21  *        * Section 1.2: Nodename. Title of section.
  22  *
  23  *  - might be helpful to diagnose duplicate node names!
  24  *
  25  *  - more flexibility in heading underlines, like text backend.
  26  *     + Given info.el's fontifier, we'd want the following defaults:
  27  *       \cfg{info-title-underline}{*}
  28  *       \cfg{info-chapter-underline}{=}
  29  *       \cfg{info-section-underline}{0}{-}
  30  *       \cfg{info-section-underline}{1}{.}
  31  *
  32  *  - Indices generated by makeinfo use a menu rather than a bunch of
  33  *    cross-references, which reduces visual clutter rather.  For
  34  *    singly-referenced items, it looks like:
  35  *      * toner cartridge, replacing:              Toner.
  36  *    It does a horrid job on multiply-referenced entries, though,
  37  *    perhaps because the name before the colon is meant to be unique.
  38  *
  39  *  - The string "*note" is matched case-insensitively, so we could
  40  *    make things slightly less ugly by using the lower-case version
  41  *    when the user asks for \k.  Unfortunately, standalone Info seems
  42  *    to match node names case-sensitively, so we can't downcase that.
  43  */
  44
  45 #include <stdio.h>
  46 #include <stdlib.h>
  47 #include <assert.h>
  48 #include "halibut.h"
  49
  50 typedef struct {    /* Info backend settings; filled in by info_configure() */
  51     char *filename;    /* primary output file name; default "output.info", set by info-filename */
  52     int maxfilesize;    /* sub-file split threshold (default 64 << 10); 0 writes a single file */
  53     int charset;    /* output character set; default CS_ASCII, set by info-charset */
  54     int listindentbefore, listindentafter;    /* list indents from info-list-indent / info-listitem-indent */
  55     int indent_code, width, index_width;    /* code indent, text width, index term column width */
  56     wchar_t *bullet, *listsuffix;    /* bullet text; suffix passed along with numbered-list prefixes */
  57     wchar_t *startemph, *endemph;    /* text emitted before / after emphasised words */
  58     wchar_t *lquote, *rquote;    /* opening / closing quote text */
  59     wchar_t *sectsuffix, *underline;    /* set by info-section-suffix / info-underline */
  60     wchar_t *rule;    /* set by info-rule */
  61     wchar_t *index_text;    /* title of the index node; default L"Index", set by the global `index' directive */
  62 } infoconfig;
  63
  64 typedef struct {    /* An output buffer together with its character-set settings */
  65     rdstringc output;    /* accumulated output; info_backend() reads its .text and .pos */
  66     int charset;    /* character set for this buffer; callers set it from conf.charset */
  67     charset_state state;    /* NOTE(review): presumably conversion state kept between appends - confirm */
  68     int wcmode;    /* NOTE(review): meaning not visible in this part of the file - confirm */
  69 } info_data;
  70 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }    /* positional initialiser: must follow the field order of info_data */
  71 static const info_data empty_info_data = EMPTY_INFO_DATA;    /* assignable empty value; used to reset a node's text before finalising it */
  72
  73 typedef struct node_tag node;    /* one Info node: Top, a section, or the index */
  74 struct node_tag {
  75     node *listnext;    /* next node in creation order; every output pass walks this chain */
  76     node *up, *prev, *next, *lastchild;    /* up/prev/next are printed in the node header line; lastchild is not used in the code visible here */
  77     int pos, started_menu, filenum;    /* tag-table offset; whether "* Menu:" has been emitted; sub-file number */
  78     char *name;    /* node name as written in header lines, the tag table and *Note references */
  79     info_data text;    /* body text of the node */
  80 };
  81
  82 typedef struct {    /* per-index-entry data, stored in indexentry->backend_data */
  83     char *text;    /* rendered display text of the index term */
  84     int length;    /* value returned by info_rdaddwc() when rendering text; used to pad the index column */
  85     int nnodes, nodesize;    /* used / allocated slots in nodes[] */
  86     node **nodes;    /* nodes in which the term is referenced; grown by info_check_index() */
  87 } info_idx;
  88
  89 static int info_rdadd(info_data *, wchar_t);    /* info_rdadd* family: callers sum the int results as a width/length */
  90 static int info_rdadds(info_data *, wchar_t const *);
  91 static int info_rdaddc(info_data *, char);
  92 static int info_rdaddsc(info_data *, char const *);
  93
  94 static void info_heading(info_data *, word *, word *, int, infoconfig *);    /* paragraph-level emitters; defined later in the file */
  95 static void info_rule(info_data *, int, int, infoconfig *);
  96 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  97                       int, int, int, infoconfig *);
  98 static void info_codepara(info_data *, word *, int, int);
  99 static void info_versionid(info_data *, word *, infoconfig *);
 100 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
 101 static word *info_transform_wordlist(word *, keywordlist *);    /* returns a duplicated word list with section xrefs collapsed */
 102 static int info_check_index(word *, node *, indexdata *);    /* returns nonzero if any index reference was recorded */
 103
 104 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);    /* renders words up to (not including) the second word pointer */
 105
 106 static node *info_node_new(char *name, int charset);
 107 static char *info_node_name_for_para(paragraph *p, infoconfig *);    /* results are sfree()d by info_backend() */
 108 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
 109
 110 static infoconfig info_configure(paragraph *source) {
 111     infoconfig ret;
 112     paragraph *p;
 113
 114     /*
 115      * Defaults.
 116      */
 117     ret.filename = dupstr("output.info");
 118     ret.maxfilesize = 64 << 10;
 119     ret.charset = CS_ASCII;
 120     ret.width = 70;
 121     ret.listindentbefore = 1;
 122     ret.listindentafter = 3;
 123     ret.indent_code = 2;
 124     ret.index_width = 40;
 125     ret.listsuffix = L".";
 126     ret.bullet = L"\x2022\0-\0\0";
 127     ret.rule = L"\x2500\0-\0\0";
 128     ret.startemph = L"_\0_\0\0";
 129     ret.endemph = uadv(ret.startemph);
 130     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 131     ret.rquote = uadv(ret.lquote);
 132     ret.sectsuffix = L": ";
 133     ret.underline = L"\x203E\0-\0\0";
 134     ret.index_text = L"Index";
 135
 136     /*
 137      * Two-pass configuration so that we can pick up global config
 138      * (e.g. `quotes') before having it overridden by specific
 139      * config (`info-quotes'), irrespective of the order in which
 140      * they occur.
 141      */
 142     for (p = source; p; p = p->next) {
 143         if (p->type == para_Config) {
 144             if (!ustricmp(p->keyword, L"quotes")) {
 145                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 146                     ret.lquote = uadv(p->keyword);
 147                     ret.rquote = uadv(ret.lquote);
 148                 }
 149             } else if (!ustricmp(p->keyword, L"index")) {
 150                 ret.index_text = uadv(p->keyword);
 151             }
 152         }
 153     }
 154
 155     for (p = source; p; p = p->next) {
 156         if (p->type == para_Config) {
 157             if (!ustricmp(p->keyword, L"info-filename")) {
 158                 sfree(ret.filename);
 159                 ret.filename = dupstr(adv(p->origkeyword));
 160             } else if (!ustricmp(p->keyword, L"info-charset")) {
 161                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 162             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 163                 ret.maxfilesize = utoi(uadv(p->keyword));
 164             } else if (!ustricmp(p->keyword, L"info-width")) {
 165                 ret.width = utoi(uadv(p->keyword));
 166             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 167                 ret.indent_code = utoi(uadv(p->keyword));
 168             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 169                 ret.index_width = utoi(uadv(p->keyword));
 170             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 171                 ret.listindentbefore = utoi(uadv(p->keyword));
 172             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 173                 ret.listindentafter = utoi(uadv(p->keyword));
 174             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 175                 ret.sectsuffix = uadv(p->keyword);
 176             } else if (!ustricmp(p->keyword, L"info-underline")) {
 177                 ret.underline = uadv(p->keyword);
 178             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 179                 ret.bullet = uadv(p->keyword);
 180             } else if (!ustricmp(p->keyword, L"info-rule")) {
 181                 ret.rule = uadv(p->keyword);
 182             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 183                 ret.listsuffix = uadv(p->keyword);
 184             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 185                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 186                     ret.startemph = uadv(p->keyword);
 187                     ret.endemph = uadv(ret.startemph);
 188                 }
 189             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 190                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 191                     ret.lquote = uadv(p->keyword);
 192                     ret.rquote = uadv(ret.lquote);
 193                 }
 194             }
 195         }
 196     }
 197
 198     /*
 199      * Now process fallbacks on quote characters, underlines, the
 200      * rule character, the emphasis characters, and bullets.
 201      */
 202     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 203            (!cvt_ok(ret.charset, ret.lquote) ||
 204             !cvt_ok(ret.charset, ret.rquote))) {
 205         ret.lquote = uadv(ret.rquote);
 206         ret.rquote = uadv(ret.lquote);
 207     }
 208
 209     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 210            (!cvt_ok(ret.charset, ret.startemph) ||
 211             !cvt_ok(ret.charset, ret.endemph))) {
 212         ret.startemph = uadv(ret.endemph);
 213         ret.endemph = uadv(ret.startemph);
 214     }
 215
 216     while (*ret.underline && *uadv(ret.underline) &&
 217            !cvt_ok(ret.charset, ret.underline))
 218         ret.underline = uadv(ret.underline);
 219
 220     while (*ret.bullet && *uadv(ret.bullet) &&
 221            !cvt_ok(ret.charset, ret.bullet))
 222         ret.bullet = uadv(ret.bullet);
 223
 224     while (*ret.rule && *uadv(ret.rule) &&
 225            !cvt_ok(ret.charset, ret.rule))
 226         ret.rule = uadv(ret.rule);
 227
 228     return ret;
 229 }
 230
 231 paragraph *info_config_filename(char *filename)
 232 {
 233     return cmdline_cfg_simple("info-filename", filename, NULL);
 234 }
 235
 236 void info_backend(paragraph *sourceform, keywordlist *keywords,
 237                   indexdata *idx, void *unused) {
 238     paragraph *p;
 239     infoconfig conf;
 240     word *prefix, *body, *wp;
 241     word spaceword;
 242     wchar_t *prefixextra;
 243     int nesting, nestindent;
 244     int indentb, indenta;
 245     int filepos;
 246     int has_index;
 247     info_data intro_text = EMPTY_INFO_DATA;
 248     node *topnode, *currnode;
 249     word bullet;
 250     FILE *fp;
 251
 252     IGNORE(unused);
 253
 254     conf = info_configure(sourceform);
 255
 256     /*
 257      * Go through and create a node for each section.
 258      */
 259     topnode = info_node_new("Top", conf.charset);
 260     currnode = topnode;
 261     for (p = sourceform; p; p = p->next) switch (p->type) {
 262         /*
 263          * Chapter titles.
 264          */
 265       case para_Chapter:
 266       case para_Appendix:
 267       case para_UnnumberedChapter:
 268       case para_Heading:
 269       case para_Subsect:
 270         {
 271             node *newnode, *upnode;
 272             char *nodename;
 273
 274             nodename = info_node_name_for_para(p, &conf);
 275             newnode = info_node_new(nodename, conf.charset);
 276             sfree(nodename);
 277
 278             p->private_data = newnode;
 279
 280             if (p->parent)
 281                 upnode = (node *)p->parent->private_data;
 282             else
 283                 upnode = topnode;
 284             assert(upnode);
 285             newnode->up = upnode;
 286
 287             currnode->next = newnode;
 288             newnode->prev = currnode;
 289
 290             currnode->listnext = newnode;
 291             currnode = newnode;
 292         }
 293         break;
 294       default:
 295         p->private_data = NULL;
 296         break;
 297     }
 298
 299     /*
 300      * Set up the display form of each index entry.
 301      */
 302     {
 303         int i;
 304         indexentry *entry;
 305
 306         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 307             info_idx *ii = snew(info_idx);
 308             info_data id = EMPTY_INFO_DATA;
 309
 310             id.charset = conf.charset;
 311
 312             ii->nnodes = ii->nodesize = 0;
 313             ii->nodes = NULL;
 314
 315             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 316
 317             ii->text = id.output.text;
 318
 319             entry->backend_data = ii;
 320         }
 321     }
 322
 323     /*
 324      * An Info file begins with a piece of introductory text which
 325      * is apparently never shown anywhere. This seems to me to be a
 326      * good place to put the copyright notice and the version IDs.
 327      * Also, Info directory entries are expected to go here.
 328      */
 329     intro_text.charset = conf.charset;
 330
 331     info_rdaddsc(&intro_text,
 332             "This Info file generated by Halibut, ");
 333     info_rdaddsc(&intro_text, version);
 334     info_rdaddsc(&intro_text, "\n\n");
 335
 336     for (p = sourceform; p; p = p->next)
 337         if (p->type == para_Config &&
 338             !ustricmp(p->keyword, L"info-dir-entry")) {
 339             wchar_t *section, *shortname, *longname, *kw;
 340             char *s;
 341
 342             section = uadv(p->keyword);
 343             shortname = *section ? uadv(section) : L"";
 344             longname = *shortname ? uadv(shortname) : L"";
 345             kw = *longname ? uadv(longname) : L"";
 346
 347             if (!*longname) {
 348                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 349                 continue;
 350             }
 351
 352             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 353             info_rdadds(&intro_text, section);
 354             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 355             info_rdadds(&intro_text, shortname);
 356             info_rdaddsc(&intro_text, ": (");
 357             s = dupstr(conf.filename);
 358             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 359                 s[strlen(s)-5] = '\0';
 360             info_rdaddsc(&intro_text, s);
 361             sfree(s);
 362             info_rdaddsc(&intro_text, ")");
 363             if (*kw) {
 364                 keyword *kwl = kw_lookup(keywords, kw);
 365                 if (kwl && kwl->para->private_data) {
 366                     node *n = (node *)kwl->para->private_data;
 367                     info_rdaddsc(&intro_text, n->name);
 368                 }
 369             }
 370             info_rdaddsc(&intro_text, ".   ");
 371             info_rdadds(&intro_text, longname);
 372             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 373         }
 374
 375     for (p = sourceform; p; p = p->next)
 376         if (p->type == para_Copyright)
 377             info_para(&intro_text, NULL, NULL, p->words, keywords,
 378                       0, 0, conf.width, &conf);
 379
 380     for (p = sourceform; p; p = p->next)
 381         if (p->type == para_VersionID)
 382             info_versionid(&intro_text, p->words, &conf);
 383
 384     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 385         info_rdaddc(&intro_text, '\n');
 386
 387     /* Do the title */
 388     for (p = sourceform; p; p = p->next)
 389         if (p->type == para_Title)
 390             info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
 391
 392     nestindent = conf.listindentbefore + conf.listindentafter;
 393     nesting = 0;
 394
 395     currnode = topnode;
 396
 397     /* Do the main document */
 398     for (p = sourceform; p; p = p->next) switch (p->type) {
 399
 400       case para_QuotePush:
 401         nesting += 2;
 402         break;
 403       case para_QuotePop:
 404         nesting -= 2;
 405         assert(nesting >= 0);
 406         break;
 407
 408       case para_LcontPush:
 409         nesting += nestindent;
 410         break;
 411       case para_LcontPop:
 412         nesting -= nestindent;
 413         assert(nesting >= 0);
 414         break;
 415
 416         /*
 417          * Things we ignore because we've already processed them or
 418          * aren't going to touch them in this pass.
 419          */
 420       case para_IM:
 421       case para_BR:
 422       case para_Biblio:                /* only touch BiblioCited */
 423       case para_VersionID:
 424       case para_NoCite:
 425       case para_Title:
 426         break;
 427
 428         /*
 429          * Chapter titles.
 430          */
 431       case para_Chapter:
 432       case para_Appendix:
 433       case para_UnnumberedChapter:
 434       case para_Heading:
 435       case para_Subsect:
 436         currnode = p->private_data;
 437         assert(currnode);
 438         assert(currnode->up);
 439
 440         if (!currnode->up->started_menu) {
 441             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 442             currnode->up->started_menu = TRUE;
 443         }
 444         info_menu_item(&currnode->up->text, currnode, p, &conf);
 445
 446         has_index |= info_check_index(p->words, currnode, idx);
 447         info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
 448         nesting = 0;
 449         break;
 450
 451       case para_Rule:
 452         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 453         break;
 454
 455       case para_Normal:
 456       case para_Copyright:
 457       case para_DescribedThing:
 458       case para_Description:
 459       case para_BiblioCited:
 460       case para_Bullet:
 461       case para_NumberedList:
 462         has_index |= info_check_index(p->words, currnode, idx);
 463         if (p->type == para_Bullet) {
 464             bullet.next = NULL;
 465             bullet.alt = NULL;
 466             bullet.type = word_Normal;
 467             bullet.text = conf.bullet;
 468             prefix = &bullet;
 469             prefixextra = NULL;
 470             indentb = conf.listindentbefore;
 471             indenta = conf.listindentafter;
 472         } else if (p->type == para_NumberedList) {
 473             prefix = p->kwtext;
 474             prefixextra = conf.listsuffix;
 475             indentb = conf.listindentbefore;
 476             indenta = conf.listindentafter;
 477         } else if (p->type == para_Description) {
 478             prefix = NULL;
 479             prefixextra = NULL;
 480             indentb = conf.listindentbefore;
 481             indenta = conf.listindentafter;
 482         } else {
 483             prefix = NULL;
 484             prefixextra = NULL;
 485             indentb = indenta = 0;
 486         }
 487         if (p->type == para_BiblioCited) {
 488             body = dup_word_list(p->kwtext);
 489             for (wp = body; wp->next; wp = wp->next);
 490             wp->next = &spaceword;
 491             spaceword.next = p->words;
 492             spaceword.alt = NULL;
 493             spaceword.type = word_WhiteSpace;
 494             spaceword.text = NULL;
 495         } else {
 496             wp = NULL;
 497             body = p->words;
 498         }
 499         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 500                   nesting + indentb, indenta,
 501                   conf.width - nesting - indentb - indenta, &conf);
 502         if (wp) {
 503             wp->next = NULL;
 504             free_word_list(body);
 505         }
 506         break;
 507
 508       case para_Code:
 509         info_codepara(&currnode->text, p->words,
 510                       nesting + conf.indent_code,
 511                       conf.width - nesting - 2 * conf.indent_code);
 512         break;
 513     }
 514
 515     /*
 516      * Create an index node if required.
 517      */
 518     if (has_index) {
 519         node *newnode;
 520         int i, j, k;
 521         indexentry *entry;
 522         char *nodename;
 523
 524         nodename = info_node_name_for_text(conf.index_text, &conf);
 525         newnode = info_node_new(nodename, conf.charset);
 526         sfree(nodename);
 527
 528         newnode->up = topnode;
 529
 530         currnode->next = newnode;
 531         newnode->prev = currnode;
 532         currnode->listnext = newnode;
 533
 534         k = info_rdadds(&newnode->text, conf.index_text);
 535         info_rdaddsc(&newnode->text, "\n");
 536         while (k > 0) {
 537             info_rdadds(&newnode->text, conf.underline);
 538             k -= ustrwid(conf.underline, conf.charset);
 539         }
 540         info_rdaddsc(&newnode->text, "\n\n");
 541
 542         info_menu_item(&topnode->text, newnode, NULL, &conf);
 543
 544         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 545             info_idx *ii = (info_idx *)entry->backend_data;
 546
 547             for (j = 0; j < ii->nnodes; j++) {
 548                 /*
 549                  * When we have multiple references for a single
 550                  * index term, we only display the actual term on
 551                  * the first line, to make it clear that the terms
 552                  * really are the same.
 553                  */
 554                 if (j == 0)
 555                     info_rdaddsc(&newnode->text, ii->text);
 556                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 557                     info_rdaddc(&newnode->text, ' ');
 558                 info_rdaddsc(&newnode->text, "  *Note ");
 559                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 560                 info_rdaddsc(&newnode->text, "::\n");
 561             }
 562         }
 563     }
 564
 565     /*
 566      * Finalise the text of each node, by adding the ^_ delimiter
 567      * and the node line at the top.
 568      */
 569     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 570         char *origtext = currnode->text.output.text;
 571         currnode->text = empty_info_data;
 572         currnode->text.charset = conf.charset;
 573         info_rdaddsc(&currnode->text, "\037\nFile: ");
 574         info_rdaddsc(&currnode->text, conf.filename);
 575         info_rdaddsc(&currnode->text, ",  Node: ");
 576         info_rdaddsc(&currnode->text, currnode->name);
 577         if (currnode->prev) {
 578             info_rdaddsc(&currnode->text, ",  Prev: ");
 579             info_rdaddsc(&currnode->text, currnode->prev->name);
 580         }
 581         info_rdaddsc(&currnode->text, ",  Up: ");
 582         info_rdaddsc(&currnode->text, (currnode->up ?
 583                                        currnode->up->name : "(dir)"));
 584         if (currnode->next) {
 585             info_rdaddsc(&currnode->text, ",  Next: ");
 586             info_rdaddsc(&currnode->text, currnode->next->name);
 587         }
 588         info_rdaddsc(&currnode->text, "\n\n");
 589         info_rdaddsc(&currnode->text, origtext);
 590         /*
 591          * Just make _absolutely_ sure we end with a newline.
 592          */
 593         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 594             info_rdaddc(&currnode->text, '\n');
 595
 596         sfree(origtext);
 597     }
 598
 599     /*
 600      * Compute the offsets for the tag table.
 601      */
 602     filepos = intro_text.output.pos;
 603     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 604         currnode->pos = filepos;
 605         filepos += currnode->text.output.pos;
 606     }
 607
 608     /*
 609      * Split into sub-files.
 610      */
 611     if (conf.maxfilesize > 0) {
 612         int currfilesize = intro_text.output.pos, currfilenum = 1;
 613         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 614             if (currfilesize > intro_text.output.pos &&
 615                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 616                 currfilenum++;
 617                 currfilesize = intro_text.output.pos;
 618             }
 619             currnode->filenum = currfilenum;
 620             currfilesize += currnode->text.output.pos;
 621         }
 622     }
 623
 624     /*
 625      * Write the primary output file.
 626      */
 627     fp = fopen(conf.filename, "w");
 628     if (!fp) {
 629         error(err_cantopenw, conf.filename);
 630         return;
 631     }
 632     fputs(intro_text.output.text, fp);
 633     if (conf.maxfilesize == 0) {
 634         for (currnode = topnode; currnode; currnode = currnode->listnext)
 635             fputs(currnode->text.output.text, fp);
 636     } else {
 637         int filenum = 0;
 638         fprintf(fp, "\037\nIndirect:\n");
 639         for (currnode = topnode; currnode; currnode = currnode->listnext)
 640             if (filenum != currnode->filenum) {
 641                 filenum = currnode->filenum;
 642                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 643                         currnode->pos);
 644             }
 645     }
 646     fprintf(fp, "\037\nTag Table:\n");
 647     if (conf.maxfilesize > 0)
 648         fprintf(fp, "(Indirect)\n");
 649     for (currnode = topnode; currnode; currnode = currnode->listnext)
 650         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 651     fprintf(fp, "\037\nEnd Tag Table\n");
 652     fclose(fp);
 653
 654     /*
 655      * Write the subfiles.
 656      */
 657     if (conf.maxfilesize > 0) {
 658         int filenum = 0;
 659         fp = NULL;
 660
 661         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 662             if (filenum != currnode->filenum) {
 663                 char *fname;
 664
 665                 filenum = currnode->filenum;
 666
 667                 if (fp)
 668                     fclose(fp);
 669                 fname = snewn(strlen(conf.filename) + 40, char);
 670                 sprintf(fname, "%s-%d", conf.filename, filenum);
 671                 fp = fopen(fname, "w");
 672                 if (!fp) {
 673                     error(err_cantopenw, fname);
 674                     return;
 675                 }
 676                 sfree(fname);
 677                 fputs(intro_text.output.text, fp);
 678             }
 679             fputs(currnode->text.output.text, fp);
 680         }
 681
 682         if (fp)
 683             fclose(fp);
 684     }
 685 }
 686
 687 static int info_check_index(word *w, node *n, indexdata *idx)
 688 {
 689     int ret = 0;
 690
 691     for (; w; w = w->next) {
 692         if (w->type == word_IndexRef) {
 693             indextag *tag;
 694             int i;
 695
 696             tag = index_findtag(idx, w->text);
 697             if (!tag)
 698                 break;
 699
 700             for (i = 0; i < tag->nrefs; i++) {
 701                 indexentry *entry = tag->refs[i];
 702                 info_idx *ii = (info_idx *)entry->backend_data;
 703
 704                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 705                     /*
 706                      * If the same index term is indexed twice
 707                      * within the same section, we only want to
 708                      * mention it once in the index. So do nothing
 709                      * here.
 710                      */
 711                     continue;
 712                 }
 713
 714                 if (ii->nnodes >= ii->nodesize) {
 715                     ii->nodesize += 32;
 716                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 717                 }
 718
 719                 ii->nodes[ii->nnodes++] = n;
 720
 721                 ret = 1;
 722             }
 723         }
 724     }
 725
 726     return ret;
 727 }
 728
 729 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 730 {
 731     word *ret = dup_word_list(words);
 732     word *w;
 733     keyword *kwl;
 734
 735     for (w = ret; w; w = w->next) {
 736         w->private_data = NULL;
 737         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 738             kwl = kw_lookup(keywords, w->text);
 739             if (kwl) {
 740                 if (kwl->para->type == para_NumberedList ||
 741                     kwl->para->type == para_BiblioCited) {
 742                     /*
 743                      * In Info, we do nothing special for xrefs to
 744                      * numbered list items or bibliography entries.
 745                      */
 746                     continue;
 747                 } else {
 748                     /*
 749                      * An xref to a different section has its text
 750                      * completely replaced.
 751                      */
 752                     word *w2, *w3, *w4;
 753                     w2 = w3 = w->next;
 754                     w4 = NULL;
 755                     while (w2) {
 756                         if (w2->type == word_XrefEnd) {
 757                             w4 = w2->next;
 758                             w2->next = NULL;
 759                             break;
 760                         }
 761                         w2 = w2->next;
 762                     }
 763                     free_word_list(w3);
 764
 765                     /*
 766                      * Now w is the UpperXref / LowerXref we
 767                      * started with, and w4 is the next word after
 768                      * the corresponding XrefEnd (if any). The
 769                      * simplest thing is just to stick a pointer to
 770                      * the target node structure in the private
 771                      * data field of the xref word, and let
 772                      * info_rdaddwc and friends read the node name
 773                      * out from there.
 774                      */
 775                     w->next = w4;
 776                     w->private_data = kwl->para->private_data;
 777                     assert(w->private_data);
 778                 }
 779             }
 780         }
 781     }
 782
 783     return ret;
 784 }
 785
 786 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 787                         infoconfig *cfg) {
 788     int ret = 0;
 789
 790     for (; words && words != end; words = words->next) switch (words->type) {
 791       case word_HyperLink:
 792       case word_HyperEnd:
 793       case word_XrefEnd:
 794       case word_IndexRef:
 795         break;
 796
 797       case word_Normal:
 798       case word_Emph:
 799       case word_Code:
 800       case word_WeakCode:
 801       case word_WhiteSpace:
 802       case word_EmphSpace:
 803       case word_CodeSpace:
 804       case word_WkCodeSpace:
 805       case word_Quote:
 806       case word_EmphQuote:
 807       case word_CodeQuote:
 808       case word_WkCodeQuote:
 809         assert(words->type != word_CodeQuote &&
 810                words->type != word_WkCodeQuote);
 811         if (towordstyle(words->type) == word_Emph &&
 812             (attraux(words->aux) == attr_First ||
 813              attraux(words->aux) == attr_Only))
 814             ret += info_rdadds(id, cfg->startemph);
 815         else if (towordstyle(words->type) == word_Code &&
 816                  (attraux(words->aux) == attr_First ||
 817                   attraux(words->aux) == attr_Only))
 818             ret += info_rdadds(id, cfg->lquote);
 819         if (removeattr(words->type) == word_Normal) {
 820             if (cvt_ok(id->charset, words->text) || !words->alt)
 821                 ret += info_rdadds(id, words->text);
 822             else
 823                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 824         } else if (removeattr(words->type) == word_WhiteSpace) {
 825             ret += info_rdadd(id, L' ');
 826         } else if (removeattr(words->type) == word_Quote) {
 827             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 828                                cfg->lquote : cfg->rquote);
 829         }
 830         if (towordstyle(words->type) == word_Emph &&
 831             (attraux(words->aux) == attr_Last ||
 832              attraux(words->aux) == attr_Only))
 833             ret += info_rdadds(id, cfg->endemph);
 834         else if (towordstyle(words->type) == word_Code &&
 835                  (attraux(words->aux) == attr_Last ||
 836                   attraux(words->aux) == attr_Only))
 837             ret += info_rdadds(id, cfg->rquote);
 838         break;
 839
 840       case word_UpperXref:
 841       case word_LowerXref:
 842         if (xrefs && words->private_data) {
 843             /*
 844              * This bit is structural and so must be done in char
 845              * rather than wchar_t.
 846              */
 847             ret += info_rdaddsc(id, "*Note ");
 848             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 849             ret += info_rdaddsc(id, "::");
 850         }
 851         break;
 852     }
 853
 854     return ret;
 855 }
 856
 857 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 858
 859 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 860     int w = 0;
 861     while (words) {
 862         w += info_width_internal(words, xrefs, cfg);
 863         words = words->next;
 864     }
 865     return w;
 866 }
 867
 868 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 869     int wid;
 870     int attr;
 871
 872     switch (words->type) {
 873       case word_HyperLink:
 874       case word_HyperEnd:
 875       case word_XrefEnd:
 876       case word_IndexRef:
 877         return 0;
 878
 879       case word_UpperXref:
 880       case word_LowerXref:
 881         if (xrefs && words->private_data) {
 882             /* "*Note " plus "::" comes to 8 characters */
 883             return 8 + strwid(((node *)words->private_data)->name,
 884                               cfg->charset);
 885         } else
 886             return 0;
 887     }
 888
 889     assert(words->type < word_internal_endattrs);
 890
 891     wid = 0;
 892     attr = towordstyle(words->type);
 893
 894     if (attr == word_Emph || attr == word_Code) {
 895         if (attraux(words->aux) == attr_Only ||
 896             attraux(words->aux) == attr_First)
 897             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 898                            cfg->charset);
 899     }
 900     if (attr == word_Emph || attr == word_Code) {
 901         if (attraux(words->aux) == attr_Only ||
 902             attraux(words->aux) == attr_Last)
 903             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 904                            cfg->charset);
 905     }
 906
 907     switch (words->type) {
 908       case word_Normal:
 909       case word_Emph:
 910       case word_Code:
 911       case word_WeakCode:
 912         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 913             wid += ustrwid(words->text, cfg->charset);
 914         else
 915             wid += info_width_internal_list(words->alt, xrefs, cfg);
 916         return wid;
 917
 918       case word_WhiteSpace:
 919       case word_EmphSpace:
 920       case word_CodeSpace:
 921       case word_WkCodeSpace:
 922       case word_Quote:
 923       case word_EmphQuote:
 924       case word_CodeQuote:
 925       case word_WkCodeQuote:
 926         assert(words->type != word_CodeQuote &&
 927                words->type != word_WkCodeQuote);
 928         if (removeattr(words->type) == word_Quote) {
 929             if (quoteaux(words->aux) == quote_Open)
 930                 wid += ustrwid(cfg->lquote, cfg->charset);
 931             else
 932                 wid += ustrwid(cfg->rquote, cfg->charset);
 933         } else
 934             wid++;                     /* space */
 935     }
 936     return wid;
 937 }
 938
 939 static int info_width_noxrefs(void *ctx, word *words)
 940 {
 941     return info_width_internal(words, FALSE, (infoconfig *)ctx);
 942 }
 943 static int info_width_xrefs(void *ctx, word *words)
 944 {
 945     return info_width_internal(words, TRUE, (infoconfig *)ctx);
 946 }
 947
 948 static void info_heading(info_data *text, word *tprefix,
 949                          word *words, int width, infoconfig *cfg) {
 950     int length;
 951     int firstlinewidth, wrapwidth;
 952     wrappedline *wrapping, *p;
 953
 954     length = 0;
 955     if (tprefix) {
 956         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
 957         length += info_rdadds(text, cfg->sectsuffix);
 958     }
 959
 960     wrapwidth = width;
 961     firstlinewidth = width - length;
 962
 963     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
 964                          info_width_noxrefs, cfg, 0);
 965     for (p = wrapping; p; p = p->next) {
 966         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
 967         info_rdadd(text, L'\n');
 968         while (length > 0) {
 969             info_rdadds(text, cfg->underline);
 970             length -= ustrwid(cfg->underline, cfg->charset);
 971         }
 972         info_rdadd(text, L'\n');
 973         length = 0;
 974     }
 975     wrap_free(wrapping);
 976     info_rdadd(text, L'\n');
 977 }
 978
 979 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
 980 {
 981     while (indent--) info_rdadd(text, L' ');
 982     while (width > 0) {
 983         info_rdadds(text, cfg->rule);
 984         width -= ustrwid(cfg->rule, cfg->charset);
 985     }
 986     info_rdadd(text, L'\n');
 987     info_rdadd(text, L'\n');
 988 }
 989
 990 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 991                       word *input, keywordlist *keywords, int indent,
 992                       int extraindent, int width, infoconfig *cfg) {
 993     wrappedline *wrapping, *p;
 994     word *words;
 995     int e;
 996     int i;
 997     int firstlinewidth = width;
 998
 999     words = info_transform_wordlist(input, keywords);
1000
1001     if (prefix) {
1002         for (i = 0; i < indent; i++)
1003             info_rdadd(text, L' ');
1004         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1005         if (prefixextra)
1006             e += info_rdadds(text, prefixextra);
1007         /* If the prefix is too long, shorten the first line to fit. */
1008         e = extraindent - e;
1009         if (e < 0) {
1010             firstlinewidth += e;       /* this decreases it, since e < 0 */
1011             if (firstlinewidth < 0) {
1012                 e = indent + extraindent;
1013                 firstlinewidth = width;
1014                 info_rdadd(text, L'\n');
1015             } else
1016                 e = 0;
1017         }
1018     } else
1019         e = indent + extraindent;
1020
1021     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1022                          cfg, 0);
1023     for (p = wrapping; p; p = p->next) {
1024         for (i = 0; i < e; i++)
1025             info_rdadd(text, L' ');
1026         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1027         info_rdadd(text, L'\n');
1028         e = indent + extraindent;
1029     }
1030     wrap_free(wrapping);
1031     info_rdadd(text, L'\n');
1032
1033     free_word_list(words);
1034 }
1035
1036 static void info_codepara(info_data *text, word *words,
1037                           int indent, int width) {
1038     int i;
1039
1040     for (; words; words = words->next) if (words->type == word_WeakCode) {
1041         for (i = 0; i < indent; i++)
1042             info_rdadd(text, L' ');
1043         if (info_rdadds(text, words->text) > width) {
1044             /* FIXME: warn */
1045         }
1046         info_rdadd(text, L'\n');
1047     }
1048
1049     info_rdadd(text, L'\n');
1050 }
1051
1052 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1053     info_rdadd(text, L'[');
1054     info_rdaddwc(text, words, NULL, FALSE, cfg);
1055     info_rdadds(text, L"]\n");
1056 }
1057
1058 static node *info_node_new(char *name, int charset)
1059 {
1060     node *n;
1061
1062     n = snew(node);
1063     n->text = empty_info_data;
1064     n->text.charset = charset;
1065     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1066     n->name = dupstr(name);
1067     n->started_menu = FALSE;
1068
1069     return n;
1070 }
1071
1072 static char *info_node_name_core(info_data *id, filepos *fpos)
1073 {
1074     char *p, *q;
1075
1076     /*
1077      * We cannot have commas, colons or parentheses in a node name.
1078      * Remove any that we find, with a warning.
1079      */
1080     p = q = id->output.text;
1081     while (*p) {
1082         if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1083             error(err_infonodechar, fpos, *p);
1084         } else {
1085             *q++ = *p;
1086         }
1087         p++;
1088     }
1089     *q = '\0';
1090
1091     return id->output.text;
1092 }
1093
1094 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1095 {
1096     info_data id = EMPTY_INFO_DATA;
1097
1098     id.charset = cfg->charset;
1099     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1100                  NULL, FALSE, cfg);
1101     info_rdaddsc(&id, NULL);
1102
1103     return info_node_name_core(&id, &par->fpos);
1104 }
1105
1106 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1107 {
1108     info_data id = EMPTY_INFO_DATA;
1109
1110     id.charset = cfg->charset;
1111     info_rdadds(&id, text);
1112     info_rdaddsc(&id, NULL);
1113
1114     return info_node_name_core(&id, NULL);
1115 }
1116
1117 static void info_menu_item(info_data *text, node *n, paragraph *p,
1118                            infoconfig *cfg)
1119 {
1120     /*
1121      * FIXME: Depending on how we're doing node names in this info
1122      * file, we might want to do
1123      *
1124      *   * Node name:: Chapter title
1125      *
1126      * _or_
1127      *
1128      *   * Chapter number: Node name.
1129      *
1130      * This function mostly works in char rather than wchar_t,
1131      * because a menu item is a structural component.
1132      */
1133     info_rdaddsc(text, "* ");
1134     info_rdaddsc(text, n->name);
1135     info_rdaddsc(text, "::");
1136     if (p) {
1137         info_rdaddc(text, ' ');
1138         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1139     }
1140     info_rdaddc(text, '\n');
1141 }
1142
1143 /*
1144  * These functions implement my wrapper on the rdadd* calls which
1145  * allows me to switch arbitrarily between literal octet-string
1146  * text and charset-translated Unicode. (Because no matter what
1147  * character set I write the actual text in, I expect info readers
1148  * to treat node names and file names literally and to expect
1149  * keywords like `*Note' in their canonical form, so I have to take
1150  * steps to ensure that those structural elements of the file
1151  * aren't messed with.)
1152  */
1153 static int info_rdadds(info_data *d, wchar_t const *wcs)
1154 {
1155     if (!d->wcmode) {
1156         d->state = charset_init_state;
1157         d->wcmode = TRUE;
1158     }
1159
1160     if (wcs) {
1161         char buf[256];
1162         int len, width, ret;
1163
1164         width = ustrwid(wcs, d->charset);
1165
1166         len = ustrlen(wcs);
1167         while (len > 0) {
1168             int prevlen = len;
1169
1170             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1171                                        d->charset, &d->state, NULL);
1172
1173             assert(len < prevlen);
1174
1175             if (ret > 0) {
1176                 buf[ret] = '\0';
1177                 rdaddsc(&d->output, buf);
1178             }
1179         }
1180
1181         return width;
1182     } else
1183         return 0;
1184 }
1185
1186 static int info_rdaddsc(info_data *d, char const *cs)
1187 {
1188     if (d->wcmode) {
1189         char buf[256];
1190         int ret;
1191
1192         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1193                                    d->charset, &d->state, NULL);
1194         if (ret > 0) {
1195             buf[ret] = '\0';
1196             rdaddsc(&d->output, buf);
1197         }
1198
1199         d->wcmode = FALSE;
1200     }
1201
1202     if (cs) {
1203         rdaddsc(&d->output, cs);
1204         return strwid(cs, d->charset);
1205     } else
1206         return 0;
1207 }
1208
1209 static int info_rdadd(info_data *d, wchar_t wc)
1210 {
1211     wchar_t wcs[2];
1212     wcs[0] = wc;
1213     wcs[1] = L'\0';
1214     return info_rdadds(d, wcs);
1215 }
1216
1217 static int info_rdaddc(info_data *d, char c)
1218 {
1219     char cs[2];
1220     cs[0] = c;
1221     cs[1] = '\0';
1222     return info_rdaddsc(d, cs);
1223 }