mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

/*
 * info backend for Halibut
 *
 * Possible future work:
 *
 *  - configurable choice of how to allocate node names?
 *     + possibly a template-like approach, choosing node names to
 *       be the full section title or perhaps the internal keyword?
 *     + neither of those seems quite right. Perhaps instead a
 *       Windows Help-like mechanism, where a magic config
 *       directive allows user choice of name for every node.
 *     + Only trouble with that is, now what happens to the section
 *       numbers? Do they become completely vestigial and just sit
 *       in the title text of each node? Or do we keep them in the
 *       menus somehow? I think people might occasionally want to
 *       go to a section by number, if only because all the _other_
 *       formats of the same document will reference the numbers
 *       all the time. So our menu lines could look like one of
 *       these:
 *        * Nodename: Section 1.2. Title of section.
 *        * Section 1.2: Nodename. Title of section.
 *
 *  - might be helpful to diagnose duplicate node names!
 *
 *  - more flexibility in heading underlines, like text backend.
 *     + Given info.el's fontifier, we'd want the following defaults:
 *       \cfg{info-title-underline}{*}
 *       \cfg{info-chapter-underline}{=}
 *       \cfg{info-section-underline}{0}{-}
 *       \cfg{info-section-underline}{1}{.}
 *
 *  - Indices generated by makeinfo use a menu rather than a bunch of
 *    cross-references, which reduces visual clutter rather.  For
 *    singly-referenced items, it looks like:
 *      * toner cartridge, replacing:              Toner.
 *    It does a horrid job on multiply-referenced entries, though,
 *    perhaps because the name before the colon is meant to be unique.
 *    Info's 'i' command requires the use of a menu -- it fails to
 *    find any index entries at all with Halibut's current index format.
 *
 *  - The string "*note" is matched case-insensitively, so we could
 *    make things slightly less ugly by using the lower-case version
 *    when the user asks for \k.  Unfortunately, standalone Info seems
 *    to match node names case-sensitively, so we can't downcase that.
 */
  46
  47 #include <stdio.h>
  48 #include <stdlib.h>
  49 #include <assert.h>
  50 #include "halibut.h"
  51
  52 typedef struct {
  53     char *filename;
  54     int maxfilesize;
  55     int charset;
  56     int listindentbefore, listindentafter;
  57     int indent_code, width, index_width;
  58     wchar_t *bullet, *listsuffix;
  59     wchar_t *startemph, *endemph;
  60     wchar_t *lquote, *rquote;
  61     wchar_t *sectsuffix, *underline;
  62     wchar_t *rule;
  63     wchar_t *index_text;
  64 } infoconfig;
  65
  66 typedef struct {
  67     rdstringc output;
  68     int charset;
  69     charset_state state;
  70     int wcmode;
  71 } info_data;
  72 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
  73 static const info_data empty_info_data = EMPTY_INFO_DATA;
  74
  75 typedef struct node_tag node;
  76 struct node_tag {
  77     node *listnext;
  78     node *up, *prev, *next, *lastchild;
  79     int pos, started_menu, filenum;
  80     char *name;
  81     info_data text;
  82 };
  83
  84 typedef struct {
  85     char *text;
  86     int length;
  87     int nnodes, nodesize;
  88     node **nodes;
  89 } info_idx;
  90
  91 static int info_rdadd(info_data *, wchar_t);
  92 static int info_rdadds(info_data *, wchar_t const *);
  93 static int info_rdaddc(info_data *, char);
  94 static int info_rdaddsc(info_data *, char const *);
  95
  96 static void info_heading(info_data *, word *, word *, int, infoconfig *);
  97 static void info_rule(info_data *, int, int, infoconfig *);
  98 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  99                       int, int, int, infoconfig *);
 100 static void info_codepara(info_data *, word *, int, int);
 101 static void info_versionid(info_data *, word *, infoconfig *);
 102 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
 103 static word *info_transform_wordlist(word *, keywordlist *);
 104 static int info_check_index(word *, node *, indexdata *);
 105
 106 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
 107
 108 static node *info_node_new(char *name, int charset);
 109 static char *info_node_name_for_para(paragraph *p, infoconfig *);
 110 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
 111
 112 static infoconfig info_configure(paragraph *source) {
 113     infoconfig ret;
 114     paragraph *p;
 115
 116     /*
 117      * Defaults.
 118      */
 119     ret.filename = dupstr("output.info");
 120     ret.maxfilesize = 64 << 10;
 121     ret.charset = CS_ASCII;
 122     ret.width = 70;
 123     ret.listindentbefore = 1;
 124     ret.listindentafter = 3;
 125     ret.indent_code = 2;
 126     ret.index_width = 40;
 127     ret.listsuffix = L".";
 128     ret.bullet = L"\x2022\0-\0\0";
 129     ret.rule = L"\x2500\0-\0\0";
 130     ret.startemph = L"_\0_\0\0";
 131     ret.endemph = uadv(ret.startemph);
 132     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 133     ret.rquote = uadv(ret.lquote);
 134     ret.sectsuffix = L": ";
 135     ret.underline = L"\x203E\0-\0\0";
 136     ret.index_text = L"Index";
 137
 138     /*
 139      * Two-pass configuration so that we can pick up global config
 140      * (e.g. `quotes') before having it overridden by specific
 141      * config (`info-quotes'), irrespective of the order in which
 142      * they occur.
 143      */
 144     for (p = source; p; p = p->next) {
 145         if (p->type == para_Config) {
 146             if (!ustricmp(p->keyword, L"quotes")) {
 147                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 148                     ret.lquote = uadv(p->keyword);
 149                     ret.rquote = uadv(ret.lquote);
 150                 }
 151             } else if (!ustricmp(p->keyword, L"index")) {
 152                 ret.index_text = uadv(p->keyword);
 153             }
 154         }
 155     }
 156
 157     for (p = source; p; p = p->next) {
 158         if (p->type == para_Config) {
 159             if (!ustricmp(p->keyword, L"info-filename")) {
 160                 sfree(ret.filename);
 161                 ret.filename = dupstr(adv(p->origkeyword));
 162             } else if (!ustricmp(p->keyword, L"info-charset")) {
 163                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 164             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 165                 ret.maxfilesize = utoi(uadv(p->keyword));
 166             } else if (!ustricmp(p->keyword, L"info-width")) {
 167                 ret.width = utoi(uadv(p->keyword));
 168             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 169                 ret.indent_code = utoi(uadv(p->keyword));
 170             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 171                 ret.index_width = utoi(uadv(p->keyword));
 172             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 173                 ret.listindentbefore = utoi(uadv(p->keyword));
 174             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 175                 ret.listindentafter = utoi(uadv(p->keyword));
 176             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 177                 ret.sectsuffix = uadv(p->keyword);
 178             } else if (!ustricmp(p->keyword, L"info-underline")) {
 179                 ret.underline = uadv(p->keyword);
 180             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 181                 ret.bullet = uadv(p->keyword);
 182             } else if (!ustricmp(p->keyword, L"info-rule")) {
 183                 ret.rule = uadv(p->keyword);
 184             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 185                 ret.listsuffix = uadv(p->keyword);
 186             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 187                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 188                     ret.startemph = uadv(p->keyword);
 189                     ret.endemph = uadv(ret.startemph);
 190                 }
 191             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 192                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 193                     ret.lquote = uadv(p->keyword);
 194                     ret.rquote = uadv(ret.lquote);
 195                 }
 196             }
 197         }
 198     }
 199
 200     /*
 201      * Now process fallbacks on quote characters, underlines, the
 202      * rule character, the emphasis characters, and bullets.
 203      */
 204     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 205            (!cvt_ok(ret.charset, ret.lquote) ||
 206             !cvt_ok(ret.charset, ret.rquote))) {
 207         ret.lquote = uadv(ret.rquote);
 208         ret.rquote = uadv(ret.lquote);
 209     }
 210
 211     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 212            (!cvt_ok(ret.charset, ret.startemph) ||
 213             !cvt_ok(ret.charset, ret.endemph))) {
 214         ret.startemph = uadv(ret.endemph);
 215         ret.endemph = uadv(ret.startemph);
 216     }
 217
 218     while (*ret.underline && *uadv(ret.underline) &&
 219            !cvt_ok(ret.charset, ret.underline))
 220         ret.underline = uadv(ret.underline);
 221
 222     while (*ret.bullet && *uadv(ret.bullet) &&
 223            !cvt_ok(ret.charset, ret.bullet))
 224         ret.bullet = uadv(ret.bullet);
 225
 226     while (*ret.rule && *uadv(ret.rule) &&
 227            !cvt_ok(ret.charset, ret.rule))
 228         ret.rule = uadv(ret.rule);
 229
 230     return ret;
 231 }
 232
 233 paragraph *info_config_filename(char *filename)
 234 {
 235     return cmdline_cfg_simple("info-filename", filename, NULL);
 236 }
 237
 238 void info_backend(paragraph *sourceform, keywordlist *keywords,
 239                   indexdata *idx, void *unused) {
 240     paragraph *p;
 241     infoconfig conf;
 242     word *prefix, *body, *wp;
 243     word spaceword;
 244     wchar_t *prefixextra;
 245     int nesting, nestindent;
 246     int indentb, indenta;
 247     int filepos;
 248     int has_index;
 249     info_data intro_text = EMPTY_INFO_DATA;
 250     node *topnode, *currnode;
 251     word bullet;
 252     FILE *fp;
 253
 254     IGNORE(unused);
 255
 256     conf = info_configure(sourceform);
 257
 258     /*
 259      * Go through and create a node for each section.
 260      */
 261     topnode = info_node_new("Top", conf.charset);
 262     currnode = topnode;
 263     for (p = sourceform; p; p = p->next) switch (p->type) {
 264         /*
 265          * Chapter titles.
 266          */
 267       case para_Chapter:
 268       case para_Appendix:
 269       case para_UnnumberedChapter:
 270       case para_Heading:
 271       case para_Subsect:
 272         {
 273             node *newnode, *upnode;
 274             char *nodename;
 275
 276             nodename = info_node_name_for_para(p, &conf);
 277             newnode = info_node_new(nodename, conf.charset);
 278             sfree(nodename);
 279
 280             p->private_data = newnode;
 281
 282             if (p->parent)
 283                 upnode = (node *)p->parent->private_data;
 284             else
 285                 upnode = topnode;
 286             assert(upnode);
 287             newnode->up = upnode;
 288
 289             currnode->next = newnode;
 290             newnode->prev = currnode;
 291
 292             currnode->listnext = newnode;
 293             currnode = newnode;
 294         }
 295         break;
 296       default:
 297         p->private_data = NULL;
 298         break;
 299     }
 300
 301     /*
 302      * Set up the display form of each index entry.
 303      */
 304     {
 305         int i;
 306         indexentry *entry;
 307
 308         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 309             info_idx *ii = snew(info_idx);
 310             info_data id = EMPTY_INFO_DATA;
 311
 312             id.charset = conf.charset;
 313
 314             ii->nnodes = ii->nodesize = 0;
 315             ii->nodes = NULL;
 316
 317             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 318
 319             ii->text = id.output.text;
 320
 321             entry->backend_data = ii;
 322         }
 323     }
 324
 325     /*
 326      * An Info file begins with a piece of introductory text which
 327      * is apparently never shown anywhere. This seems to me to be a
 328      * good place to put the copyright notice and the version IDs.
 329      * Also, Info directory entries are expected to go here.
 330      */
 331     intro_text.charset = conf.charset;
 332
 333     info_rdaddsc(&intro_text,
 334             "This Info file generated by Halibut, ");
 335     info_rdaddsc(&intro_text, version);
 336     info_rdaddsc(&intro_text, "\n\n");
 337
 338     for (p = sourceform; p; p = p->next)
 339         if (p->type == para_Config &&
 340             !ustricmp(p->keyword, L"info-dir-entry")) {
 341             wchar_t *section, *shortname, *longname, *kw;
 342             char *s;
 343
 344             section = uadv(p->keyword);
 345             shortname = *section ? uadv(section) : L"";
 346             longname = *shortname ? uadv(shortname) : L"";
 347             kw = *longname ? uadv(longname) : L"";
 348
 349             if (!*longname) {
 350                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 351                 continue;
 352             }
 353
 354             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 355             info_rdadds(&intro_text, section);
 356             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 357             info_rdadds(&intro_text, shortname);
 358             info_rdaddsc(&intro_text, ": (");
 359             s = dupstr(conf.filename);
 360             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 361                 s[strlen(s)-5] = '\0';
 362             info_rdaddsc(&intro_text, s);
 363             sfree(s);
 364             info_rdaddsc(&intro_text, ")");
 365             if (*kw) {
 366                 keyword *kwl = kw_lookup(keywords, kw);
 367                 if (kwl && kwl->para->private_data) {
 368                     node *n = (node *)kwl->para->private_data;
 369                     info_rdaddsc(&intro_text, n->name);
 370                 }
 371             }
 372             info_rdaddsc(&intro_text, ".   ");
 373             info_rdadds(&intro_text, longname);
 374             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 375         }
 376
 377     for (p = sourceform; p; p = p->next)
 378         if (p->type == para_Copyright)
 379             info_para(&intro_text, NULL, NULL, p->words, keywords,
 380                       0, 0, conf.width, &conf);
 381
 382     for (p = sourceform; p; p = p->next)
 383         if (p->type == para_VersionID)
 384             info_versionid(&intro_text, p->words, &conf);
 385
 386     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 387         info_rdaddc(&intro_text, '\n');
 388
 389     /* Do the title */
 390     for (p = sourceform; p; p = p->next)
 391         if (p->type == para_Title)
 392             info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
 393
 394     nestindent = conf.listindentbefore + conf.listindentafter;
 395     nesting = 0;
 396
 397     currnode = topnode;
 398
 399     /* Do the main document */
 400     for (p = sourceform; p; p = p->next) switch (p->type) {
 401
 402       case para_QuotePush:
 403         nesting += 2;
 404         break;
 405       case para_QuotePop:
 406         nesting -= 2;
 407         assert(nesting >= 0);
 408         break;
 409
 410       case para_LcontPush:
 411         nesting += nestindent;
 412         break;
 413       case para_LcontPop:
 414         nesting -= nestindent;
 415         assert(nesting >= 0);
 416         break;
 417
 418         /*
 419          * Things we ignore because we've already processed them or
 420          * aren't going to touch them in this pass.
 421          */
 422       case para_IM:
 423       case para_BR:
 424       case para_Biblio:                /* only touch BiblioCited */
 425       case para_VersionID:
 426       case para_NoCite:
 427       case para_Title:
 428         break;
 429
 430         /*
 431          * Chapter titles.
 432          */
 433       case para_Chapter:
 434       case para_Appendix:
 435       case para_UnnumberedChapter:
 436       case para_Heading:
 437       case para_Subsect:
 438         currnode = p->private_data;
 439         assert(currnode);
 440         assert(currnode->up);
 441
 442         if (!currnode->up->started_menu) {
 443             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 444             currnode->up->started_menu = TRUE;
 445         }
 446         info_menu_item(&currnode->up->text, currnode, p, &conf);
 447
 448         has_index |= info_check_index(p->words, currnode, idx);
 449         info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
 450         nesting = 0;
 451         break;
 452
 453       case para_Rule:
 454         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 455         break;
 456
 457       case para_Normal:
 458       case para_Copyright:
 459       case para_DescribedThing:
 460       case para_Description:
 461       case para_BiblioCited:
 462       case para_Bullet:
 463       case para_NumberedList:
 464         has_index |= info_check_index(p->words, currnode, idx);
 465         if (p->type == para_Bullet) {
 466             bullet.next = NULL;
 467             bullet.alt = NULL;
 468             bullet.type = word_Normal;
 469             bullet.text = conf.bullet;
 470             prefix = &bullet;
 471             prefixextra = NULL;
 472             indentb = conf.listindentbefore;
 473             indenta = conf.listindentafter;
 474         } else if (p->type == para_NumberedList) {
 475             prefix = p->kwtext;
 476             prefixextra = conf.listsuffix;
 477             indentb = conf.listindentbefore;
 478             indenta = conf.listindentafter;
 479         } else if (p->type == para_Description) {
 480             prefix = NULL;
 481             prefixextra = NULL;
 482             indentb = conf.listindentbefore;
 483             indenta = conf.listindentafter;
 484         } else {
 485             prefix = NULL;
 486             prefixextra = NULL;
 487             indentb = indenta = 0;
 488         }
 489         if (p->type == para_BiblioCited) {
 490             body = dup_word_list(p->kwtext);
 491             for (wp = body; wp->next; wp = wp->next);
 492             wp->next = &spaceword;
 493             spaceword.next = p->words;
 494             spaceword.alt = NULL;
 495             spaceword.type = word_WhiteSpace;
 496             spaceword.text = NULL;
 497         } else {
 498             wp = NULL;
 499             body = p->words;
 500         }
 501         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 502                   nesting + indentb, indenta,
 503                   conf.width - nesting - indentb - indenta, &conf);
 504         if (wp) {
 505             wp->next = NULL;
 506             free_word_list(body);
 507         }
 508         break;
 509
 510       case para_Code:
 511         info_codepara(&currnode->text, p->words,
 512                       nesting + conf.indent_code,
 513                       conf.width - nesting - 2 * conf.indent_code);
 514         break;
 515     }
 516
 517     /*
 518      * Create an index node if required.
 519      */
 520     if (has_index) {
 521         node *newnode;
 522         int i, j, k;
 523         indexentry *entry;
 524         char *nodename;
 525
 526         nodename = info_node_name_for_text(conf.index_text, &conf);
 527         newnode = info_node_new(nodename, conf.charset);
 528         sfree(nodename);
 529
 530         newnode->up = topnode;
 531
 532         currnode->next = newnode;
 533         newnode->prev = currnode;
 534         currnode->listnext = newnode;
 535
 536         k = info_rdadds(&newnode->text, conf.index_text);
 537         info_rdaddsc(&newnode->text, "\n");
 538         while (k > 0) {
 539             info_rdadds(&newnode->text, conf.underline);
 540             k -= ustrwid(conf.underline, conf.charset);
 541         }
 542         info_rdaddsc(&newnode->text, "\n\n");
 543
 544         info_menu_item(&topnode->text, newnode, NULL, &conf);
 545
 546         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 547             info_idx *ii = (info_idx *)entry->backend_data;
 548
 549             for (j = 0; j < ii->nnodes; j++) {
 550                 /*
 551                  * When we have multiple references for a single
 552                  * index term, we only display the actual term on
 553                  * the first line, to make it clear that the terms
 554                  * really are the same.
 555                  */
 556                 if (j == 0)
 557                     info_rdaddsc(&newnode->text, ii->text);
 558                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 559                     info_rdaddc(&newnode->text, ' ');
 560                 info_rdaddsc(&newnode->text, "  *Note ");
 561                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 562                 info_rdaddsc(&newnode->text, "::\n");
 563             }
 564         }
 565     }
 566
 567     /*
 568      * Finalise the text of each node, by adding the ^_ delimiter
 569      * and the node line at the top.
 570      */
 571     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 572         char *origtext = currnode->text.output.text;
 573         currnode->text = empty_info_data;
 574         currnode->text.charset = conf.charset;
 575         info_rdaddsc(&currnode->text, "\037\nFile: ");
 576         info_rdaddsc(&currnode->text, conf.filename);
 577         info_rdaddsc(&currnode->text, ",  Node: ");
 578         info_rdaddsc(&currnode->text, currnode->name);
 579         if (currnode->prev) {
 580             info_rdaddsc(&currnode->text, ",  Prev: ");
 581             info_rdaddsc(&currnode->text, currnode->prev->name);
 582         }
 583         info_rdaddsc(&currnode->text, ",  Up: ");
 584         info_rdaddsc(&currnode->text, (currnode->up ?
 585                                        currnode->up->name : "(dir)"));
 586         if (currnode->next) {
 587             info_rdaddsc(&currnode->text, ",  Next: ");
 588             info_rdaddsc(&currnode->text, currnode->next->name);
 589         }
 590         info_rdaddsc(&currnode->text, "\n\n");
 591         info_rdaddsc(&currnode->text, origtext);
 592         /*
 593          * Just make _absolutely_ sure we end with a newline.
 594          */
 595         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 596             info_rdaddc(&currnode->text, '\n');
 597
 598         sfree(origtext);
 599     }
 600
 601     /*
 602      * Compute the offsets for the tag table.
 603      */
 604     filepos = intro_text.output.pos;
 605     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 606         currnode->pos = filepos;
 607         filepos += currnode->text.output.pos;
 608     }
 609
 610     /*
 611      * Split into sub-files.
 612      */
 613     if (conf.maxfilesize > 0) {
 614         int currfilesize = intro_text.output.pos, currfilenum = 1;
 615         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 616             if (currfilesize > intro_text.output.pos &&
 617                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 618                 currfilenum++;
 619                 currfilesize = intro_text.output.pos;
 620             }
 621             currnode->filenum = currfilenum;
 622             currfilesize += currnode->text.output.pos;
 623         }
 624     }
 625
 626     /*
 627      * Write the primary output file.
 628      */
 629     fp = fopen(conf.filename, "w");
 630     if (!fp) {
 631         error(err_cantopenw, conf.filename);
 632         return;
 633     }
 634     fputs(intro_text.output.text, fp);
 635     if (conf.maxfilesize == 0) {
 636         for (currnode = topnode; currnode; currnode = currnode->listnext)
 637             fputs(currnode->text.output.text, fp);
 638     } else {
 639         int filenum = 0;
 640         fprintf(fp, "\037\nIndirect:\n");
 641         for (currnode = topnode; currnode; currnode = currnode->listnext)
 642             if (filenum != currnode->filenum) {
 643                 filenum = currnode->filenum;
 644                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 645                         currnode->pos);
 646             }
 647     }
 648     fprintf(fp, "\037\nTag Table:\n");
 649     if (conf.maxfilesize > 0)
 650         fprintf(fp, "(Indirect)\n");
 651     for (currnode = topnode; currnode; currnode = currnode->listnext)
 652         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 653     fprintf(fp, "\037\nEnd Tag Table\n");
 654     fclose(fp);
 655
 656     /*
 657      * Write the subfiles.
 658      */
 659     if (conf.maxfilesize > 0) {
 660         int filenum = 0;
 661         fp = NULL;
 662
 663         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 664             if (filenum != currnode->filenum) {
 665                 char *fname;
 666
 667                 filenum = currnode->filenum;
 668
 669                 if (fp)
 670                     fclose(fp);
 671                 fname = snewn(strlen(conf.filename) + 40, char);
 672                 sprintf(fname, "%s-%d", conf.filename, filenum);
 673                 fp = fopen(fname, "w");
 674                 if (!fp) {
 675                     error(err_cantopenw, fname);
 676                     return;
 677                 }
 678                 sfree(fname);
 679                 fputs(intro_text.output.text, fp);
 680             }
 681             fputs(currnode->text.output.text, fp);
 682         }
 683
 684         if (fp)
 685             fclose(fp);
 686     }
 687 }
 688
 689 static int info_check_index(word *w, node *n, indexdata *idx)
 690 {
 691     int ret = 0;
 692
 693     for (; w; w = w->next) {
 694         if (w->type == word_IndexRef) {
 695             indextag *tag;
 696             int i;
 697
 698             tag = index_findtag(idx, w->text);
 699             if (!tag)
 700                 break;
 701
 702             for (i = 0; i < tag->nrefs; i++) {
 703                 indexentry *entry = tag->refs[i];
 704                 info_idx *ii = (info_idx *)entry->backend_data;
 705
 706                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 707                     /*
 708                      * If the same index term is indexed twice
 709                      * within the same section, we only want to
 710                      * mention it once in the index. So do nothing
 711                      * here.
 712                      */
 713                     continue;
 714                 }
 715
 716                 if (ii->nnodes >= ii->nodesize) {
 717                     ii->nodesize += 32;
 718                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 719                 }
 720
 721                 ii->nodes[ii->nnodes++] = n;
 722
 723                 ret = 1;
 724             }
 725         }
 726     }
 727
 728     return ret;
 729 }
 730
 731 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 732 {
 733     word *ret = dup_word_list(words);
 734     word *w;
 735     keyword *kwl;
 736
 737     for (w = ret; w; w = w->next) {
 738         w->private_data = NULL;
 739         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 740             kwl = kw_lookup(keywords, w->text);
 741             if (kwl) {
 742                 if (kwl->para->type == para_NumberedList ||
 743                     kwl->para->type == para_BiblioCited) {
 744                     /*
 745                      * In Info, we do nothing special for xrefs to
 746                      * numbered list items or bibliography entries.
 747                      */
 748                     continue;
 749                 } else {
 750                     /*
 751                      * An xref to a different section has its text
 752                      * completely replaced.
 753                      */
 754                     word *w2, *w3, *w4;
 755                     w2 = w3 = w->next;
 756                     w4 = NULL;
 757                     while (w2) {
 758                         if (w2->type == word_XrefEnd) {
 759                             w4 = w2->next;
 760                             w2->next = NULL;
 761                             break;
 762                         }
 763                         w2 = w2->next;
 764                     }
 765                     free_word_list(w3);
 766
 767                     /*
 768                      * Now w is the UpperXref / LowerXref we
 769                      * started with, and w4 is the next word after
 770                      * the corresponding XrefEnd (if any). The
 771                      * simplest thing is just to stick a pointer to
 772                      * the target node structure in the private
 773                      * data field of the xref word, and let
 774                      * info_rdaddwc and friends read the node name
 775                      * out from there.
 776                      */
 777                     w->next = w4;
 778                     w->private_data = kwl->para->private_data;
 779                     assert(w->private_data);
 780                 }
 781             }
 782         }
 783     }
 784
 785     return ret;
 786 }
 787
 788 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 789                         infoconfig *cfg) {
 790     int ret = 0;
 791
 792     for (; words && words != end; words = words->next) switch (words->type) {
 793       case word_HyperLink:
 794       case word_HyperEnd:
 795       case word_XrefEnd:
 796       case word_IndexRef:
 797         break;
 798
 799       case word_Normal:
 800       case word_Emph:
 801       case word_Code:
 802       case word_WeakCode:
 803       case word_WhiteSpace:
 804       case word_EmphSpace:
 805       case word_CodeSpace:
 806       case word_WkCodeSpace:
 807       case word_Quote:
 808       case word_EmphQuote:
 809       case word_CodeQuote:
 810       case word_WkCodeQuote:
 811         assert(words->type != word_CodeQuote &&
 812                words->type != word_WkCodeQuote);
 813         if (towordstyle(words->type) == word_Emph &&
 814             (attraux(words->aux) == attr_First ||
 815              attraux(words->aux) == attr_Only))
 816             ret += info_rdadds(id, cfg->startemph);
 817         else if (towordstyle(words->type) == word_Code &&
 818                  (attraux(words->aux) == attr_First ||
 819                   attraux(words->aux) == attr_Only))
 820             ret += info_rdadds(id, cfg->lquote);
 821         if (removeattr(words->type) == word_Normal) {
 822             if (cvt_ok(id->charset, words->text) || !words->alt)
 823                 ret += info_rdadds(id, words->text);
 824             else
 825                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 826         } else if (removeattr(words->type) == word_WhiteSpace) {
 827             ret += info_rdadd(id, L' ');
 828         } else if (removeattr(words->type) == word_Quote) {
 829             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 830                                cfg->lquote : cfg->rquote);
 831         }
 832         if (towordstyle(words->type) == word_Emph &&
 833             (attraux(words->aux) == attr_Last ||
 834              attraux(words->aux) == attr_Only))
 835             ret += info_rdadds(id, cfg->endemph);
 836         else if (towordstyle(words->type) == word_Code &&
 837                  (attraux(words->aux) == attr_Last ||
 838                   attraux(words->aux) == attr_Only))
 839             ret += info_rdadds(id, cfg->rquote);
 840         break;
 841
 842       case word_UpperXref:
 843       case word_LowerXref:
 844         if (xrefs && words->private_data) {
 845             /*
 846              * This bit is structural and so must be done in char
 847              * rather than wchar_t.
 848              */
 849             ret += info_rdaddsc(id, "*Note ");
 850             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 851             ret += info_rdaddsc(id, "::");
 852         }
 853         break;
 854     }
 855
 856     return ret;
 857 }
 858
 859 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 860
 861 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 862     int w = 0;
 863     while (words) {
 864         w += info_width_internal(words, xrefs, cfg);
 865         words = words->next;
 866     }
 867     return w;
 868 }
 869
 870 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 871     int wid;
 872     int attr;
 873
 874     switch (words->type) {
 875       case word_HyperLink:
 876       case word_HyperEnd:
 877       case word_XrefEnd:
 878       case word_IndexRef:
 879         return 0;
 880
 881       case word_UpperXref:
 882       case word_LowerXref:
 883         if (xrefs && words->private_data) {
 884             /* "*Note " plus "::" comes to 8 characters */
 885             return 8 + strwid(((node *)words->private_data)->name,
 886                               cfg->charset);
 887         } else
 888             return 0;
 889     }
 890
 891     assert(words->type < word_internal_endattrs);
 892
 893     wid = 0;
 894     attr = towordstyle(words->type);
 895
 896     if (attr == word_Emph || attr == word_Code) {
 897         if (attraux(words->aux) == attr_Only ||
 898             attraux(words->aux) == attr_First)
 899             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 900                            cfg->charset);
 901     }
 902     if (attr == word_Emph || attr == word_Code) {
 903         if (attraux(words->aux) == attr_Only ||
 904             attraux(words->aux) == attr_Last)
 905             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 906                            cfg->charset);
 907     }
 908
 909     switch (words->type) {
 910       case word_Normal:
 911       case word_Emph:
 912       case word_Code:
 913       case word_WeakCode:
 914         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 915             wid += ustrwid(words->text, cfg->charset);
 916         else
 917             wid += info_width_internal_list(words->alt, xrefs, cfg);
 918         return wid;
 919
 920       case word_WhiteSpace:
 921       case word_EmphSpace:
 922       case word_CodeSpace:
 923       case word_WkCodeSpace:
 924       case word_Quote:
 925       case word_EmphQuote:
 926       case word_CodeQuote:
 927       case word_WkCodeQuote:
 928         assert(words->type != word_CodeQuote &&
 929                words->type != word_WkCodeQuote);
 930         if (removeattr(words->type) == word_Quote) {
 931             if (quoteaux(words->aux) == quote_Open)
 932                 wid += ustrwid(cfg->lquote, cfg->charset);
 933             else
 934                 wid += ustrwid(cfg->rquote, cfg->charset);
 935         } else
 936             wid++;                     /* space */
 937     }
 938     return wid;
 939 }
 940
 941 static int info_width_noxrefs(void *ctx, word *words)
 942 {
 943     return info_width_internal(words, FALSE, (infoconfig *)ctx);
 944 }
 945 static int info_width_xrefs(void *ctx, word *words)
 946 {
 947     return info_width_internal(words, TRUE, (infoconfig *)ctx);
 948 }
 949
 950 static void info_heading(info_data *text, word *tprefix,
 951                          word *words, int width, infoconfig *cfg) {
 952     int length;
 953     int firstlinewidth, wrapwidth;
 954     wrappedline *wrapping, *p;
 955
 956     length = 0;
 957     if (tprefix) {
 958         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
 959         length += info_rdadds(text, cfg->sectsuffix);
 960     }
 961
 962     wrapwidth = width;
 963     firstlinewidth = width - length;
 964
 965     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
 966                          info_width_noxrefs, cfg, 0);
 967     for (p = wrapping; p; p = p->next) {
 968         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
 969         info_rdadd(text, L'\n');
 970         while (length > 0) {
 971             info_rdadds(text, cfg->underline);
 972             length -= ustrwid(cfg->underline, cfg->charset);
 973         }
 974         info_rdadd(text, L'\n');
 975         length = 0;
 976     }
 977     wrap_free(wrapping);
 978     info_rdadd(text, L'\n');
 979 }
 980
 981 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
 982 {
 983     while (indent--) info_rdadd(text, L' ');
 984     while (width > 0) {
 985         info_rdadds(text, cfg->rule);
 986         width -= ustrwid(cfg->rule, cfg->charset);
 987     }
 988     info_rdadd(text, L'\n');
 989     info_rdadd(text, L'\n');
 990 }
 991
 992 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 993                       word *input, keywordlist *keywords, int indent,
 994                       int extraindent, int width, infoconfig *cfg) {
 995     wrappedline *wrapping, *p;
 996     word *words;
 997     int e;
 998     int i;
 999     int firstlinewidth = width;
1000
1001     words = info_transform_wordlist(input, keywords);
1002
1003     if (prefix) {
1004         for (i = 0; i < indent; i++)
1005             info_rdadd(text, L' ');
1006         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
1007         if (prefixextra)
1008             e += info_rdadds(text, prefixextra);
1009         /* If the prefix is too long, shorten the first line to fit. */
1010         e = extraindent - e;
1011         if (e < 0) {
1012             firstlinewidth += e;       /* this decreases it, since e < 0 */
1013             if (firstlinewidth < 0) {
1014                 e = indent + extraindent;
1015                 firstlinewidth = width;
1016                 info_rdadd(text, L'\n');
1017             } else
1018                 e = 0;
1019         }
1020     } else
1021         e = indent + extraindent;
1022
1023     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1024                          cfg, 0);
1025     for (p = wrapping; p; p = p->next) {
1026         for (i = 0; i < e; i++)
1027             info_rdadd(text, L' ');
1028         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1029         info_rdadd(text, L'\n');
1030         e = indent + extraindent;
1031     }
1032     wrap_free(wrapping);
1033     info_rdadd(text, L'\n');
1034
1035     free_word_list(words);
1036 }
1037
1038 static void info_codepara(info_data *text, word *words,
1039                           int indent, int width) {
1040     int i;
1041
1042     for (; words; words = words->next) if (words->type == word_WeakCode) {
1043         for (i = 0; i < indent; i++)
1044             info_rdadd(text, L' ');
1045         if (info_rdadds(text, words->text) > width) {
1046             /* FIXME: warn */
1047         }
1048         info_rdadd(text, L'\n');
1049     }
1050
1051     info_rdadd(text, L'\n');
1052 }
1053
1054 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1055     info_rdadd(text, L'[');
1056     info_rdaddwc(text, words, NULL, FALSE, cfg);
1057     info_rdadds(text, L"]\n");
1058 }
1059
1060 static node *info_node_new(char *name, int charset)
1061 {
1062     node *n;
1063
1064     n = snew(node);
1065     n->text = empty_info_data;
1066     n->text.charset = charset;
1067     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1068     n->name = dupstr(name);
1069     n->started_menu = FALSE;
1070
1071     return n;
1072 }
1073
1074 static char *info_node_name_core(info_data *id, filepos *fpos)
1075 {
1076     char *p, *q;
1077
1078     /*
1079      * We cannot have commas, colons or parentheses in a node name.
1080      * Remove any that we find, with a warning.
1081      */
1082     p = q = id->output.text;
1083     while (*p) {
1084         if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1085             error(err_infonodechar, fpos, *p);
1086         } else {
1087             *q++ = *p;
1088         }
1089         p++;
1090     }
1091     *q = '\0';
1092
1093     return id->output.text;
1094 }
1095
1096 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1097 {
1098     info_data id = EMPTY_INFO_DATA;
1099
1100     id.charset = cfg->charset;
1101     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1102                  NULL, FALSE, cfg);
1103     info_rdaddsc(&id, NULL);
1104
1105     return info_node_name_core(&id, &par->fpos);
1106 }
1107
1108 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1109 {
1110     info_data id = EMPTY_INFO_DATA;
1111
1112     id.charset = cfg->charset;
1113     info_rdadds(&id, text);
1114     info_rdaddsc(&id, NULL);
1115
1116     return info_node_name_core(&id, NULL);
1117 }
1118
1119 static void info_menu_item(info_data *text, node *n, paragraph *p,
1120                            infoconfig *cfg)
1121 {
1122     /*
1123      * FIXME: Depending on how we're doing node names in this info
1124      * file, we might want to do
1125      *
1126      *   * Node name:: Chapter title
1127      *
1128      * _or_
1129      *
1130      *   * Chapter number: Node name.
1131      *
1132      * This function mostly works in char rather than wchar_t,
1133      * because a menu item is a structural component.
1134      */
1135     info_rdaddsc(text, "* ");
1136     info_rdaddsc(text, n->name);
1137     info_rdaddsc(text, "::");
1138     if (p) {
1139         info_rdaddc(text, ' ');
1140         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1141     }
1142     info_rdaddc(text, '\n');
1143 }
1144
1145 /*
1146  * These functions implement my wrapper on the rdadd* calls which
1147  * allows me to switch arbitrarily between literal octet-string
1148  * text and charset-translated Unicode. (Because no matter what
1149  * character set I write the actual text in, I expect info readers
1150  * to treat node names and file names literally and to expect
1151  * keywords like `*Note' in their canonical form, so I have to take
1152  * steps to ensure that those structural elements of the file
1153  * aren't messed with.)
1154  */
1155 static int info_rdadds(info_data *d, wchar_t const *wcs)
1156 {
1157     if (!d->wcmode) {
1158         d->state = charset_init_state;
1159         d->wcmode = TRUE;
1160     }
1161
1162     if (wcs) {
1163         char buf[256];
1164         int len, width, ret;
1165
1166         width = ustrwid(wcs, d->charset);
1167
1168         len = ustrlen(wcs);
1169         while (len > 0) {
1170             int prevlen = len;
1171
1172             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1173                                        d->charset, &d->state, NULL);
1174
1175             assert(len < prevlen);
1176
1177             if (ret > 0) {
1178                 buf[ret] = '\0';
1179                 rdaddsc(&d->output, buf);
1180             }
1181         }
1182
1183         return width;
1184     } else
1185         return 0;
1186 }
1187
1188 static int info_rdaddsc(info_data *d, char const *cs)
1189 {
1190     if (d->wcmode) {
1191         char buf[256];
1192         int ret;
1193
1194         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1195                                    d->charset, &d->state, NULL);
1196         if (ret > 0) {
1197             buf[ret] = '\0';
1198             rdaddsc(&d->output, buf);
1199         }
1200
1201         d->wcmode = FALSE;
1202     }
1203
1204     if (cs) {
1205         rdaddsc(&d->output, cs);
1206         return strwid(cs, d->charset);
1207     } else
1208         return 0;
1209 }
1210
1211 static int info_rdadd(info_data *d, wchar_t wc)
1212 {
1213     wchar_t wcs[2];
1214     wcs[0] = wc;
1215     wcs[1] = L'\0';
1216     return info_rdadds(d, wcs);
1217 }
1218
1219 static int info_rdaddc(info_data *d, char c)
1220 {
1221     char cs[2];
1222     cs[0] = c;
1223     cs[1] = '\0';
1224     return info_rdaddsc(d, cs);
1225 }