mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable choice of how to allocate node names?
   7  *     + possibly a template-like approach, choosing node names to
   8  *       be the full section title or perhaps the internal keyword?
   9  *     + neither of those seems quite right. Perhaps instead a
  10  *       Windows Help-like mechanism, where a magic config
  11  *       directive allows user choice of name for every node.
  12  *     + Only trouble with that is, now what happens to the section
  13  *       numbers? Do they become completely vestigial and just sit
  14  *       in the title text of each node? Or do we keep them in the
  15  *       menus somehow? I think people might occasionally want to
  16  *       go to a section by number, if only because all the _other_
  17  *       formats of the same document will reference the numbers
  18  *       all the time. So our menu lines could look like one of
  19  *       these:
  20  *        * Nodename: Section 1.2. Title of section.
  21  *        * Section 1.2: Nodename. Title of section.
  22  *
  23  *  - might be helpful to diagnose duplicate node names!
  24  */
  25
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <assert.h>
  29 #include "halibut.h"
  30
  31 typedef struct {
  32     char *filename;
  33     int maxfilesize;
  34     int charset;
  35     int listindentbefore, listindentafter;
  36     int indent_code, width, index_width;
  37     wchar_t *bullet, *listsuffix;
  38     wchar_t *startemph, *endemph;
  39     wchar_t *lquote, *rquote;
  40     wchar_t *sectsuffix, *underline;
  41     wchar_t *rule;
  42     wchar_t *index_text;
  43 } infoconfig;
  44
  45 typedef struct {
  46     rdstringc output;
  47     int charset;
  48     charset_state state;
  49     int wcmode;
  50 } info_data;
  51 #define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
  52 static const info_data empty_info_data = EMPTY_INFO_DATA;
  53
  54 typedef struct node_tag node;
  55 struct node_tag {
  56     node *listnext;
  57     node *up, *prev, *next, *lastchild;
  58     int pos, started_menu, filenum;
  59     char *name;
  60     info_data text;
  61 };
  62
  63 typedef struct {
  64     char *text;
  65     int length;
  66     int nnodes, nodesize;
  67     node **nodes;
  68 } info_idx;
  69
  70 static int info_rdadd(info_data *, wchar_t);
  71 static int info_rdadds(info_data *, wchar_t const *);
  72 static int info_rdaddc(info_data *, char);
  73 static int info_rdaddsc(info_data *, char const *);
  74
  75 static void info_heading(info_data *, word *, word *, int, infoconfig *);
  76 static void info_rule(info_data *, int, int, infoconfig *);
  77 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  78                       int, int, int, infoconfig *);
  79 static void info_codepara(info_data *, word *, int, int);
  80 static void info_versionid(info_data *, word *, infoconfig *);
  81 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
  82 static word *info_transform_wordlist(word *, keywordlist *);
  83 static int info_check_index(word *, node *, indexdata *);
  84
  85 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
  86
  87 static node *info_node_new(char *name, int charset);
  88 static char *info_node_name_for_para(paragraph *p, infoconfig *);
  89 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
  90
  91 static infoconfig info_configure(paragraph *source) {
  92     infoconfig ret;
  93     paragraph *p;
  94
  95     /*
  96      * Defaults.
  97      */
  98     ret.filename = dupstr("output.info");
  99     ret.maxfilesize = 64 << 10;
 100     ret.charset = CS_ASCII;
 101     ret.width = 70;
 102     ret.listindentbefore = 1;
 103     ret.listindentafter = 3;
 104     ret.indent_code = 2;
 105     ret.index_width = 40;
 106     ret.listsuffix = L".";
 107     ret.bullet = L"\x2022\0-\0\0";
 108     ret.rule = L"\x2500\0-\0\0";
 109     ret.startemph = L"_\0_\0\0";
 110     ret.endemph = uadv(ret.startemph);
 111     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 112     ret.rquote = uadv(ret.lquote);
 113     ret.sectsuffix = L": ";
 114     ret.underline = L"\x203E\0-\0\0";
 115     ret.index_text = L"Index";
 116
 117     /*
 118      * Two-pass configuration so that we can pick up global config
 119      * (e.g. `quotes') before having it overridden by specific
 120      * config (`info-quotes'), irrespective of the order in which
 121      * they occur.
 122      */
 123     for (p = source; p; p = p->next) {
 124         if (p->type == para_Config) {
 125             if (!ustricmp(p->keyword, L"quotes")) {
 126                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 127                     ret.lquote = uadv(p->keyword);
 128                     ret.rquote = uadv(ret.lquote);
 129                 }
 130             } else if (!ustricmp(p->keyword, L"index")) {
 131                 ret.index_text = uadv(p->keyword);
 132             }
 133         }
 134     }
 135
 136     for (p = source; p; p = p->next) {
 137         if (p->type == para_Config) {
 138             if (!ustricmp(p->keyword, L"info-filename")) {
 139                 sfree(ret.filename);
 140                 ret.filename = dupstr(adv(p->origkeyword));
 141             } else if (!ustricmp(p->keyword, L"info-charset")) {
 142                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 143             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 144                 ret.maxfilesize = utoi(uadv(p->keyword));
 145             } else if (!ustricmp(p->keyword, L"info-width")) {
 146                 ret.width = utoi(uadv(p->keyword));
 147             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 148                 ret.indent_code = utoi(uadv(p->keyword));
 149             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 150                 ret.index_width = utoi(uadv(p->keyword));
 151             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 152                 ret.listindentbefore = utoi(uadv(p->keyword));
 153             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 154                 ret.listindentafter = utoi(uadv(p->keyword));
 155             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 156                 ret.sectsuffix = uadv(p->keyword);
 157             } else if (!ustricmp(p->keyword, L"info-underline")) {
 158                 ret.underline = uadv(p->keyword);
 159             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 160                 ret.bullet = uadv(p->keyword);
 161             } else if (!ustricmp(p->keyword, L"info-rule")) {
 162                 ret.rule = uadv(p->keyword);
 163             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 164                 ret.listsuffix = uadv(p->keyword);
 165             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 166                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 167                     ret.startemph = uadv(p->keyword);
 168                     ret.endemph = uadv(ret.startemph);
 169                 }
 170             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 171                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 172                     ret.lquote = uadv(p->keyword);
 173                     ret.rquote = uadv(ret.lquote);
 174                 }
 175             }
 176         }
 177     }
 178
 179     /*
 180      * Now process fallbacks on quote characters, underlines, the
 181      * rule character, the emphasis characters, and bullets.
 182      */
 183     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 184            (!cvt_ok(ret.charset, ret.lquote) ||
 185             !cvt_ok(ret.charset, ret.rquote))) {
 186         ret.lquote = uadv(ret.rquote);
 187         ret.rquote = uadv(ret.lquote);
 188     }
 189
 190     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 191            (!cvt_ok(ret.charset, ret.startemph) ||
 192             !cvt_ok(ret.charset, ret.endemph))) {
 193         ret.startemph = uadv(ret.endemph);
 194         ret.endemph = uadv(ret.startemph);
 195     }
 196
 197     while (*ret.underline && *uadv(ret.underline) &&
 198            !cvt_ok(ret.charset, ret.underline))
 199         ret.underline = uadv(ret.underline);
 200
 201     while (*ret.bullet && *uadv(ret.bullet) &&
 202            !cvt_ok(ret.charset, ret.bullet))
 203         ret.bullet = uadv(ret.bullet);
 204
 205     while (*ret.rule && *uadv(ret.rule) &&
 206            !cvt_ok(ret.charset, ret.rule))
 207         ret.rule = uadv(ret.rule);
 208
 209     return ret;
 210 }
 211
 212 paragraph *info_config_filename(char *filename)
 213 {
 214     return cmdline_cfg_simple("info-filename", filename, NULL);
 215 }
 216
 217 void info_backend(paragraph *sourceform, keywordlist *keywords,
 218                   indexdata *idx, void *unused) {
 219     paragraph *p;
 220     infoconfig conf;
 221     word *prefix, *body, *wp;
 222     word spaceword;
 223     wchar_t *prefixextra;
 224     int nesting, nestindent;
 225     int indentb, indenta;
 226     int filepos;
 227     int has_index;
 228     info_data intro_text = EMPTY_INFO_DATA;
 229     node *topnode, *currnode;
 230     word bullet;
 231     FILE *fp;
 232
 233     IGNORE(unused);
 234
 235     conf = info_configure(sourceform);
 236
 237     /*
 238      * Go through and create a node for each section.
 239      */
 240     topnode = info_node_new("Top", conf.charset);
 241     currnode = topnode;
 242     for (p = sourceform; p; p = p->next) switch (p->type) {
 243         /*
 244          * Chapter titles.
 245          */
 246       case para_Chapter:
 247       case para_Appendix:
 248       case para_UnnumberedChapter:
 249       case para_Heading:
 250       case para_Subsect:
 251         {
 252             node *newnode, *upnode;
 253             char *nodename;
 254
 255             nodename = info_node_name_for_para(p, &conf);
 256             newnode = info_node_new(nodename, conf.charset);
 257             sfree(nodename);
 258
 259             p->private_data = newnode;
 260
 261             if (p->parent)
 262                 upnode = (node *)p->parent->private_data;
 263             else
 264                 upnode = topnode;
 265             assert(upnode);
 266             newnode->up = upnode;
 267
 268             currnode->next = newnode;
 269             newnode->prev = currnode;
 270
 271             currnode->listnext = newnode;
 272             currnode = newnode;
 273         }
 274         break;
 275       default:
 276         p->private_data = NULL;
 277         break;
 278     }
 279
 280     /*
 281      * Set up the display form of each index entry.
 282      */
 283     {
 284         int i;
 285         indexentry *entry;
 286
 287         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 288             info_idx *ii = snew(info_idx);
 289             info_data id = EMPTY_INFO_DATA;
 290
 291             id.charset = conf.charset;
 292
 293             ii->nnodes = ii->nodesize = 0;
 294             ii->nodes = NULL;
 295
 296             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 297
 298             ii->text = id.output.text;
 299
 300             entry->backend_data = ii;
 301         }
 302     }
 303
 304     /*
 305      * An Info file begins with a piece of introductory text which
 306      * is apparently never shown anywhere. This seems to me to be a
 307      * good place to put the copyright notice and the version IDs.
 308      * Also, Info directory entries are expected to go here.
 309      */
 310     intro_text.charset = conf.charset;
 311
 312     info_rdaddsc(&intro_text,
 313             "This Info file generated by Halibut, ");
 314     info_rdaddsc(&intro_text, version);
 315     info_rdaddsc(&intro_text, "\n\n");
 316
 317     for (p = sourceform; p; p = p->next)
 318         if (p->type == para_Config &&
 319             !ustricmp(p->keyword, L"info-dir-entry")) {
 320             wchar_t *section, *shortname, *longname, *kw;
 321             char *s;
 322
 323             section = uadv(p->keyword);
 324             shortname = *section ? uadv(section) : L"";
 325             longname = *shortname ? uadv(shortname) : L"";
 326             kw = *longname ? uadv(longname) : L"";
 327
 328             if (!*longname) {
 329                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 330                 continue;
 331             }
 332
 333             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 334             info_rdadds(&intro_text, section);
 335             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 336             info_rdadds(&intro_text, shortname);
 337             info_rdaddsc(&intro_text, ": (");
 338             s = dupstr(conf.filename);
 339             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 340                 s[strlen(s)-5] = '\0';
 341             info_rdaddsc(&intro_text, s);
 342             sfree(s);
 343             info_rdaddsc(&intro_text, ")");
 344             if (*kw) {
 345                 keyword *kwl = kw_lookup(keywords, kw);
 346                 if (kwl && kwl->para->private_data) {
 347                     node *n = (node *)kwl->para->private_data;
 348                     info_rdaddsc(&intro_text, n->name);
 349                 }
 350             }
 351             info_rdaddsc(&intro_text, ".   ");
 352             info_rdadds(&intro_text, longname);
 353             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 354         }
 355
 356     for (p = sourceform; p; p = p->next)
 357         if (p->type == para_Copyright)
 358             info_para(&intro_text, NULL, NULL, p->words, keywords,
 359                       0, 0, conf.width, &conf);
 360
 361     for (p = sourceform; p; p = p->next)
 362         if (p->type == para_VersionID)
 363             info_versionid(&intro_text, p->words, &conf);
 364
 365     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 366         info_rdaddc(&intro_text, '\n');
 367
 368     /* Do the title */
 369     for (p = sourceform; p; p = p->next)
 370         if (p->type == para_Title)
 371             info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
 372
 373     nestindent = conf.listindentbefore + conf.listindentafter;
 374     nesting = 0;
 375
 376     currnode = topnode;
 377
 378     /* Do the main document */
 379     for (p = sourceform; p; p = p->next) switch (p->type) {
 380
 381       case para_QuotePush:
 382         nesting += 2;
 383         break;
 384       case para_QuotePop:
 385         nesting -= 2;
 386         assert(nesting >= 0);
 387         break;
 388
 389       case para_LcontPush:
 390         nesting += nestindent;
 391         break;
 392       case para_LcontPop:
 393         nesting -= nestindent;
 394         assert(nesting >= 0);
 395         break;
 396
 397         /*
 398          * Things we ignore because we've already processed them or
 399          * aren't going to touch them in this pass.
 400          */
 401       case para_IM:
 402       case para_BR:
 403       case para_Biblio:                /* only touch BiblioCited */
 404       case para_VersionID:
 405       case para_NoCite:
 406       case para_Title:
 407         break;
 408
 409         /*
 410          * Chapter titles.
 411          */
 412       case para_Chapter:
 413       case para_Appendix:
 414       case para_UnnumberedChapter:
 415       case para_Heading:
 416       case para_Subsect:
 417         currnode = p->private_data;
 418         assert(currnode);
 419         assert(currnode->up);
 420
 421         if (!currnode->up->started_menu) {
 422             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 423             currnode->up->started_menu = TRUE;
 424         }
 425         info_menu_item(&currnode->up->text, currnode, p, &conf);
 426
 427         has_index |= info_check_index(p->words, currnode, idx);
 428         info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
 429         nesting = 0;
 430         break;
 431
 432       case para_Rule:
 433         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 434         break;
 435
 436       case para_Normal:
 437       case para_Copyright:
 438       case para_DescribedThing:
 439       case para_Description:
 440       case para_BiblioCited:
 441       case para_Bullet:
 442       case para_NumberedList:
 443         has_index |= info_check_index(p->words, currnode, idx);
 444         if (p->type == para_Bullet) {
 445             bullet.next = NULL;
 446             bullet.alt = NULL;
 447             bullet.type = word_Normal;
 448             bullet.text = conf.bullet;
 449             prefix = &bullet;
 450             prefixextra = NULL;
 451             indentb = conf.listindentbefore;
 452             indenta = conf.listindentafter;
 453         } else if (p->type == para_NumberedList) {
 454             prefix = p->kwtext;
 455             prefixextra = conf.listsuffix;
 456             indentb = conf.listindentbefore;
 457             indenta = conf.listindentafter;
 458         } else if (p->type == para_Description) {
 459             prefix = NULL;
 460             prefixextra = NULL;
 461             indentb = conf.listindentbefore;
 462             indenta = conf.listindentafter;
 463         } else {
 464             prefix = NULL;
 465             prefixextra = NULL;
 466             indentb = indenta = 0;
 467         }
 468         if (p->type == para_BiblioCited) {
 469             body = dup_word_list(p->kwtext);
 470             for (wp = body; wp->next; wp = wp->next);
 471             wp->next = &spaceword;
 472             spaceword.next = p->words;
 473             spaceword.alt = NULL;
 474             spaceword.type = word_WhiteSpace;
 475             spaceword.text = NULL;
 476         } else {
 477             wp = NULL;
 478             body = p->words;
 479         }
 480         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 481                   nesting + indentb, indenta,
 482                   conf.width - nesting - indentb - indenta, &conf);
 483         if (wp) {
 484             wp->next = NULL;
 485             free_word_list(body);
 486         }
 487         break;
 488
 489       case para_Code:
 490         info_codepara(&currnode->text, p->words,
 491                       nesting + conf.indent_code,
 492                       conf.width - nesting - 2 * conf.indent_code);
 493         break;
 494     }
 495
 496     /*
 497      * Create an index node if required.
 498      */
 499     if (has_index) {
 500         node *newnode;
 501         int i, j, k;
 502         indexentry *entry;
 503         char *nodename;
 504
 505         nodename = info_node_name_for_text(conf.index_text, &conf);
 506         newnode = info_node_new(nodename, conf.charset);
 507         sfree(nodename);
 508
 509         newnode->up = topnode;
 510
 511         currnode->next = newnode;
 512         newnode->prev = currnode;
 513         currnode->listnext = newnode;
 514
 515         k = info_rdadds(&newnode->text, conf.index_text);
 516         info_rdaddsc(&newnode->text, "\n");
 517         while (k > 0) {
 518             info_rdadds(&newnode->text, conf.underline);
 519             k -= ustrwid(conf.underline, conf.charset);
 520         }
 521         info_rdaddsc(&newnode->text, "\n\n");
 522
 523         info_menu_item(&topnode->text, newnode, NULL, &conf);
 524
 525         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 526             info_idx *ii = (info_idx *)entry->backend_data;
 527
 528             for (j = 0; j < ii->nnodes; j++) {
 529                 /*
 530                  * When we have multiple references for a single
 531                  * index term, we only display the actual term on
 532                  * the first line, to make it clear that the terms
 533                  * really are the same.
 534                  */
 535                 if (j == 0)
 536                     info_rdaddsc(&newnode->text, ii->text);
 537                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 538                     info_rdaddc(&newnode->text, ' ');
 539                 info_rdaddsc(&newnode->text, "  *Note ");
 540                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 541                 info_rdaddsc(&newnode->text, "::\n");
 542             }
 543         }
 544     }
 545
 546     /*
 547      * Finalise the text of each node, by adding the ^_ delimiter
 548      * and the node line at the top.
 549      */
 550     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 551         char *origtext = currnode->text.output.text;
 552         currnode->text = empty_info_data;
 553         currnode->text.charset = conf.charset;
 554         info_rdaddsc(&currnode->text, "\037\nFile: ");
 555         info_rdaddsc(&currnode->text, conf.filename);
 556         info_rdaddsc(&currnode->text, ",  Node: ");
 557         info_rdaddsc(&currnode->text, currnode->name);
 558         if (currnode->prev) {
 559             info_rdaddsc(&currnode->text, ",  Prev: ");
 560             info_rdaddsc(&currnode->text, currnode->prev->name);
 561         }
 562         info_rdaddsc(&currnode->text, ",  Up: ");
 563         info_rdaddsc(&currnode->text, (currnode->up ?
 564                                        currnode->up->name : "(dir)"));
 565         if (currnode->next) {
 566             info_rdaddsc(&currnode->text, ",  Next: ");
 567             info_rdaddsc(&currnode->text, currnode->next->name);
 568         }
 569         info_rdaddsc(&currnode->text, "\n\n");
 570         info_rdaddsc(&currnode->text, origtext);
 571         /*
 572          * Just make _absolutely_ sure we end with a newline.
 573          */
 574         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 575             info_rdaddc(&currnode->text, '\n');
 576
 577         sfree(origtext);
 578     }
 579
 580     /*
 581      * Compute the offsets for the tag table.
 582      */
 583     filepos = intro_text.output.pos;
 584     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 585         currnode->pos = filepos;
 586         filepos += currnode->text.output.pos;
 587     }
 588
 589     /*
 590      * Split into sub-files.
 591      */
 592     if (conf.maxfilesize > 0) {
 593         int currfilesize = intro_text.output.pos, currfilenum = 1;
 594         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 595             if (currfilesize > intro_text.output.pos &&
 596                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 597                 currfilenum++;
 598                 currfilesize = intro_text.output.pos;
 599             }
 600             currnode->filenum = currfilenum;
 601             currfilesize += currnode->text.output.pos;
 602         }
 603     }
 604
 605     /*
 606      * Write the primary output file.
 607      */
 608     fp = fopen(conf.filename, "w");
 609     if (!fp) {
 610         error(err_cantopenw, conf.filename);
 611         return;
 612     }
 613     fputs(intro_text.output.text, fp);
 614     if (conf.maxfilesize == 0) {
 615         for (currnode = topnode; currnode; currnode = currnode->listnext)
 616             fputs(currnode->text.output.text, fp);
 617     } else {
 618         int filenum = 0;
 619         fprintf(fp, "\037\nIndirect:\n");
 620         for (currnode = topnode; currnode; currnode = currnode->listnext)
 621             if (filenum != currnode->filenum) {
 622                 filenum = currnode->filenum;
 623                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 624                         currnode->pos);
 625             }
 626     }
 627     fprintf(fp, "\037\nTag Table:\n");
 628     if (conf.maxfilesize > 0)
 629         fprintf(fp, "(Indirect)\n");
 630     for (currnode = topnode; currnode; currnode = currnode->listnext)
 631         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 632     fprintf(fp, "\037\nEnd Tag Table\n");
 633     fclose(fp);
 634
 635     /*
 636      * Write the subfiles.
 637      */
 638     if (conf.maxfilesize > 0) {
 639         int filenum = 0;
 640         fp = NULL;
 641
 642         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 643             if (filenum != currnode->filenum) {
 644                 char *fname;
 645
 646                 filenum = currnode->filenum;
 647
 648                 if (fp)
 649                     fclose(fp);
 650                 fname = snewn(strlen(conf.filename) + 40, char);
 651                 sprintf(fname, "%s-%d", conf.filename, filenum);
 652                 fp = fopen(fname, "w");
 653                 if (!fp) {
 654                     error(err_cantopenw, fname);
 655                     return;
 656                 }
 657                 sfree(fname);
 658                 fputs(intro_text.output.text, fp);
 659             }
 660             fputs(currnode->text.output.text, fp);
 661         }
 662
 663         if (fp)
 664             fclose(fp);
 665     }
 666 }
 667
 668 static int info_check_index(word *w, node *n, indexdata *idx)
 669 {
 670     int ret = 0;
 671
 672     for (; w; w = w->next) {
 673         if (w->type == word_IndexRef) {
 674             indextag *tag;
 675             int i;
 676
 677             tag = index_findtag(idx, w->text);
 678             if (!tag)
 679                 break;
 680
 681             for (i = 0; i < tag->nrefs; i++) {
 682                 indexentry *entry = tag->refs[i];
 683                 info_idx *ii = (info_idx *)entry->backend_data;
 684
 685                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 686                     /*
 687                      * If the same index term is indexed twice
 688                      * within the same section, we only want to
 689                      * mention it once in the index. So do nothing
 690                      * here.
 691                      */
 692                     continue;
 693                 }
 694
 695                 if (ii->nnodes >= ii->nodesize) {
 696                     ii->nodesize += 32;
 697                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 698                 }
 699
 700                 ii->nodes[ii->nnodes++] = n;
 701
 702                 ret = 1;
 703             }
 704         }
 705     }
 706
 707     return ret;
 708 }
 709
 710 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 711 {
 712     word *ret = dup_word_list(words);
 713     word *w;
 714     keyword *kwl;
 715
 716     for (w = ret; w; w = w->next) {
 717         w->private_data = NULL;
 718         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 719             kwl = kw_lookup(keywords, w->text);
 720             if (kwl) {
 721                 if (kwl->para->type == para_NumberedList ||
 722                     kwl->para->type == para_BiblioCited) {
 723                     /*
 724                      * In Info, we do nothing special for xrefs to
 725                      * numbered list items or bibliography entries.
 726                      */
 727                     continue;
 728                 } else {
 729                     /*
 730                      * An xref to a different section has its text
 731                      * completely replaced.
 732                      */
 733                     word *w2, *w3, *w4;
 734                     w2 = w3 = w->next;
 735                     w4 = NULL;
 736                     while (w2) {
 737                         if (w2->type == word_XrefEnd) {
 738                             w4 = w2->next;
 739                             w2->next = NULL;
 740                             break;
 741                         }
 742                         w2 = w2->next;
 743                     }
 744                     free_word_list(w3);
 745
 746                     /*
 747                      * Now w is the UpperXref / LowerXref we
 748                      * started with, and w4 is the next word after
 749                      * the corresponding XrefEnd (if any). The
 750                      * simplest thing is just to stick a pointer to
 751                      * the target node structure in the private
 752                      * data field of the xref word, and let
 753                      * info_rdaddwc and friends read the node name
 754                      * out from there.
 755                      */
 756                     w->next = w4;
 757                     w->private_data = kwl->para->private_data;
 758                     assert(w->private_data);
 759                 }
 760             }
 761         }
 762     }
 763
 764     return ret;
 765 }
 766
 767 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 768                         infoconfig *cfg) {
 769     int ret = 0;
 770
 771     for (; words && words != end; words = words->next) switch (words->type) {
 772       case word_HyperLink:
 773       case word_HyperEnd:
 774       case word_XrefEnd:
 775       case word_IndexRef:
 776         break;
 777
 778       case word_Normal:
 779       case word_Emph:
 780       case word_Code:
 781       case word_WeakCode:
 782       case word_WhiteSpace:
 783       case word_EmphSpace:
 784       case word_CodeSpace:
 785       case word_WkCodeSpace:
 786       case word_Quote:
 787       case word_EmphQuote:
 788       case word_CodeQuote:
 789       case word_WkCodeQuote:
 790         assert(words->type != word_CodeQuote &&
 791                words->type != word_WkCodeQuote);
 792         if (towordstyle(words->type) == word_Emph &&
 793             (attraux(words->aux) == attr_First ||
 794              attraux(words->aux) == attr_Only))
 795             ret += info_rdadds(id, cfg->startemph);
 796         else if (towordstyle(words->type) == word_Code &&
 797                  (attraux(words->aux) == attr_First ||
 798                   attraux(words->aux) == attr_Only))
 799             ret += info_rdadds(id, cfg->lquote);
 800         if (removeattr(words->type) == word_Normal) {
 801             if (cvt_ok(id->charset, words->text) || !words->alt)
 802                 ret += info_rdadds(id, words->text);
 803             else
 804                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 805         } else if (removeattr(words->type) == word_WhiteSpace) {
 806             ret += info_rdadd(id, L' ');
 807         } else if (removeattr(words->type) == word_Quote) {
 808             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 809                                cfg->lquote : cfg->rquote);
 810         }
 811         if (towordstyle(words->type) == word_Emph &&
 812             (attraux(words->aux) == attr_Last ||
 813              attraux(words->aux) == attr_Only))
 814             ret += info_rdadds(id, cfg->endemph);
 815         else if (towordstyle(words->type) == word_Code &&
 816                  (attraux(words->aux) == attr_Last ||
 817                   attraux(words->aux) == attr_Only))
 818             ret += info_rdadds(id, cfg->rquote);
 819         break;
 820
 821       case word_UpperXref:
 822       case word_LowerXref:
 823         if (xrefs && words->private_data) {
 824             /*
 825              * This bit is structural and so must be done in char
 826              * rather than wchar_t.
 827              */
 828             ret += info_rdaddsc(id, "*Note ");
 829             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 830             ret += info_rdaddsc(id, "::");
 831         }
 832         break;
 833     }
 834
 835     return ret;
 836 }
 837
 838 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 839
 840 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 841     int w = 0;
 842     while (words) {
 843         w += info_width_internal(words, xrefs, cfg);
 844         words = words->next;
 845     }
 846     return w;
 847 }
 848
 849 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 850     int wid;
 851     int attr;
 852
 853     switch (words->type) {
 854       case word_HyperLink:
 855       case word_HyperEnd:
 856       case word_XrefEnd:
 857       case word_IndexRef:
 858         return 0;
 859
 860       case word_UpperXref:
 861       case word_LowerXref:
 862         if (xrefs && words->private_data) {
 863             /* "*Note " plus "::" comes to 8 characters */
 864             return 8 + strwid(((node *)words->private_data)->name,
 865                               cfg->charset);
 866         } else
 867             return 0;
 868     }
 869
 870     assert(words->type < word_internal_endattrs);
 871
 872     wid = 0;
 873     attr = towordstyle(words->type);
 874
 875     if (attr == word_Emph || attr == word_Code) {
 876         if (attraux(words->aux) == attr_Only ||
 877             attraux(words->aux) == attr_First)
 878             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 879                            cfg->charset);
 880     }
 881     if (attr == word_Emph || attr == word_Code) {
 882         if (attraux(words->aux) == attr_Only ||
 883             attraux(words->aux) == attr_Last)
 884             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 885                            cfg->charset);
 886     }
 887
 888     switch (words->type) {
 889       case word_Normal:
 890       case word_Emph:
 891       case word_Code:
 892       case word_WeakCode:
 893         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 894             wid += ustrwid(words->text, cfg->charset);
 895         else
 896             wid += info_width_internal_list(words->alt, xrefs, cfg);
 897         return wid;
 898
 899       case word_WhiteSpace:
 900       case word_EmphSpace:
 901       case word_CodeSpace:
 902       case word_WkCodeSpace:
 903       case word_Quote:
 904       case word_EmphQuote:
 905       case word_CodeQuote:
 906       case word_WkCodeQuote:
 907         assert(words->type != word_CodeQuote &&
 908                words->type != word_WkCodeQuote);
 909         if (removeattr(words->type) == word_Quote) {
 910             if (quoteaux(words->aux) == quote_Open)
 911                 wid += ustrwid(cfg->lquote, cfg->charset);
 912             else
 913                 wid += ustrwid(cfg->rquote, cfg->charset);
 914         } else
 915             wid++;                     /* space */
 916     }
 917     return wid;
 918 }
 919
 920 static int info_width_noxrefs(void *ctx, word *words)
 921 {
 922     return info_width_internal(words, FALSE, (infoconfig *)ctx);
 923 }
 924 static int info_width_xrefs(void *ctx, word *words)
 925 {
 926     return info_width_internal(words, TRUE, (infoconfig *)ctx);
 927 }
 928
 929 static void info_heading(info_data *text, word *tprefix,
 930                          word *words, int width, infoconfig *cfg) {
 931     int length;
 932     int firstlinewidth, wrapwidth;
 933     wrappedline *wrapping, *p;
 934
 935     length = 0;
 936     if (tprefix) {
 937         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
 938         length += info_rdadds(text, cfg->sectsuffix);
 939     }
 940
 941     wrapwidth = width;
 942     firstlinewidth = width - length;
 943
 944     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
 945                          info_width_noxrefs, cfg, 0);
 946     for (p = wrapping; p; p = p->next) {
 947         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
 948         info_rdadd(text, L'\n');
 949         while (length > 0) {
 950             info_rdadds(text, cfg->underline);
 951             length -= ustrwid(cfg->underline, cfg->charset);
 952         }
 953         info_rdadd(text, L'\n');
 954         length = 0;
 955     }
 956     wrap_free(wrapping);
 957     info_rdadd(text, L'\n');
 958 }
 959
 960 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
 961 {
 962     while (indent--) info_rdadd(text, L' ');
 963     while (width > 0) {
 964         info_rdadds(text, cfg->rule);
 965         width -= ustrwid(cfg->rule, cfg->charset);
 966     }
 967     info_rdadd(text, L'\n');
 968     info_rdadd(text, L'\n');
 969 }
 970
 971 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 972                       word *input, keywordlist *keywords, int indent,
 973                       int extraindent, int width, infoconfig *cfg) {
 974     wrappedline *wrapping, *p;
 975     word *words;
 976     int e;
 977     int i;
 978     int firstlinewidth = width;
 979
 980     words = info_transform_wordlist(input, keywords);
 981
 982     if (prefix) {
 983         for (i = 0; i < indent; i++)
 984             info_rdadd(text, L' ');
 985         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
 986         if (prefixextra)
 987             e += info_rdadds(text, prefixextra);
 988         /* If the prefix is too long, shorten the first line to fit. */
 989         e = extraindent - e;
 990         if (e < 0) {
 991             firstlinewidth += e;       /* this decreases it, since e < 0 */
 992             if (firstlinewidth < 0) {
 993                 e = indent + extraindent;
 994                 firstlinewidth = width;
 995                 info_rdadd(text, L'\n');
 996             } else
 997                 e = 0;
 998         }
 999     } else
1000         e = indent + extraindent;
1001
1002     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
1003                          cfg, 0);
1004     for (p = wrapping; p; p = p->next) {
1005         for (i = 0; i < e; i++)
1006             info_rdadd(text, L' ');
1007         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
1008         info_rdadd(text, L'\n');
1009         e = indent + extraindent;
1010     }
1011     wrap_free(wrapping);
1012     info_rdadd(text, L'\n');
1013
1014     free_word_list(words);
1015 }
1016
1017 static void info_codepara(info_data *text, word *words,
1018                           int indent, int width) {
1019     int i;
1020
1021     for (; words; words = words->next) if (words->type == word_WeakCode) {
1022         for (i = 0; i < indent; i++)
1023             info_rdadd(text, L' ');
1024         if (info_rdadds(text, words->text) > width) {
1025             /* FIXME: warn */
1026         }
1027         info_rdadd(text, L'\n');
1028     }
1029
1030     info_rdadd(text, L'\n');
1031 }
1032
1033 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1034     info_rdadd(text, L'[');
1035     info_rdaddwc(text, words, NULL, FALSE, cfg);
1036     info_rdadds(text, L"]\n");
1037 }
1038
1039 static node *info_node_new(char *name, int charset)
1040 {
1041     node *n;
1042
1043     n = snew(node);
1044     n->text = empty_info_data;
1045     n->text.charset = charset;
1046     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1047     n->name = dupstr(name);
1048     n->started_menu = FALSE;
1049
1050     return n;
1051 }
1052
1053 static char *info_node_name_core(info_data *id, filepos *fpos)
1054 {
1055     char *p, *q;
1056
1057     /*
1058      * We cannot have commas, colons or parentheses in a node name.
1059      * Remove any that we find, with a warning.
1060      */
1061     p = q = id->output.text;
1062     while (*p) {
1063         if (*p == ':' || *p == ',' || *p == '(' || *p == ')') {
1064             error(err_infonodechar, fpos, *p);
1065         } else {
1066             *q++ = *p;
1067         }
1068         p++;
1069     }
1070     *q = '\0';
1071
1072     return id->output.text;
1073 }
1074
1075 static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
1076 {
1077     info_data id = EMPTY_INFO_DATA;
1078
1079     id.charset = cfg->charset;
1080     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1081                  NULL, FALSE, cfg);
1082     info_rdaddsc(&id, NULL);
1083
1084     return info_node_name_core(&id, &par->fpos);
1085 }
1086
1087 static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
1088 {
1089     info_data id = EMPTY_INFO_DATA;
1090
1091     id.charset = cfg->charset;
1092     info_rdadds(&id, text);
1093     info_rdaddsc(&id, NULL);
1094
1095     return info_node_name_core(&id, NULL);
1096 }
1097
1098 static void info_menu_item(info_data *text, node *n, paragraph *p,
1099                            infoconfig *cfg)
1100 {
1101     /*
1102      * FIXME: Depending on how we're doing node names in this info
1103      * file, we might want to do
1104      *
1105      *   * Node name:: Chapter title
1106      *
1107      * _or_
1108      *
1109      *   * Chapter number: Node name.
1110      *
1111      * This function mostly works in char rather than wchar_t,
1112      * because a menu item is a structural component.
1113      */
1114     info_rdaddsc(text, "* ");
1115     info_rdaddsc(text, n->name);
1116     info_rdaddsc(text, "::");
1117     if (p) {
1118         info_rdaddc(text, ' ');
1119         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1120     }
1121     info_rdaddc(text, '\n');
1122 }
1123
1124 /*
1125  * These functions implement my wrapper on the rdadd* calls which
1126  * allows me to switch arbitrarily between literal octet-string
1127  * text and charset-translated Unicode. (Because no matter what
1128  * character set I write the actual text in, I expect info readers
1129  * to treat node names and file names literally and to expect
1130  * keywords like `*Note' in their canonical form, so I have to take
1131  * steps to ensure that those structural elements of the file
1132  * aren't messed with.)
1133  */
1134 static int info_rdadds(info_data *d, wchar_t const *wcs)
1135 {
1136     if (!d->wcmode) {
1137         d->state = charset_init_state;
1138         d->wcmode = TRUE;
1139     }
1140
1141     if (wcs) {
1142         char buf[256];
1143         int len, width, ret;
1144
1145         width = ustrwid(wcs, d->charset);
1146
1147         len = ustrlen(wcs);
1148         while (len > 0) {
1149             int prevlen = len;
1150
1151             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1152                                        d->charset, &d->state, NULL);
1153
1154             assert(len < prevlen);
1155
1156             if (ret > 0) {
1157                 buf[ret] = '\0';
1158                 rdaddsc(&d->output, buf);
1159             }
1160         }
1161
1162         return width;
1163     } else
1164         return 0;
1165 }
1166
1167 static int info_rdaddsc(info_data *d, char const *cs)
1168 {
1169     if (d->wcmode) {
1170         char buf[256];
1171         int ret;
1172
1173         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1174                                    d->charset, &d->state, NULL);
1175         if (ret > 0) {
1176             buf[ret] = '\0';
1177             rdaddsc(&d->output, buf);
1178         }
1179
1180         d->wcmode = FALSE;
1181     }
1182
1183     if (cs) {
1184         rdaddsc(&d->output, cs);
1185         return strwid(cs, d->charset);
1186     } else
1187         return 0;
1188 }
1189
1190 static int info_rdadd(info_data *d, wchar_t wc)
1191 {
1192     wchar_t wcs[2];
1193     wcs[0] = wc;
1194     wcs[1] = L'\0';
1195     return info_rdadds(d, wcs);
1196 }
1197
1198 static int info_rdaddc(info_data *d, char c)
1199 {
1200     char cs[2];
1201     cs[0] = c;
1202     cs[1] = '\0';
1203     return info_rdaddsc(d, cs);
1204 }