mdw@git.distorted.org.uk Git - sgt/halibut/blob - bk_info.c

   1 /*
   2  * info backend for Halibut
   3  *
   4  * Possible future work:
   5  *
   6  *  - configurable choice of how to allocate node names?
   7  *     + possibly a template-like approach, choosing node names to
   8  *       be the full section title or perhaps the internal keyword?
   9  *     + neither of those seems quite right. Perhaps instead a
  10  *       Windows Help-like mechanism, where a magic config
  11  *       directive allows user choice of name for every node.
  12  *     + Only trouble with that is, now what happens to the section
  13  *       numbers? Do they become completely vestigial and just sit
  14  *       in the title text of each node? Or do we keep them in the
  15  *       menus somehow? I think people might occasionally want to
  16  *       go to a section by number, if only because all the _other_
  17  *       formats of the same document will reference the numbers
  18  *       all the time. So our menu lines could look like one of
  19  *       these:
  20  *        * Nodename: Section 1.2. Title of section.
  21  *        * Section 1.2: Nodename. Title of section.
  22  *
  23  *  - might be helpful to diagnose duplicate node names!
  24  */
  25
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <assert.h>
  29 #include "halibut.h"
  30
  31 typedef struct {
  32     char *filename;
  33     int maxfilesize;
  34     int charset;
  35     int listindentbefore, listindentafter;
  36     int indent_code, width, index_width;
  37     wchar_t *bullet, *listsuffix;
  38     wchar_t *startemph, *endemph;
  39     wchar_t *lquote, *rquote;
  40     wchar_t *sectsuffix, *underline;
  41     wchar_t *rule;
  42 } infoconfig;
  43
/*
 * An output buffer being built up by the info_rdadd* functions.
 *
 * EMPTY_INFO_DATA is the initialiser for a fresh buffer; note that it
 * depends on the order of the fields below. Callers set `charset'
 * themselves after initialising (it is not taken from the macro).
 */
typedef struct {
    rdstringc output;          /* accumulated text; .text and .pos are read by callers */
    int charset;               /* character set of the output, copied from the config */
    charset_state state;       /* presumably the charset conversion state -- TODO confirm */
    int wcmode;                /* NOTE(review): meaning not visible in this part of the file */
} info_data;
#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
static const info_data empty_info_data = EMPTY_INFO_DATA;
  52
  53 typedef struct node_tag node;
  54 struct node_tag {
  55     node *listnext;
  56     node *up, *prev, *next, *lastchild;
  57     int pos, started_menu, filenum;
  58     char *name;
  59     info_data text;
  60 };
  61
  62 typedef struct {
  63     char *text;
  64     int length;
  65     int nnodes, nodesize;
  66     node **nodes;
  67 } info_idx;
  68
  69 static int info_rdadd(info_data *, wchar_t);
  70 static int info_rdadds(info_data *, wchar_t const *);
  71 static int info_rdaddc(info_data *, char);
  72 static int info_rdaddsc(info_data *, char const *);
  73
  74 static void info_heading(info_data *, word *, word *, int, infoconfig *);
  75 static void info_rule(info_data *, int, int, infoconfig *);
  76 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
  77                       int, int, int, infoconfig *);
  78 static void info_codepara(info_data *, word *, int, int);
  79 static void info_versionid(info_data *, word *, infoconfig *);
  80 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
  81 static word *info_transform_wordlist(word *, keywordlist *);
  82 static int info_check_index(word *, node *, indexdata *);
  83
  84 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
  85
  86 static node *info_node_new(char *name, int charset);
  87 static char *info_node_name(paragraph *p, infoconfig *);
  88
  89 static infoconfig info_configure(paragraph *source) {
  90     infoconfig ret;
  91     paragraph *p;
  92
  93     /*
  94      * Defaults.
  95      */
  96     ret.filename = dupstr("output.info");
  97     ret.maxfilesize = 64 << 10;
  98     ret.charset = CS_ASCII;
  99     ret.width = 70;
 100     ret.listindentbefore = 1;
 101     ret.listindentafter = 3;
 102     ret.indent_code = 2;
 103     ret.index_width = 40;
 104     ret.listsuffix = L".";
 105     ret.bullet = L"\x2022\0-\0\0";
 106     ret.rule = L"\x2500\0-\0\0";
 107     ret.startemph = L"_\0_\0\0";
 108     ret.endemph = uadv(ret.startemph);
 109     ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
 110     ret.rquote = uadv(ret.lquote);
 111     ret.sectsuffix = L": ";
 112     ret.underline = L"\x203E\0-\0\0";
 113
 114     /*
 115      * Two-pass configuration so that we can pick up global config
 116      * (e.g. `quotes') before having it overridden by specific
 117      * config (`info-quotes'), irrespective of the order in which
 118      * they occur.
 119      */
 120     for (p = source; p; p = p->next) {
 121         if (p->type == para_Config) {
 122             if (!ustricmp(p->keyword, L"quotes")) {
 123                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 124                     ret.lquote = uadv(p->keyword);
 125                     ret.rquote = uadv(ret.lquote);
 126                 }
 127             }
 128         }
 129     }
 130
 131     for (p = source; p; p = p->next) {
 132         if (p->type == para_Config) {
 133             if (!ustricmp(p->keyword, L"info-filename")) {
 134                 sfree(ret.filename);
 135                 ret.filename = dupstr(adv(p->origkeyword));
 136             } else if (!ustricmp(p->keyword, L"info-charset")) {
 137                 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
 138             } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
 139                 ret.maxfilesize = utoi(uadv(p->keyword));
 140             } else if (!ustricmp(p->keyword, L"info-width")) {
 141                 ret.width = utoi(uadv(p->keyword));
 142             } else if (!ustricmp(p->keyword, L"info-indent-code")) {
 143                 ret.indent_code = utoi(uadv(p->keyword));
 144             } else if (!ustricmp(p->keyword, L"info-index-width")) {
 145                 ret.index_width = utoi(uadv(p->keyword));
 146             } else if (!ustricmp(p->keyword, L"info-list-indent")) {
 147                 ret.listindentbefore = utoi(uadv(p->keyword));
 148             } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
 149                 ret.listindentafter = utoi(uadv(p->keyword));
 150             } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
 151                 ret.sectsuffix = uadv(p->keyword);
 152             } else if (!ustricmp(p->keyword, L"info-underline")) {
 153                 ret.underline = uadv(p->keyword);
 154             } else if (!ustricmp(p->keyword, L"info-bullet")) {
 155                 ret.bullet = uadv(p->keyword);
 156             } else if (!ustricmp(p->keyword, L"info-rule")) {
 157                 ret.rule = uadv(p->keyword);
 158             } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
 159                 ret.listsuffix = uadv(p->keyword);
 160             } else if (!ustricmp(p->keyword, L"info-emphasis")) {
 161                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 162                     ret.startemph = uadv(p->keyword);
 163                     ret.endemph = uadv(ret.startemph);
 164                 }
 165             } else if (!ustricmp(p->keyword, L"info-quotes")) {
 166                 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
 167                     ret.lquote = uadv(p->keyword);
 168                     ret.rquote = uadv(ret.lquote);
 169                 }
 170             }
 171         }
 172     }
 173
 174     /*
 175      * Now process fallbacks on quote characters, underlines, the
 176      * rule character, the emphasis characters, and bullets.
 177      */
 178     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
 179            (!cvt_ok(ret.charset, ret.lquote) ||
 180             !cvt_ok(ret.charset, ret.rquote))) {
 181         ret.lquote = uadv(ret.rquote);
 182         ret.rquote = uadv(ret.lquote);
 183     }
 184
 185     while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
 186            (!cvt_ok(ret.charset, ret.startemph) ||
 187             !cvt_ok(ret.charset, ret.endemph))) {
 188         ret.startemph = uadv(ret.endemph);
 189         ret.endemph = uadv(ret.startemph);
 190     }
 191
 192     while (*ret.underline && *uadv(ret.underline) &&
 193            !cvt_ok(ret.charset, ret.underline))
 194         ret.underline = uadv(ret.underline);
 195
 196     while (*ret.bullet && *uadv(ret.bullet) &&
 197            !cvt_ok(ret.charset, ret.bullet))
 198         ret.bullet = uadv(ret.bullet);
 199
 200     while (*ret.rule && *uadv(ret.rule) &&
 201            !cvt_ok(ret.charset, ret.rule))
 202         ret.rule = uadv(ret.rule);
 203
 204     return ret;
 205 }
 206
 207 paragraph *info_config_filename(char *filename)
 208 {
 209     return cmdline_cfg_simple("info-filename", filename, NULL);
 210 }
 211
 212 void info_backend(paragraph *sourceform, keywordlist *keywords,
 213                   indexdata *idx, void *unused) {
 214     paragraph *p;
 215     infoconfig conf;
 216     word *prefix, *body, *wp;
 217     word spaceword;
 218     wchar_t *prefixextra;
 219     int nesting, nestindent;
 220     int indentb, indenta;
 221     int filepos;
 222     int has_index;
 223     info_data intro_text = EMPTY_INFO_DATA;
 224     node *topnode, *currnode;
 225     word bullet;
 226     FILE *fp;
 227
 228     IGNORE(unused);
 229
 230     conf = info_configure(sourceform);
 231
 232     /*
 233      * Go through and create a node for each section.
 234      */
 235     topnode = info_node_new("Top", conf.charset);
 236     currnode = topnode;
 237     for (p = sourceform; p; p = p->next) switch (p->type) {
 238         /*
 239          * Chapter titles.
 240          */
 241       case para_Chapter:
 242       case para_Appendix:
 243       case para_UnnumberedChapter:
 244       case para_Heading:
 245       case para_Subsect:
 246         {
 247             node *newnode, *upnode;
 248             char *nodename;
 249
 250             nodename = info_node_name(p, &conf);
 251             newnode = info_node_new(nodename, conf.charset);
 252             sfree(nodename);
 253
 254             p->private_data = newnode;
 255
 256             if (p->parent)
 257                 upnode = (node *)p->parent->private_data;
 258             else
 259                 upnode = topnode;
 260             assert(upnode);
 261             newnode->up = upnode;
 262
 263             currnode->next = newnode;
 264             newnode->prev = currnode;
 265
 266             currnode->listnext = newnode;
 267             currnode = newnode;
 268         }
 269         break;
 270       default:
 271         p->private_data = NULL;
 272         break;
 273     }
 274
 275     /*
 276      * Set up the display form of each index entry.
 277      */
 278     {
 279         int i;
 280         indexentry *entry;
 281
 282         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 283             info_idx *ii = snew(info_idx);
 284             info_data id = EMPTY_INFO_DATA;
 285
 286             id.charset = conf.charset;
 287
 288             ii->nnodes = ii->nodesize = 0;
 289             ii->nodes = NULL;
 290
 291             ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);
 292
 293             ii->text = id.output.text;
 294
 295             entry->backend_data = ii;
 296         }
 297     }
 298
 299     /*
 300      * An Info file begins with a piece of introductory text which
 301      * is apparently never shown anywhere. This seems to me to be a
 302      * good place to put the copyright notice and the version IDs.
 303      * Also, Info directory entries are expected to go here.
 304      */
 305     intro_text.charset = conf.charset;
 306
 307     info_rdaddsc(&intro_text,
 308             "This Info file generated by Halibut, ");
 309     info_rdaddsc(&intro_text, version);
 310     info_rdaddsc(&intro_text, "\n\n");
 311
 312     for (p = sourceform; p; p = p->next)
 313         if (p->type == para_Config &&
 314             !ustricmp(p->keyword, L"info-dir-entry")) {
 315             wchar_t *section, *shortname, *longname, *kw;
 316             char *s;
 317
 318             section = uadv(p->keyword);
 319             shortname = *section ? uadv(section) : L"";
 320             longname = *shortname ? uadv(shortname) : L"";
 321             kw = *longname ? uadv(longname) : L"";
 322
 323             if (!*longname) {
 324                 error(err_cfginsufarg, &p->fpos, p->origkeyword, 3);
 325                 continue;
 326             }
 327
 328             info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
 329             info_rdadds(&intro_text, section);
 330             info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
 331             info_rdadds(&intro_text, shortname);
 332             info_rdaddsc(&intro_text, ": (");
 333             s = dupstr(conf.filename);
 334             if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
 335                 s[strlen(s)-5] = '\0';
 336             info_rdaddsc(&intro_text, s);
 337             sfree(s);
 338             info_rdaddsc(&intro_text, ")");
 339             if (*kw) {
 340                 keyword *kwl = kw_lookup(keywords, kw);
 341                 if (kwl && kwl->para->private_data) {
 342                     node *n = (node *)kwl->para->private_data;
 343                     info_rdaddsc(&intro_text, n->name);
 344                 }
 345             }
 346             info_rdaddsc(&intro_text, ".   ");
 347             info_rdadds(&intro_text, longname);
 348             info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
 349         }
 350
 351     for (p = sourceform; p; p = p->next)
 352         if (p->type == para_Copyright)
 353             info_para(&intro_text, NULL, NULL, p->words, keywords,
 354                       0, 0, conf.width, &conf);
 355
 356     for (p = sourceform; p; p = p->next)
 357         if (p->type == para_VersionID)
 358             info_versionid(&intro_text, p->words, &conf);
 359
 360     if (intro_text.output.text[intro_text.output.pos-1] != '\n')
 361         info_rdaddc(&intro_text, '\n');
 362
 363     /* Do the title */
 364     for (p = sourceform; p; p = p->next)
 365         if (p->type == para_Title)
 366             info_heading(&topnode->text, NULL, p->words, conf.width, &conf);
 367
 368     nestindent = conf.listindentbefore + conf.listindentafter;
 369     nesting = 0;
 370
 371     currnode = topnode;
 372
 373     /* Do the main document */
 374     for (p = sourceform; p; p = p->next) switch (p->type) {
 375
 376       case para_QuotePush:
 377         nesting += 2;
 378         break;
 379       case para_QuotePop:
 380         nesting -= 2;
 381         assert(nesting >= 0);
 382         break;
 383
 384       case para_LcontPush:
 385         nesting += nestindent;
 386         break;
 387       case para_LcontPop:
 388         nesting -= nestindent;
 389         assert(nesting >= 0);
 390         break;
 391
 392         /*
 393          * Things we ignore because we've already processed them or
 394          * aren't going to touch them in this pass.
 395          */
 396       case para_IM:
 397       case para_BR:
 398       case para_Biblio:                /* only touch BiblioCited */
 399       case para_VersionID:
 400       case para_NoCite:
 401       case para_Title:
 402         break;
 403
 404         /*
 405          * Chapter titles.
 406          */
 407       case para_Chapter:
 408       case para_Appendix:
 409       case para_UnnumberedChapter:
 410       case para_Heading:
 411       case para_Subsect:
 412         currnode = p->private_data;
 413         assert(currnode);
 414         assert(currnode->up);
 415
 416         if (!currnode->up->started_menu) {
 417             info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
 418             currnode->up->started_menu = TRUE;
 419         }
 420         info_menu_item(&currnode->up->text, currnode, p, &conf);
 421
 422         has_index |= info_check_index(p->words, currnode, idx);
 423         info_heading(&currnode->text, p->kwtext, p->words, conf.width, &conf);
 424         nesting = 0;
 425         break;
 426
 427       case para_Rule:
 428         info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
 429         break;
 430
 431       case para_Normal:
 432       case para_Copyright:
 433       case para_DescribedThing:
 434       case para_Description:
 435       case para_BiblioCited:
 436       case para_Bullet:
 437       case para_NumberedList:
 438         has_index |= info_check_index(p->words, currnode, idx);
 439         if (p->type == para_Bullet) {
 440             bullet.next = NULL;
 441             bullet.alt = NULL;
 442             bullet.type = word_Normal;
 443             bullet.text = conf.bullet;
 444             prefix = &bullet;
 445             prefixextra = NULL;
 446             indentb = conf.listindentbefore;
 447             indenta = conf.listindentafter;
 448         } else if (p->type == para_NumberedList) {
 449             prefix = p->kwtext;
 450             prefixextra = conf.listsuffix;
 451             indentb = conf.listindentbefore;
 452             indenta = conf.listindentafter;
 453         } else if (p->type == para_Description) {
 454             prefix = NULL;
 455             prefixextra = NULL;
 456             indentb = conf.listindentbefore;
 457             indenta = conf.listindentafter;
 458         } else {
 459             prefix = NULL;
 460             prefixextra = NULL;
 461             indentb = indenta = 0;
 462         }
 463         if (p->type == para_BiblioCited) {
 464             body = dup_word_list(p->kwtext);
 465             for (wp = body; wp->next; wp = wp->next);
 466             wp->next = &spaceword;
 467             spaceword.next = p->words;
 468             spaceword.alt = NULL;
 469             spaceword.type = word_WhiteSpace;
 470             spaceword.text = NULL;
 471         } else {
 472             wp = NULL;
 473             body = p->words;
 474         }
 475         info_para(&currnode->text, prefix, prefixextra, body, keywords,
 476                   nesting + indentb, indenta,
 477                   conf.width - nesting - indentb - indenta, &conf);
 478         if (wp) {
 479             wp->next = NULL;
 480             free_word_list(body);
 481         }
 482         break;
 483
 484       case para_Code:
 485         info_codepara(&currnode->text, p->words,
 486                       nesting + conf.indent_code,
 487                       conf.width - nesting - 2 * conf.indent_code);
 488         break;
 489     }
 490
 491     /*
 492      * Create an index node if required.
 493      */
 494     if (has_index) {
 495         node *newnode;
 496         int i, j, k;
 497         indexentry *entry;
 498
 499         newnode = info_node_new("Index", conf.charset);
 500         newnode->up = topnode;
 501
 502         currnode->next = newnode;
 503         newnode->prev = currnode;
 504         currnode->listnext = newnode;
 505
 506         info_rdaddsc(&newnode->text, "Index\n-----\n\n");
 507
 508         info_menu_item(&topnode->text, newnode, NULL, &conf);
 509
 510         for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
 511             info_idx *ii = (info_idx *)entry->backend_data;
 512
 513             for (j = 0; j < ii->nnodes; j++) {
 514                 /*
 515                  * When we have multiple references for a single
 516                  * index term, we only display the actual term on
 517                  * the first line, to make it clear that the terms
 518                  * really are the same.
 519                  */
 520                 if (j == 0)
 521                     info_rdaddsc(&newnode->text, ii->text);
 522                 for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
 523                     info_rdaddc(&newnode->text, ' ');
 524                 info_rdaddsc(&newnode->text, "  *Note ");
 525                 info_rdaddsc(&newnode->text, ii->nodes[j]->name);
 526                 info_rdaddsc(&newnode->text, "::\n");
 527             }
 528         }
 529     }
 530
 531     /*
 532      * Finalise the text of each node, by adding the ^_ delimiter
 533      * and the node line at the top.
 534      */
 535     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 536         char *origtext = currnode->text.output.text;
 537         currnode->text = empty_info_data;
 538         currnode->text.charset = conf.charset;
 539         info_rdaddsc(&currnode->text, "\037\nFile: ");
 540         info_rdaddsc(&currnode->text, conf.filename);
 541         info_rdaddsc(&currnode->text, ",  Node: ");
 542         info_rdaddsc(&currnode->text, currnode->name);
 543         if (currnode->prev) {
 544             info_rdaddsc(&currnode->text, ",  Prev: ");
 545             info_rdaddsc(&currnode->text, currnode->prev->name);
 546         }
 547         info_rdaddsc(&currnode->text, ",  Up: ");
 548         info_rdaddsc(&currnode->text, (currnode->up ?
 549                                        currnode->up->name : "(dir)"));
 550         if (currnode->next) {
 551             info_rdaddsc(&currnode->text, ",  Next: ");
 552             info_rdaddsc(&currnode->text, currnode->next->name);
 553         }
 554         info_rdaddsc(&currnode->text, "\n\n");
 555         info_rdaddsc(&currnode->text, origtext);
 556         /*
 557          * Just make _absolutely_ sure we end with a newline.
 558          */
 559         if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
 560             info_rdaddc(&currnode->text, '\n');
 561
 562         sfree(origtext);
 563     }
 564
 565     /*
 566      * Compute the offsets for the tag table.
 567      */
 568     filepos = intro_text.output.pos;
 569     for (currnode = topnode; currnode; currnode = currnode->listnext) {
 570         currnode->pos = filepos;
 571         filepos += currnode->text.output.pos;
 572     }
 573
 574     /*
 575      * Split into sub-files.
 576      */
 577     if (conf.maxfilesize > 0) {
 578         int currfilesize = intro_text.output.pos, currfilenum = 1;
 579         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 580             if (currfilesize > intro_text.output.pos &&
 581                 currfilesize + currnode->text.output.pos > conf.maxfilesize) {
 582                 currfilenum++;
 583                 currfilesize = intro_text.output.pos;
 584             }
 585             currnode->filenum = currfilenum;
 586             currfilesize += currnode->text.output.pos;
 587         }
 588     }
 589
 590     /*
 591      * Write the primary output file.
 592      */
 593     fp = fopen(conf.filename, "w");
 594     if (!fp) {
 595         error(err_cantopenw, conf.filename);
 596         return;
 597     }
 598     fputs(intro_text.output.text, fp);
 599     if (conf.maxfilesize == 0) {
 600         for (currnode = topnode; currnode; currnode = currnode->listnext)
 601             fputs(currnode->text.output.text, fp);
 602     } else {
 603         int filenum = 0;
 604         fprintf(fp, "\037\nIndirect:\n");
 605         for (currnode = topnode; currnode; currnode = currnode->listnext)
 606             if (filenum != currnode->filenum) {
 607                 filenum = currnode->filenum;
 608                 fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
 609                         currnode->pos);
 610             }
 611     }
 612     fprintf(fp, "\037\nTag Table:\n");
 613     if (conf.maxfilesize > 0)
 614         fprintf(fp, "(Indirect)\n");
 615     for (currnode = topnode; currnode; currnode = currnode->listnext)
 616         fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
 617     fprintf(fp, "\037\nEnd Tag Table\n");
 618     fclose(fp);
 619
 620     /*
 621      * Write the subfiles.
 622      */
 623     if (conf.maxfilesize > 0) {
 624         int filenum = 0;
 625         fp = NULL;
 626
 627         for (currnode = topnode; currnode; currnode = currnode->listnext) {
 628             if (filenum != currnode->filenum) {
 629                 char *fname;
 630
 631                 filenum = currnode->filenum;
 632
 633                 if (fp)
 634                     fclose(fp);
 635                 fname = snewn(strlen(conf.filename) + 40, char);
 636                 sprintf(fname, "%s-%d", conf.filename, filenum);
 637                 fp = fopen(fname, "w");
 638                 if (!fp) {
 639                     error(err_cantopenw, fname);
 640                     return;
 641                 }
 642                 sfree(fname);
 643                 fputs(intro_text.output.text, fp);
 644             }
 645             fputs(currnode->text.output.text, fp);
 646         }
 647
 648         if (fp)
 649             fclose(fp);
 650     }
 651 }
 652
 653 static int info_check_index(word *w, node *n, indexdata *idx)
 654 {
 655     int ret = 0;
 656
 657     for (; w; w = w->next) {
 658         if (w->type == word_IndexRef) {
 659             indextag *tag;
 660             int i;
 661
 662             tag = index_findtag(idx, w->text);
 663             if (!tag)
 664                 break;
 665
 666             for (i = 0; i < tag->nrefs; i++) {
 667                 indexentry *entry = tag->refs[i];
 668                 info_idx *ii = (info_idx *)entry->backend_data;
 669
 670                 if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
 671                     /*
 672                      * If the same index term is indexed twice
 673                      * within the same section, we only want to
 674                      * mention it once in the index. So do nothing
 675                      * here.
 676                      */
 677                     continue;
 678                 }
 679
 680                 if (ii->nnodes >= ii->nodesize) {
 681                     ii->nodesize += 32;
 682                     ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
 683                 }
 684
 685                 ii->nodes[ii->nnodes++] = n;
 686
 687                 ret = 1;
 688             }
 689         }
 690     }
 691
 692     return ret;
 693 }
 694
 695 static word *info_transform_wordlist(word *words, keywordlist *keywords)
 696 {
 697     word *ret = dup_word_list(words);
 698     word *w;
 699     keyword *kwl;
 700
 701     for (w = ret; w; w = w->next) {
 702         w->private_data = NULL;
 703         if (w->type == word_UpperXref || w->type == word_LowerXref) {
 704             kwl = kw_lookup(keywords, w->text);
 705             if (kwl) {
 706                 if (kwl->para->type == para_NumberedList ||
 707                     kwl->para->type == para_BiblioCited) {
 708                     /*
 709                      * In Info, we do nothing special for xrefs to
 710                      * numbered list items or bibliography entries.
 711                      */
 712                     continue;
 713                 } else {
 714                     /*
 715                      * An xref to a different section has its text
 716                      * completely replaced.
 717                      */
 718                     word *w2, *w3, *w4;
 719                     w2 = w3 = w->next;
 720                     w4 = NULL;
 721                     while (w2) {
 722                         if (w2->type == word_XrefEnd) {
 723                             w4 = w2->next;
 724                             w2->next = NULL;
 725                             break;
 726                         }
 727                         w2 = w2->next;
 728                     }
 729                     free_word_list(w3);
 730
 731                     /*
 732                      * Now w is the UpperXref / LowerXref we
 733                      * started with, and w4 is the next word after
 734                      * the corresponding XrefEnd (if any). The
 735                      * simplest thing is just to stick a pointer to
 736                      * the target node structure in the private
 737                      * data field of the xref word, and let
 738                      * info_rdaddwc and friends read the node name
 739                      * out from there.
 740                      */
 741                     w->next = w4;
 742                     w->private_data = kwl->para->private_data;
 743                     assert(w->private_data);
 744                 }
 745             }
 746         }
 747     }
 748
 749     return ret;
 750 }
 751
 752 static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
 753                         infoconfig *cfg) {
 754     int ret = 0;
 755
 756     for (; words && words != end; words = words->next) switch (words->type) {
 757       case word_HyperLink:
 758       case word_HyperEnd:
 759       case word_XrefEnd:
 760       case word_IndexRef:
 761         break;
 762
 763       case word_Normal:
 764       case word_Emph:
 765       case word_Code:
 766       case word_WeakCode:
 767       case word_WhiteSpace:
 768       case word_EmphSpace:
 769       case word_CodeSpace:
 770       case word_WkCodeSpace:
 771       case word_Quote:
 772       case word_EmphQuote:
 773       case word_CodeQuote:
 774       case word_WkCodeQuote:
 775         assert(words->type != word_CodeQuote &&
 776                words->type != word_WkCodeQuote);
 777         if (towordstyle(words->type) == word_Emph &&
 778             (attraux(words->aux) == attr_First ||
 779              attraux(words->aux) == attr_Only))
 780             ret += info_rdadds(id, cfg->startemph);
 781         else if (towordstyle(words->type) == word_Code &&
 782                  (attraux(words->aux) == attr_First ||
 783                   attraux(words->aux) == attr_Only))
 784             ret += info_rdadds(id, cfg->lquote);
 785         if (removeattr(words->type) == word_Normal) {
 786             if (cvt_ok(id->charset, words->text) || !words->alt)
 787                 ret += info_rdadds(id, words->text);
 788             else
 789                 ret += info_rdaddwc(id, words->alt, NULL, FALSE, cfg);
 790         } else if (removeattr(words->type) == word_WhiteSpace) {
 791             ret += info_rdadd(id, L' ');
 792         } else if (removeattr(words->type) == word_Quote) {
 793             ret += info_rdadds(id, quoteaux(words->aux) == quote_Open ?
 794                                cfg->lquote : cfg->rquote);
 795         }
 796         if (towordstyle(words->type) == word_Emph &&
 797             (attraux(words->aux) == attr_Last ||
 798              attraux(words->aux) == attr_Only))
 799             ret += info_rdadds(id, cfg->endemph);
 800         else if (towordstyle(words->type) == word_Code &&
 801                  (attraux(words->aux) == attr_Last ||
 802                   attraux(words->aux) == attr_Only))
 803             ret += info_rdadds(id, cfg->rquote);
 804         break;
 805
 806       case word_UpperXref:
 807       case word_LowerXref:
 808         if (xrefs && words->private_data) {
 809             /*
 810              * This bit is structural and so must be done in char
 811              * rather than wchar_t.
 812              */
 813             ret += info_rdaddsc(id, "*Note ");
 814             ret += info_rdaddsc(id, ((node *)words->private_data)->name);
 815             ret += info_rdaddsc(id, "::");
 816         }
 817         break;
 818     }
 819
 820     return ret;
 821 }
 822
 823 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
 824
 825 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
 826     int w = 0;
 827     while (words) {
 828         w += info_width_internal(words, xrefs, cfg);
 829         words = words->next;
 830     }
 831     return w;
 832 }
 833
 834 static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
 835     int wid;
 836     int attr;
 837
 838     switch (words->type) {
 839       case word_HyperLink:
 840       case word_HyperEnd:
 841       case word_XrefEnd:
 842       case word_IndexRef:
 843         return 0;
 844
 845       case word_UpperXref:
 846       case word_LowerXref:
 847         if (xrefs && words->private_data) {
 848             /* "*Note " plus "::" comes to 8 characters */
 849             return 8 + strwid(((node *)words->private_data)->name,
 850                               cfg->charset);
 851         } else
 852             return 0;
 853     }
 854
 855     assert(words->type < word_internal_endattrs);
 856
 857     wid = 0;
 858     attr = towordstyle(words->type);
 859
 860     if (attr == word_Emph || attr == word_Code) {
 861         if (attraux(words->aux) == attr_Only ||
 862             attraux(words->aux) == attr_First)
 863             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 864                            cfg->charset);
 865     }
 866     if (attr == word_Emph || attr == word_Code) {
 867         if (attraux(words->aux) == attr_Only ||
 868             attraux(words->aux) == attr_Last)
 869             wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
 870                            cfg->charset);
 871     }
 872
 873     switch (words->type) {
 874       case word_Normal:
 875       case word_Emph:
 876       case word_Code:
 877       case word_WeakCode:
 878         if (cvt_ok(cfg->charset, words->text) || !words->alt)
 879             wid += ustrwid(words->text, cfg->charset);
 880         else
 881             wid += info_width_internal_list(words->alt, xrefs, cfg);
 882         return wid;
 883
 884       case word_WhiteSpace:
 885       case word_EmphSpace:
 886       case word_CodeSpace:
 887       case word_WkCodeSpace:
 888       case word_Quote:
 889       case word_EmphQuote:
 890       case word_CodeQuote:
 891       case word_WkCodeQuote:
 892         assert(words->type != word_CodeQuote &&
 893                words->type != word_WkCodeQuote);
 894         if (removeattr(words->type) == word_Quote) {
 895             if (quoteaux(words->aux) == quote_Open)
 896                 wid += ustrwid(cfg->lquote, cfg->charset);
 897             else
 898                 wid += ustrwid(cfg->rquote, cfg->charset);
 899         } else
 900             wid++;                     /* space */
 901     }
 902     return wid;
 903 }
 904
 905 static int info_width_noxrefs(void *ctx, word *words)
 906 {
 907     return info_width_internal(words, FALSE, (infoconfig *)ctx);
 908 }
 909 static int info_width_xrefs(void *ctx, word *words)
 910 {
 911     return info_width_internal(words, TRUE, (infoconfig *)ctx);
 912 }
 913
 914 static void info_heading(info_data *text, word *tprefix,
 915                          word *words, int width, infoconfig *cfg) {
 916     int length;
 917     int firstlinewidth, wrapwidth;
 918     wrappedline *wrapping, *p;
 919
 920     length = 0;
 921     if (tprefix) {
 922         length += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
 923         length += info_rdadds(text, cfg->sectsuffix);
 924     }
 925
 926     wrapwidth = width;
 927     firstlinewidth = width - length;
 928
 929     wrapping = wrap_para(words, firstlinewidth, wrapwidth,
 930                          info_width_noxrefs, cfg, 0);
 931     for (p = wrapping; p; p = p->next) {
 932         length += info_rdaddwc(text, p->begin, p->end, FALSE, cfg);
 933         info_rdadd(text, L'\n');
 934         while (length > 0) {
 935             info_rdadds(text, cfg->underline);
 936             length -= ustrwid(cfg->underline, cfg->charset);
 937         }
 938         info_rdadd(text, L'\n');
 939         length = 0;
 940     }
 941     wrap_free(wrapping);
 942     info_rdadd(text, L'\n');
 943 }
 944
 945 static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
 946 {
 947     while (indent--) info_rdadd(text, L' ');
 948     while (width > 0) {
 949         info_rdadds(text, cfg->rule);
 950         width -= ustrwid(cfg->rule, cfg->charset);
 951     }
 952     info_rdadd(text, L'\n');
 953     info_rdadd(text, L'\n');
 954 }
 955
 956 static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
 957                       word *input, keywordlist *keywords, int indent,
 958                       int extraindent, int width, infoconfig *cfg) {
 959     wrappedline *wrapping, *p;
 960     word *words;
 961     int e;
 962     int i;
 963     int firstlinewidth = width;
 964
 965     words = info_transform_wordlist(input, keywords);
 966
 967     if (prefix) {
 968         for (i = 0; i < indent; i++)
 969             info_rdadd(text, L' ');
 970         e = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
 971         if (prefixextra)
 972             e += info_rdadds(text, prefixextra);
 973         /* If the prefix is too long, shorten the first line to fit. */
 974         e = extraindent - e;
 975         if (e < 0) {
 976             firstlinewidth += e;       /* this decreases it, since e < 0 */
 977             if (firstlinewidth < 0) {
 978                 e = indent + extraindent;
 979                 firstlinewidth = width;
 980                 info_rdadd(text, L'\n');
 981             } else
 982                 e = 0;
 983         }
 984     } else
 985         e = indent + extraindent;
 986
 987     wrapping = wrap_para(words, firstlinewidth, width, info_width_xrefs,
 988                          cfg, 0);
 989     for (p = wrapping; p; p = p->next) {
 990         for (i = 0; i < e; i++)
 991             info_rdadd(text, L' ');
 992         info_rdaddwc(text, p->begin, p->end, TRUE, cfg);
 993         info_rdadd(text, L'\n');
 994         e = indent + extraindent;
 995     }
 996     wrap_free(wrapping);
 997     info_rdadd(text, L'\n');
 998
 999     free_word_list(words);
1000 }
1001
1002 static void info_codepara(info_data *text, word *words,
1003                           int indent, int width) {
1004     int i;
1005
1006     for (; words; words = words->next) if (words->type == word_WeakCode) {
1007         for (i = 0; i < indent; i++)
1008             info_rdadd(text, L' ');
1009         if (info_rdadds(text, words->text) > width) {
1010             /* FIXME: warn */
1011         }
1012         info_rdadd(text, L'\n');
1013     }
1014
1015     info_rdadd(text, L'\n');
1016 }
1017
1018 static void info_versionid(info_data *text, word *words, infoconfig *cfg) {
1019     info_rdadd(text, L'[');
1020     info_rdaddwc(text, words, NULL, FALSE, cfg);
1021     info_rdadds(text, L"]\n");
1022 }
1023
1024 static node *info_node_new(char *name, int charset)
1025 {
1026     node *n;
1027
1028     n = snew(node);
1029     n->text = empty_info_data;
1030     n->text.charset = charset;
1031     n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1032     n->name = dupstr(name);
1033     n->started_menu = FALSE;
1034
1035     return n;
1036 }
1037
1038 static char *info_node_name(paragraph *par, infoconfig *cfg)
1039 {
1040     info_data id = EMPTY_INFO_DATA;
1041     char *p, *q;
1042
1043     id.charset = cfg->charset;
1044     info_rdaddwc(&id, par->kwtext ? par->kwtext : par->words,
1045                  NULL, FALSE, cfg);
1046     info_rdaddsc(&id, NULL);
1047
1048     /*
1049      * We cannot have commas or colons in a node name. Remove any
1050      * that we find, with a warning.
1051      */
1052     p = q = id.output.text;
1053     while (*p) {
1054         if (*p == ':' || *p == ',') {
1055             error(err_infonodechar, &par->fpos, *p);
1056         } else {
1057             *q++ = *p;
1058         }
1059         p++;
1060     }
1061     *p = '\0';
1062
1063     return id.output.text;
1064 }
1065
1066 static void info_menu_item(info_data *text, node *n, paragraph *p,
1067                            infoconfig *cfg)
1068 {
1069     /*
1070      * FIXME: Depending on how we're doing node names in this info
1071      * file, we might want to do
1072      *
1073      *   * Node name:: Chapter title
1074      *
1075      * _or_
1076      *
1077      *   * Chapter number: Node name.
1078      *
1079      * This function mostly works in char rather than wchar_t,
1080      * because a menu item is a structural component.
1081      */
1082     info_rdaddsc(text, "* ");
1083     info_rdaddsc(text, n->name);
1084     info_rdaddsc(text, "::");
1085     if (p) {
1086         info_rdaddc(text, ' ');
1087         info_rdaddwc(text, p->words, NULL, FALSE, cfg);
1088     }
1089     info_rdaddc(text, '\n');
1090 }
1091
1092 /*
1093  * These functions implement my wrapper on the rdadd* calls which
1094  * allows me to switch arbitrarily between literal octet-string
1095  * text and charset-translated Unicode. (Because no matter what
1096  * character set I write the actual text in, I expect info readers
1097  * to treat node names and file names literally and to expect
1098  * keywords like `*Note' in their canonical form, so I have to take
1099  * steps to ensure that those structural elements of the file
1100  * aren't messed with.)
1101  */
1102 static int info_rdadds(info_data *d, wchar_t const *wcs)
1103 {
1104     if (!d->wcmode) {
1105         d->state = charset_init_state;
1106         d->wcmode = TRUE;
1107     }
1108
1109     if (wcs) {
1110         char buf[256];
1111         int len, width, ret;
1112
1113         width = ustrwid(wcs, d->charset);
1114
1115         len = ustrlen(wcs);
1116         while (len > 0) {
1117             int prevlen = len;
1118
1119             ret = charset_from_unicode(&wcs, &len, buf, lenof(buf),
1120                                        d->charset, &d->state, NULL);
1121
1122             assert(len < prevlen);
1123
1124             if (ret > 0) {
1125                 buf[ret] = '\0';
1126                 rdaddsc(&d->output, buf);
1127             }
1128         }
1129
1130         return width;
1131     } else
1132         return 0;
1133 }
1134
1135 static int info_rdaddsc(info_data *d, char const *cs)
1136 {
1137     if (d->wcmode) {
1138         char buf[256];
1139         int ret;
1140
1141         ret = charset_from_unicode(NULL, 0, buf, lenof(buf),
1142                                    d->charset, &d->state, NULL);
1143         if (ret > 0) {
1144             buf[ret] = '\0';
1145             rdaddsc(&d->output, buf);
1146         }
1147
1148         d->wcmode = FALSE;
1149     }
1150
1151     if (cs) {
1152         rdaddsc(&d->output, cs);
1153         return strwid(cs, d->charset);
1154     } else
1155         return 0;
1156 }
1157
1158 static int info_rdadd(info_data *d, wchar_t wc)
1159 {
1160     wchar_t wcs[2];
1161     wcs[0] = wc;
1162     wcs[1] = L'\0';
1163     return info_rdadds(d, wcs);
1164 }
1165
1166 static int info_rdaddc(info_data *d, char c)
1167 {
1168     char cs[2];
1169     cs[0] = c;
1170     cs[1] = '\0';
1171     return info_rdaddsc(d, cs);
1172 }